diff --git a/.coveragerc b/.coveragerc
index d4925275f..8d062f488 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -5,7 +5,7 @@ omit =
     */llama_stack/templates/*
     .venv/*
     */llama_stack/cli/scripts/*
-    */llama_stack/ui/*
+    */llama_stack_ui/*
     */llama_stack/distribution/ui/*
     */llama_stack/strong_typing/*
     */llama_stack/env.py
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 8b17510b7..75636525e 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
+* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo @cdoern
diff --git a/.github/actions/install-llama-stack-client/action.yml b/.github/actions/install-llama-stack-client/action.yml
new file mode 100644
index 000000000..3c1c77d9c
--- /dev/null
+++ b/.github/actions/install-llama-stack-client/action.yml
@@ -0,0 +1,60 @@
+name: Install llama-stack-client
+description: Install llama-stack-client based on branch context and client-version input
+
+inputs:
+  client-version:
+    description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
+    required: false
+    default: ""
+
+outputs:
+  uv-extra-index-url:
+    description: 'UV_EXTRA_INDEX_URL to use (set for release branches)'
+    value: ${{ steps.configure.outputs.uv-extra-index-url }}
+  install-after-sync:
+    description: 'Whether to install client after uv sync'
+    value: ${{ steps.configure.outputs.install-after-sync }}
+  install-source:
+    description: 'Where to install client from after sync'
+    value: ${{ steps.configure.outputs.install-source }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Configure client installation
+      id: configure
+      shell: bash
+      run: |
+        # Determine the branch we're working with
+        BRANCH="${{ github.base_ref || github.ref }}"
+        BRANCH="${BRANCH#refs/heads/}"
+
+        echo "Working with branch: $BRANCH"
+
+        # On release branches: use test.pypi for uv sync, then install from git
+        # On non-release branches: install based on client-version after sync
+        if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
+          echo "Detected release branch: $BRANCH"
+
+          # Check if matching branch exists in client repo
+          if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then
+            echo "::error::Branch $BRANCH not found in llama-stack-client-python repository"
+            echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
+            exit 1
+          fi
+
+          # Configure to use test.pypi as extra index (PyPI is primary)
+          echo "uv-extra-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT
+          echo "install-after-sync=true" >> $GITHUB_OUTPUT
+          echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT
+        elif [ "${{ inputs.client-version }}" = "latest" ]; then
+          # Install from main git after sync
+          echo "install-after-sync=true" >> $GITHUB_OUTPUT
+          echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT
+        elif [ "${{ inputs.client-version }}" = "published" ]; then
+          # Use published version from PyPI (installed by sync)
+          echo "install-after-sync=false" >> $GITHUB_OUTPUT
+        elif [ -n "${{ inputs.client-version }}" ]; then
+          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
+          exit 1
+        fi
diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
index ac600d570..d44cba4ee 100644
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@@ -72,7 +72,8 @@ runs:
         echo "New recordings detected, committing and pushing"
         git add tests/integration/

-        git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+        git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
+
+        git fetch origin ${{ github.ref_name }}
         git rebase origin/${{ github.ref_name }}
         echo "Rebased successfully"
@@ -88,13 +89,15 @@ runs:
       run: |
         # Ollama logs (if ollama container exists)
         sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+        # vllm logs (if vllm container exists)
+        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
         # Note: distro container logs are now dumped in integration-tests.sh before container is removed

    - name: Upload logs
      if: ${{ always() }}
      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
      with:
-        name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
+        name: logs-${{ github.run_id }}-${{ github.run_attempt || '1' }}-${{ strategy.job-index || github.job }}-${{ github.action }}
        path: |
          *.log
        retention-days: 1
diff --git a/.github/actions/setup-runner/action.yml b/.github/actions/setup-runner/action.yml
index 905d6b73a..3237abb67 100644
--- a/.github/actions/setup-runner/action.yml
+++ b/.github/actions/setup-runner/action.yml
@@ -18,25 +18,35 @@ runs:
         python-version: ${{ inputs.python-version }}
         version: 0.7.6

+    - name: Configure client installation
+      id: client-config
+      uses: ./.github/actions/install-llama-stack-client
+      with:
+        client-version: ${{ inputs.client-version }}
+
    - name: Install dependencies
      shell: bash
+      env:
+        UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
      run: |
+        # Export UV env vars for current step and persist to GITHUB_ENV for subsequent steps
+        if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+          export UV_INDEX_STRATEGY=unsafe-best-match
+          echo "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" >> $GITHUB_ENV
+          echo "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" >> $GITHUB_ENV
+          echo "Exported UV environment variables for current and subsequent steps"
+        fi
+
        echo "Updating project dependencies via uv sync"
        uv sync --all-groups

        echo "Installing ad-hoc dependencies"
        uv pip install faiss-cpu

-        # Install llama-stack-client-python based on the client-version input
-        if [ "${{ inputs.client-version }}" = "latest" ]; then
-          echo "Installing latest llama-stack-client-python from main branch"
-          uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
-        elif [ "${{ inputs.client-version }}" = "published" ]; then
-          echo "Installing published llama-stack-client-python from PyPI"
-          uv pip install llama-stack-client
-        else
-          echo "Invalid client-version: ${{ inputs.client-version }}"
-          exit 1
+        # Install specific client version after sync if needed
+        if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+          echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+          uv pip install ${{ steps.client-config.outputs.install-source }}
        fi

        echo "Installed llama packages"
diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml
index ee9011ed8..1c9d019cc 100644
--- a/.github/actions/setup-test-environment/action.yml
+++ b/.github/actions/setup-test-environment/action.yml
@@ -39,21 +39,36 @@ runs:
      if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
      uses: ./.github/actions/setup-vllm

+    - name: Start Postgres service
+      if: ${{ contains(inputs.setup, 'postgres') }}
+      shell: bash
+      run: |
+        sudo docker rm -f postgres-ci || true
+        sudo docker run -d --name postgres-ci \
+          -e POSTGRES_USER=llamastack \
+          -e POSTGRES_PASSWORD=llamastack \
+          -e POSTGRES_DB=llamastack \
+          -p 5432:5432 \
+          postgres:16
+
+        echo "Waiting for Postgres to become ready..."
+        for i in {1..30}; do
+          if sudo docker exec postgres-ci pg_isready -U llamastack -d llamastack >/dev/null 2>&1; then
+            echo "Postgres is ready"
+            break
+          fi
+          if [ "$i" -eq 30 ]; then
+            echo "Postgres failed to start in time"
+            sudo docker logs postgres-ci || true
+            exit 1
+          fi
+          sleep 2
+        done
+
    - name: Build Llama Stack
      shell: bash
      run: |
-        # Install llama-stack-client-python based on the client-version input
-        if [ "${{ inputs.client-version }}" = "latest" ]; then
-          echo "Installing latest llama-stack-client-python from main branch"
-          export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
-        elif [ "${{ inputs.client-version }}" = "published" ]; then
-          echo "Installing published llama-stack-client-python from PyPI"
-          unset LLAMA_STACK_CLIENT_DIR
-        else
-          echo "Invalid client-version: ${{ inputs.client-version }}"
-          exit 1
-        fi
-
+        # Client is already installed by setup-runner (handles both main and release branches)
        echo "Building Llama Stack"
        LLAMA_STACK_DIR=. \
diff --git a/.github/actions/setup-typescript-client/action.yml b/.github/actions/setup-typescript-client/action.yml
new file mode 100644
index 000000000..8b78ba70c
--- /dev/null
+++ b/.github/actions/setup-typescript-client/action.yml
@@ -0,0 +1,35 @@
+name: Setup TypeScript client
+description: Conditionally checkout and link llama-stack-client-typescript based on client-version
+inputs:
+  client-version:
+    description: 'Client version (latest or published)'
+    required: true
+
+outputs:
+  ts-client-path:
+    description: 'Path or version to use for TypeScript client'
+    value: ${{ steps.set-path.outputs.ts-client-path }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Checkout TypeScript client (latest)
+      if: ${{ inputs.client-version == 'latest' }}
+      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      with:
+        repository: llamastack/llama-stack-client-typescript
+        ref: main
+        path: .ts-client-checkout
+
+    - name: Set TS_CLIENT_PATH
+      id: set-path
+      shell: bash
+      run: |
+        if [ "${{ inputs.client-version }}" = "latest" ]; then
+          echo "ts-client-path=${{ github.workspace }}/.ts-client-checkout" >> $GITHUB_OUTPUT
+        elif [ "${{ inputs.client-version }}" = "published" ]; then
+          echo "ts-client-path=^0.3.2" >> $GITHUB_OUTPUT
+        else
+          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
+          exit 1
+        fi
diff --git a/.github/actions/setup-vllm/action.yml b/.github/actions/setup-vllm/action.yml
index 17ebd42f2..34ced0998 100644
--- a/.github/actions/setup-vllm/action.yml
+++ b/.github/actions/setup-vllm/action.yml
@@ -11,13 +11,14 @@ runs:
        --name vllm \
        -p 8000:8000 \
        --privileged=true \
-        quay.io/higginsd/vllm-cpu:65393ee064 \
+        quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
        --host 0.0.0.0 \
        --port 8000 \
        --enable-auto-tool-choice \
-        --tool-call-parser llama3_json \
-        --model /root/.cache/Llama-3.2-1B-Instruct \
-        --served-model-name meta-llama/Llama-3.2-1B-Instruct
+        --tool-call-parser hermes \
+        --model /root/.cache/Qwen3-0.6B \
+        --served-model-name Qwen/Qwen3-0.6B \
+        --max-model-len 8192

      # Wait for vllm to be ready
      echo "Waiting for vllm to be ready..."
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index f88402a7a..9c400a73f 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -22,7 +22,7 @@ updates:
      prefix: chore(python-deps)

  - package-ecosystem: npm
-    directory: "/llama_stack/ui"
+    directory: "/src/llama_stack_ui"
    schedule:
      interval: "weekly"
      day: "saturday"
diff --git a/.github/mergify.yml b/.github/mergify.yml
new file mode 100644
index 000000000..a96191958
--- /dev/null
+++ b/.github/mergify.yml
@@ -0,0 +1,23 @@
+pull_request_rules:
+- name: ping author on conflicts and add 'needs-rebase' label
+  conditions:
+  - conflict
+  - -closed
+  actions:
+    label:
+      add:
+      - needs-rebase
+    comment:
+      message: >
+        This pull request has merge conflicts that must be resolved before it
+        can be merged. @{{author}} please rebase it.
+        https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork
+
+- name: remove 'needs-rebase' label when conflict is resolved
+  conditions:
+  - -conflict
+  - -closed
+  actions:
+    label:
+      remove:
+      - needs-rebase
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 00a8f54ac..8eb31c79b 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -4,7 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
Below is a tabl | Name | File | Purpose | | ---- | ---- | ------- | -| Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md | +| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs | | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. | | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script | | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication | @@ -12,12 +12,12 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl | Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode | | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers | | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks | -| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR | | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build | | Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps | | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project | | Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration | | Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec | +| Stainless SDK Builds | [stainless-builds.yml](stainless-builds.yml) | Build Stainless SDK from OpenAPI spec changes | | Close stale issues and PRs | [stale_bot.yml](stale_bot.yml) | Run the Stale Bot action | | Test External Providers Installed via Module | [test-external-provider-module.yml](test-external-provider-module.yml) | Test External Provider installation via Python module | | Test External API and Providers | [test-external.yml](test-external.yml) | Test the External API and Provider mechanisms | diff --git a/.github/workflows/backward-compat.yml b/.github/workflows/backward-compat.yml new file mode 100644 index 000000000..cf74e8d7f --- /dev/null +++ b/.github/workflows/backward-compat.yml @@ -0,0 +1,578 @@ +name: Backward Compatibility Check + +run-name: Check backward compatibility for run.yaml configs + +on: + pull_request: + branches: + - main + - 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+' + - 'release-[0-9]+.[0-9]+.[0-9]+' + - 'release-[0-9]+.[0-9]+' + paths: + - 'src/llama_stack/core/datatypes.py' + - 'src/llama_stack/providers/datatypes.py' + - 'src/llama_stack/distributions/**/run.yaml' + - 'tests/backward_compat/**' + - '.github/workflows/backward-compat.yml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + check-main-compatibility: + name: Check Compatibility with main + runs-on: ubuntu-latest + + steps: + - name: Checkout PR branch + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + fetch-depth: 0 # Need full history to access main branch + + - name: Set up Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + 
+          python-version: '3.12'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
+        with:
+          enable-cache: true
+
+      - name: Install dependencies
+        run: |
+          uv sync --group dev
+
+      - name: Extract run.yaml files from main branch
+        id: extract_configs
+        run: |
+          # Get list of run.yaml paths from main
+          git fetch origin main
+          CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true)
+
+          if [ -z "$CONFIG_PATHS" ]; then
+            echo "No run.yaml files found in main branch"
+            exit 1
+          fi
+
+          # Extract all configs to a temp directory
+          mkdir -p /tmp/main_configs
+          echo "Extracting configs from main branch:"
+
+          while IFS= read -r config_path; do
+            if [ -z "$config_path" ]; then
+              continue
+            fi
+
+            # Extract filename for storage
+            filename=$(basename $(dirname "$config_path"))
+            echo "  - $filename (from $config_path)"
+
+            git show origin/main:"$config_path" > "/tmp/main_configs/${filename}.yaml"
+          done <<< "$CONFIG_PATHS"
+
+          echo ""
+          echo "Extracted $(ls /tmp/main_configs/*.yaml | wc -l) config files"
+
+      - name: Test all configs from main
+        id: test_configs
+        continue-on-error: true
+        run: |
+          # Run pytest once with all configs parameterized
+          if COMPAT_TEST_CONFIGS_DIR=/tmp/main_configs uv run pytest tests/backward_compat/test_run_config.py -v; then
+            echo "failed=false" >> $GITHUB_OUTPUT
+          else
+            echo "failed=true" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
+      - name: Check for breaking change acknowledgment
+        id: check_ack
+        if: steps.test_configs.outputs.failed == 'true'
+        run: |
+          echo "Breaking changes detected. Checking for acknowledgment..."
+
+          # Check PR title for '!:' marker (conventional commits)
+          PR_TITLE="${{ github.event.pull_request.title }}"
+          if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
+            echo "✓ Breaking change acknowledged in PR title"
+            echo "acknowledged=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          # Check commit messages for BREAKING CHANGE:
+          if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
+            echo "✓ Breaking change acknowledged in commit message"
+            echo "acknowledged=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          echo "✗ Breaking change NOT acknowledged"
+          echo "acknowledged=false" >> $GITHUB_OUTPUT
+        env:
+          GH_TOKEN: ${{ github.token }}
+
+      - name: Evaluate results
+        if: always()
+        run: |
+          FAILED="${{ steps.test_configs.outputs.failed }}"
+          ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"
+
+          if [[ "$FAILED" == "true" ]]; then
+            if [[ "$ACKNOWLEDGED" == "true" ]]; then
+              echo ""
+              echo "⚠️ WARNING: Breaking changes detected but acknowledged"
+              echo ""
+              echo "This PR introduces backward-incompatible changes to run.yaml."
+              echo "The changes have been properly acknowledged."
+              echo ""
+              exit 0  # Pass the check
+            else
+              echo ""
+              echo "❌ ERROR: Breaking changes detected without acknowledgment"
+              echo ""
+              echo "This PR introduces backward-incompatible changes to run.yaml"
+              echo "that will break existing user configurations."
+              echo ""
+              echo "To acknowledge this breaking change, do ONE of:"
+              echo "  1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
+              echo "  2. Include 'BREAKING CHANGE:' in a commit message"
+              echo ""
+              exit 1  # Fail the check
+            fi
+          fi
+
+  test-integration-main:
+    name: Run Integration Tests with main Config
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          fetch-depth: 0
+
+      - name: Extract ci-tests run.yaml from main
+        run: |
+          git fetch origin main
+          git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml
+          echo "Extracted ci-tests run.yaml from main branch"
+
+      - name: Setup test environment
+        uses: ./.github/actions/setup-test-environment
+        with:
+          python-version: '3.12'
+          client-version: 'latest'
+          setup: 'ollama'
+          suite: 'base'
+          inference-mode: 'replay'
+
+      - name: Run integration tests with main config
+        id: test_integration
+        continue-on-error: true
+        uses: ./.github/actions/run-and-record-tests
+        with:
+          stack-config: /tmp/main-ci-tests-run.yaml
+          setup: 'ollama'
+          inference-mode: 'replay'
+          suite: 'base'
+
+      - name: Check for breaking change acknowledgment
+        id: check_ack
+        if: steps.test_integration.outcome == 'failure'
+        run: |
+          echo "Integration tests failed. Checking for acknowledgment..."
+
+          # Check PR title for '!:' marker (conventional commits)
+          PR_TITLE="${{ github.event.pull_request.title }}"
+          if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
+            echo "✓ Breaking change acknowledged in PR title"
+            echo "acknowledged=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          # Check commit messages for BREAKING CHANGE:
+          if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
+            echo "✓ Breaking change acknowledged in commit message"
+            echo "acknowledged=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          echo "✗ Breaking change NOT acknowledged"
+          echo "acknowledged=false" >> $GITHUB_OUTPUT
+        env:
+          GH_TOKEN: ${{ github.token }}
+
+      - name: Evaluate integration test results
+        if: always()
+        run: |
+          TEST_FAILED="${{ steps.test_integration.outcome == 'failure' }}"
+          ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"
+
+          if [[ "$TEST_FAILED" == "true" ]]; then
+            if [[ "$ACKNOWLEDGED" == "true" ]]; then
+              echo ""
+              echo "⚠️ WARNING: Integration tests failed with main config but acknowledged"
+              echo ""
+              exit 0  # Pass the check
+            else
+              echo ""
+              echo "❌ ERROR: Integration tests failed with main config without acknowledgment"
+              echo ""
+              echo "To acknowledge this breaking change, do ONE of:"
+              echo "  1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
+              echo "  2. Include 'BREAKING CHANGE:' in a commit message"
+              echo ""
+              exit 1  # Fail the check
+            fi
+          fi
+
+  test-integration-release:
+    name: Run Integration Tests with Latest Release (Informational)
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          fetch-depth: 0
+
+      - name: Get latest release
+        id: get_release
+        run: |
+          # Get the latest release from GitHub
+          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
+
+          if [ -z "$LATEST_TAG" ]; then
+            echo "No releases found, skipping release compatibility check"
+            echo "has_release=false" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          echo "Latest release: $LATEST_TAG"
+          echo "has_release=true" >> $GITHUB_OUTPUT
+          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
+        env:
+          GH_TOKEN: ${{ github.token }}
+
+      - name: Extract ci-tests run.yaml from release
+        if: steps.get_release.outputs.has_release == 'true'
+        id: extract_config
+        run: |
+          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
+
+          # Try with src/ prefix first (newer releases), then without (older releases)
+          if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
+            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)"
+            echo "has_config=true" >> $GITHUB_OUTPUT
+          elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
+            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)"
+            echo "has_config=true" >> $GITHUB_OUTPUT
+          else
+            echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG"
+            echo "has_config=false" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Setup test environment
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
+        uses: ./.github/actions/setup-test-environment
+        with:
+          python-version: '3.12'
+          client-version: 'latest'
+          setup: 'ollama'
+          suite: 'base'
+          inference-mode: 'replay'
+
+      - name: Run integration tests with release config (PR branch)
+        id: test_release_pr
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
+        continue-on-error: true
+        uses: ./.github/actions/run-and-record-tests
+        with:
+          stack-config: /tmp/release-ci-tests-run.yaml
+          setup: 'ollama'
+          inference-mode: 'replay'
+          suite: 'base'
+
+      - name: Checkout main branch to test baseline
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
+        run: |
+          git checkout origin/main
+
+      - name: Setup test environment for main
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
+        uses: ./.github/actions/setup-test-environment
+        with:
+          python-version: '3.12'
+          client-version: 'latest'
+          setup: 'ollama'
+          suite: 'base'
+          inference-mode: 'replay'
+
+      - name: Run integration tests with release config (main branch)
+        id: test_release_main
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
+        continue-on-error: true
+        uses: ./.github/actions/run-and-record-tests
+        with:
+          stack-config: /tmp/release-ci-tests-run.yaml
+          setup: 'ollama'
+          inference-mode: 'replay'
+          suite: 'base'
+
+      - name: Report results and post PR comment
+        if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
+        run: |
+          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
+          PR_OUTCOME="${{ steps.test_release_pr.outcome }}"
+          MAIN_OUTCOME="${{ steps.test_release_main.outcome }}"
+
+          if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
+            # NEW breaking change - PR fails but main passes
+            echo "::error::🚨 This PR introduces a NEW breaking change!"
+
+            # Check if we already posted a comment (to avoid spam on every push)
+            EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Breaking Change Detected") and contains("Integration tests")) | .id' | head -1)
+
+            if [[ -z "$EXISTING_COMMENT" ]]; then
+              gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Breaking Change Detected
+
+              **Integration tests against release \`$RELEASE_TAG\` are now failing**
+
+              ⚠️ This PR introduces a breaking change that affects compatibility with the latest release.
+
+              - Users on release \`$RELEASE_TAG\` may not be able to upgrade
+              - Existing configurations may break
+
+              The tests pass on \`main\` but fail with this PR's changes.
+
+              > **Note:** This is informational only and does not block merge.
+              > Consider whether this breaking change is acceptable for users."
+            else
+              echo "Comment already exists, skipping to avoid spam"
+            fi
+
+            cat >> $GITHUB_STEP_SUMMARY <<EOF
+          > **Note:** This is informational only and does not block merge.
+          > Consider whether this breaking change is acceptable for users.
+          EOF
+
+          elif [[ "$PR_OUTCOME" == "failure" ]]; then
+            # Existing breaking change - both PR and main fail
+            echo "::warning::Breaking change already exists in main branch"
+
+            cat >> $GITHUB_STEP_SUMMARY <<EOF
+          > **Note:** This is informational only.
+          EOF
+
+          else
+            # Success - tests pass
+            cat >> $GITHUB_STEP_SUMMARY <<EOF
+          EOF
+          fi
+
+  check-schema-release:
+    name: Check Schema Compatibility with Latest Release (Informational)
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          fetch-depth: 0
+
+      - name: Get latest release
+        id: get_release
+        run: |
+          # Get the latest release from GitHub
+          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
+
+          if [ -z "$LATEST_TAG" ]; then
+            echo "No releases found, skipping release compatibility check"
+            echo "has_release=false" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          echo "Latest release: $LATEST_TAG"
+          echo "has_release=true" >> $GITHUB_OUTPUT
+          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
+        env:
+          GH_TOKEN: ${{ github.token }}
+
+      - name: Extract configs from release
+        if: steps.get_release.outputs.has_release == 'true'
+        id: extract_release_configs
+        run: |
+          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
+
+          # Get run.yaml files from the release (try both src/ and old path)
+          CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true)
+
+          if [ -z "$CONFIG_PATHS" ]; then
+            echo "::warning::No run.yaml files found in release $RELEASE_TAG"
+            echo "has_configs=false" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          # Extract all configs to a temp directory
+          mkdir -p /tmp/release_configs
+          echo "Extracting configs from release $RELEASE_TAG:"
+
+          while IFS= read -r config_path; do
+            if [ -z "$config_path" ]; then
+              continue
+            fi
+
+            filename=$(basename $(dirname "$config_path"))
+            echo "  - $filename (from $config_path)"
+
+            git show "$RELEASE_TAG:$config_path" > "/tmp/release_configs/${filename}.yaml" 2>/dev/null || true
+          done <<< "$CONFIG_PATHS"
+
+          echo ""
+          echo "Extracted $(ls /tmp/release_configs/*.yaml 2>/dev/null | wc -l) config files"
+          echo "has_configs=true" >> $GITHUB_OUTPUT
+
+      - name: Test against release configs (PR branch)
+        id: test_schema_pr
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
+        continue-on-error: true
+        run: |
+          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
+          COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short
+
+      - name: Checkout main branch to test baseline
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
+        run: |
+          git checkout origin/main
+
+      - name: Install dependencies for main
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
+        run: |
+          uv sync --group dev
+
+      - name: Test against release configs (main branch)
+        id: test_schema_main
+        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
+        continue-on-error: true
+        run: |
+          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
+          COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short
+
+      - name: Report results and post PR comment
+        if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
+        run: |
+          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
+          PR_OUTCOME="${{ steps.test_schema_pr.outcome }}"
+          MAIN_OUTCOME="${{ steps.test_schema_main.outcome }}"
+
+          if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
+            # NEW breaking change - PR fails but main passes
+            echo "::error::🚨 This PR introduces a NEW schema breaking change!"
+
+            # Check if we already posted a comment (to avoid spam on every push)
+            EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Schema Breaking Change Detected")) | .id' | head -1)
+
+            if [[ -z "$EXISTING_COMMENT" ]]; then
+              gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Schema Breaking Change Detected
+
+              **Schema validation against release \`$RELEASE_TAG\` is now failing**
+
+              ⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release.
+
+              - Users on release \`$RELEASE_TAG\` will not be able to upgrade
+              - Existing run.yaml configurations will fail validation
+
+              The tests pass on \`main\` but fail with this PR's changes.
+
+              > **Note:** This is informational only and does not block merge.
+              > Consider whether this breaking change is acceptable for users."
+            else
+              echo "Comment already exists, skipping to avoid spam"
+            fi
+
+            cat >> $GITHUB_STEP_SUMMARY <<EOF
+          > **Note:** This is informational only and does not block merge.
+          > Consider whether this breaking change is acceptable for users.
+          EOF
+
+          elif [[ "$PR_OUTCOME" == "failure" ]]; then
+            # Existing breaking change - both PR and main fail
+            echo "::warning::Schema breaking change already exists in main branch"
+
+            cat >> $GITHUB_STEP_SUMMARY <<EOF
+          > **Note:** This is informational only.
+          EOF
+
+          else
+            # Success - tests pass
+            cat >> $GITHUB_STEP_SUMMARY <<EOF
+          EOF
+          fi
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
+        echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
+        echo "Generated matrix: $MATRIX"

   run-replay-mode-tests:
+    needs: generate-matrix
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, server, docker]
+        client: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        # Define (setup, suite) pairs - they are always matched and cannot be independent
-        # Weekly schedule (Sun 1 AM): vllm+base
-        # Input test-setup=ollama-vision: ollama-vision+vision
-        # Default (including test-setup=ollama): ollama+base, ollama-vision+vision, gpt+responses
-        config: >-
-          ${{
-            github.event.schedule == '1 0 * * 0'
-            && fromJSON('[{"setup": "vllm", "suite": "base"}]')
-            || github.event.inputs.test-setup == 'ollama-vision'
-            && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
-            || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
-          }}
+        # Test configurations: Generated from CI_MATRIX in tests/integration/suites.py
+        # See scripts/generate_ci_matrix.py for generation logic
+        config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}

     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Setup test environment
+        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}
@@ -77,12 +93,33 @@ jobs:
           suite: ${{ matrix.config.suite }}
           inference-mode: 'replay'

+      - name: Setup Node.js for TypeScript client tests
+        if: ${{ matrix.client == 'server' }}
+        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: tests/integration/client-typescript/package-lock.json
+
+      - name: Setup TypeScript client
+        if: ${{ matrix.client == 'server' }}
+        id: setup-ts-client
+        uses: ./.github/actions/setup-typescript-client
+        with:
+          client-version: ${{ matrix.client-version }}
+
       - name: Run tests
+        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/run-and-record-tests
         env:
           OPENAI_API_KEY: dummy
+          TS_CLIENT_PATH: ${{ steps.setup-ts-client.outputs.ts-client-path || '' }}
         with:
-          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
+          stack-config: >-
+            ${{ matrix.config.stack_config
+                || (matrix.client == 'library' && 'ci-tests')
+                || (matrix.client == 'server' && 'server:ci-tests')
+                || 'docker:ci-tests' }}
           setup: ${{ matrix.config.setup }}
           inference-mode: 'replay'
           suite: ${{ matrix.config.suite }}
diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml
index a6a86b15f..fc6ac0600 100644
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@@ -4,12 +4,16 @@ run-name: Run the integration test suite with various VectorIO providers

 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack_ui/**'
       - 'tests/integration/vector_io/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -33,7 +37,7 @@ jobs:

     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -194,7 +198,7 @@ jobs:

       - name: Upload all logs to artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
           path: |
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 0fdd50acc..10a663514 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -5,7 +5,9 @@ run-name: Run pre-commit checks
 on:
   pull_request:
   push:
-    branches: [main]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -20,7 +22,7 @@ jobs:

     steps:
       - name: Checkout code
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           # For dependabot PRs, we need to checkout with a token that can push changes
           token: ${{ github.actor == 'dependabot[bot]' && secrets.GITHUB_TOKEN || github.token }}
           fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }}

       - name: Set up Python
-        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
+        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
         with:
           python-version: '3.12'
           cache: pip
@@ -41,25 +43,43 @@ jobs:
         with:
           node-version: '20'
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/'
+          cache-dependency-path: 'src/llama_stack_ui/'
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4

       - name: Install npm dependencies
         run: npm ci
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack_ui
+
+      - name: Install pre-commit
+        run: python -m pip install 'pre-commit>=4.4.0'
+
+      - name: Cache pre-commit
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
+        with:
+          path: ~/.cache/pre-commit
+          key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}

       - name: Run pre-commit
         id: precommit
-        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
-        continue-on-error: true
+        run: |
+          set +e
+          pre-commit run --show-diff-on-failure --color=always --all-files 2>&1 | tee /tmp/precommit.log
+          status=${PIPESTATUS[0]}
+          echo "status=$status" >> $GITHUB_OUTPUT
+          exit 0
         env:
-          SKIP: no-commit-to-branch
+          SKIP: no-commit-to-branch,mypy
           RUFF_OUTPUT_FORMAT: github

       - name: Check pre-commit results
-        if: steps.precommit.outcome == 'failure'
+        if: steps.precommit.outputs.status != '0'
         run: |
           echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes."
-          echo "::warning::Some pre-commit hooks failed. Check the output above for details."
+          echo ""
+          echo "Failed hooks output:"
+          cat /tmp/precommit.log
           exit 1

       - name: Debug
@@ -109,3 +129,50 @@ jobs:
           echo "$unstaged_files"
           exit 1
         fi
+
+      - name: Configure client installation
+        id: client-config
+        uses: ./.github/actions/install-llama-stack-client
+
+      - name: Sync dev + type_checking dependencies
+        env:
+          UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
+        run: |
+          if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+            export UV_INDEX_STRATEGY="unsafe-best-match"
+          fi
+
+          uv sync --group dev --group type_checking
+
+          # Install specific client version after sync if needed
+          if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+            echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+            uv pip install ${{ steps.client-config.outputs.install-source }}
+          fi
+
+      - name: Run mypy (full type_checking)
+        env:
+          UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
+        run: |
+          if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+            export UV_INDEX_STRATEGY="unsafe-best-match"
+          fi
+
+          set +e
+          uv run --group dev --group type_checking mypy
+          status=$?
+          if [ $status -ne 0 ]; then
+            echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
+          fi
+          exit $status
+
+      - name: Check if any unused recordings
+        run: |
+          set -e
+          PYTHONPATH=$PWD uv run ./scripts/cleanup_recordings.py --delete
+          changes=$(git status --short tests/integration | grep 'recordings' || true)
+          if [ -n "$changes" ]; then
+            echo "::error::Unused integration recordings detected. Run 'PYTHONPATH=$(pwd) uv run ./scripts/cleanup_recordings.py --delete' locally and commit the deletions."
+            echo "$changes"
+            exit 1
+          fi
diff --git a/.github/workflows/precommit-trigger.yml b/.github/workflows/precommit-trigger.yml
deleted file mode 100644
index b05898d29..000000000
--- a/.github/workflows/precommit-trigger.yml
+++ /dev/null
@@ -1,227 +0,0 @@
-name: Pre-commit Bot
-
-run-name: Pre-commit bot for PR #${{ github.event.issue.number }}
-
-on:
-  issue_comment:
-    types: [created]
-
-jobs:
-  pre-commit:
-    # Only run on pull request comments
-    if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit')
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      pull-requests: write
-
-    steps:
-      - name: Check comment author and get PR details
-        id: check_author
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            // Get PR details
-            const pr = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: context.issue.number
-            });
-
-            // Check if commenter has write access or is the PR author
-            const commenter = context.payload.comment.user.login;
-            const prAuthor = pr.data.user.login;
-
-            let hasPermission = false;
-
-            // Check if commenter is PR author
-            if (commenter === prAuthor) {
-              hasPermission = true;
-              console.log(`Comment author ${commenter} is the PR author`);
-            } else {
-              // Check if commenter has write/admin access
-              try {
-                const permission = await github.rest.repos.getCollaboratorPermissionLevel({
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
-                  username: commenter
-                });
-
-                const level = permission.data.permission;
-                hasPermission = ['write', 'admin', 'maintain'].includes(level);
-                console.log(`Comment author ${commenter} has permission: ${level}`);
-              } catch (error) {
-                console.log(`Could not check permissions for ${commenter}: ${error.message}`);
-              }
-            }
-
-            if (!hasPermission) {
-              await github.rest.issues.createComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: context.issue.number,
-                body: `❌ @${commenter} You don't have permission to trigger pre-commit. Only PR authors or repository collaborators can run this command.`
-              });
-              core.setFailed(`User ${commenter} does not have permission`);
-              return;
-            }
-
-            // Save PR info for later steps
-            core.setOutput('pr_number', context.issue.number);
-            core.setOutput('pr_head_ref', pr.data.head.ref);
-            core.setOutput('pr_head_sha', pr.data.head.sha);
-            core.setOutput('pr_head_repo', pr.data.head.repo.full_name);
-            core.setOutput('pr_base_ref', pr.data.base.ref);
-            core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name);
-            core.setOutput('authorized', 'true');
-
-      - name: React to comment
-        if: steps.check_author.outputs.authorized == 'true'
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            await github.rest.reactions.createForIssueComment({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              comment_id: context.payload.comment.id,
-              content: 'rocket'
-            });
-
-      - name: Comment starting
-        if: steps.check_author.outputs.authorized == 'true'
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            await github.rest.issues.createComment({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: ${{ steps.check_author.outputs.pr_number }},
-              body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...`
-            });
-
-      - name: Checkout PR branch (same-repo)
-        if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false'
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
-        with:
-          ref: ${{ steps.check_author.outputs.pr_head_ref }}
-          fetch-depth: 0
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Checkout PR branch (fork)
-        if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true'
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
-        with:
-          repository: ${{ steps.check_author.outputs.pr_head_repo }}
-          ref: ${{ steps.check_author.outputs.pr_head_ref }}
-          fetch-depth: 0
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Verify checkout
-        if: steps.check_author.outputs.authorized == 'true'
-        run: |
-          echo "Current SHA: $(git rev-parse HEAD)"
-          echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}"
-          if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then
-            echo "::error::Checked out SHA does not match expected SHA"
-            exit 1
-          fi
-
-      - name: Set up Python
-        if: steps.check_author.outputs.authorized == 'true'
-        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
-        with:
-          python-version: '3.12'
-          cache: pip
-          cache-dependency-path: |
-            **/requirements*.txt
-            .pre-commit-config.yaml
-
-      - name: Set up Node.js
-        if: steps.check_author.outputs.authorized == 'true'
-        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
-        with:
-          node-version: '20'
-          cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/'
-
-      - name: Install npm dependencies
-        if: steps.check_author.outputs.authorized == 'true'
-        run: npm ci
-        working-directory: llama_stack/ui
-
-      - name: Run pre-commit
-        if: steps.check_author.outputs.authorized == 'true'
-        id: precommit
-        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
-        continue-on-error: true
-        env:
-          SKIP: no-commit-to-branch
-          RUFF_OUTPUT_FORMAT: github
-
-      - name: Check for changes
-        if: steps.check_author.outputs.authorized == 'true'
-        id: changes
-        run: |
-          if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
-            echo "has_changes=true" >> $GITHUB_OUTPUT
-            echo "Changes detected after pre-commit"
-          else
-            echo "has_changes=false" >> $GITHUB_OUTPUT
-            echo "No changes after pre-commit"
-          fi
-
-      - name: Commit and push changes
-        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
-        run: |
-          git config --local user.email "github-actions[bot]@users.noreply.github.com"
-          git config --local user.name "github-actions[bot]"
-
-          git add -A
-          git commit -m "style: apply pre-commit fixes
-
-          🤖 Applied by @github-actions bot via pre-commit workflow"
-
-          # Push changes
-          git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }}
-
-      - name: Comment success with changes
-        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            await github.rest.issues.createComment({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: ${{ steps.check_author.outputs.pr_number }},
-              body: `✅ Pre-commit hooks completed successfully!\n\n🔧 Changes have been committed and pushed to the PR branch.`
-            });
-
-      - name: Comment success without changes
-        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            await github.rest.issues.createComment({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: ${{ steps.check_author.outputs.pr_number }},
-              body: `✅ Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.`
-            });
-
-      - name: Comment failure
-        if: failure()
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            await github.rest.issues.createComment({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: ${{ steps.check_author.outputs.pr_number }},
-              body: `❌ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.`
-            });
diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml
index ffc44f9c1..9affe3d3f 100644
--- a/.github/workflows/providers-build.yml
+++ b/.github/workflows/providers-build.yml
@@ -7,24 +7,24 @@ on:
   push:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'
   pull_request:
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'
@@ -40,12 +40,12 @@ jobs:
       distros: ${{ steps.set-matrix.outputs.distros }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Generate Distribution List
         id: set-matrix
         run: |
-          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "distros=$distros" >> "$GITHUB_OUTPUT"

   build:
@@ -59,7 +59,7 @@ jobs:

     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -72,10 +72,16 @@ jobs:
       - name: Build container image
         if: matrix.image-type == 'container'
         run: |
+          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=${{ matrix.distro }}"
+          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+          fi
+          if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+          fi
           docker build . \
             -f containers/Containerfile \
-            --build-arg INSTALL_MODE=editable \
-            --build-arg DISTRO_NAME=${{ matrix.distro }} \
+            $BUILD_ARGS \
             --tag llama-stack:${{ matrix.distro }}-ci

       - name: Print dependencies in the image
@@ -87,7 +93,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -100,20 +106,26 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner

       - name: Build container image
         run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
+          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
+          BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
+          BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+          fi
+          if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+          fi
          docker build . \
             -f containers/Containerfile \
-            --build-arg INSTALL_MODE=editable \
-            --build-arg DISTRO_NAME=ci-tests \
-            --build-arg BASE_IMAGE="$BASE_IMAGE" \
-            --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+            $BUILD_ARGS \
             -t llama-stack:ci-tests

       - name: Inspect the container image entrypoint
@@ -134,7 +146,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -143,17 +155,23 @@ jobs:
         run: |
           yq -i '
             .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
-          ' llama_stack/distributions/ci-tests/build.yaml
+          ' src/llama_stack/distributions/ci-tests/build.yaml

       - name: Build UBI9 container image
         run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
+          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
+          BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
+          BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+          fi
+          if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+          fi
           docker build . \
             -f containers/Containerfile \
-            --build-arg INSTALL_MODE=editable \
-            --build-arg DISTRO_NAME=ci-tests \
-            --build-arg BASE_IMAGE="$BASE_IMAGE" \
-            --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+            $BUILD_ARGS \
             -t llama-stack:ci-tests-ubi9

       - name: Inspect UBI9 image
diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml
index e30e1e5fb..a8024546d 100644
--- a/.github/workflows/providers-list-deps.yml
+++ b/.github/workflows/providers-list-deps.yml
@@ -7,22 +7,22 @@ on:
   push:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'
   pull_request:
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'

 concurrency:
@@ -36,12 +36,12 @@ jobs:
       distros: ${{ steps.set-matrix.outputs.distros }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Generate Distribution List
         id: set-matrix
         run: |
-          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "distros=$distros" >> "$GITHUB_OUTPUT"

   list-deps:
@@ -55,7 +55,7 @@ jobs:

     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -79,7 +79,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -92,7 +92,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -102,4 +102,4 @@ jobs:
       USE_COPY_NOT_MOUNT: "true"
       LLAMA_STACK_DIR: "."
     run: |
-      uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
+      uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml
diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index 96243285f..9c79021cf 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -10,7 +10,7 @@ on:
     branches:
       - main
     paths-ignore:
-      - 'llama_stack/ui/**'
+      - 'src/llama_stack_ui/**'

 jobs:
   build:
@@ -21,22 +21,25 @@ jobs:

     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install uv
-        uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true
           version: 0.7.6

-      - name: Build Llama Stack package
-        run: |
-          uv build
+      - name: Build Llama Stack API package
+        working-directory: src/llama_stack_api
+        run: uv build

-      - name: Install Llama Stack package
+      - name: Build Llama Stack package
+        run: uv build
+
+      - name: Install Llama Stack package (with api stubs from local build)
         run: |
-          uv pip install dist/*.whl
+          uv pip install --find-links src/llama_stack_api/dist dist/*.whl

       - name: Verify Llama Stack package
         run: |
@@ -45,3 +48,4 @@ jobs:
           command -v llama
           llama stack list-apis
           llama stack list-providers inference
+          llama stack list-deps starter
diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml
index 57f95580e..2093bca04 100644
--- a/.github/workflows/record-integration-tests.yml
+++ b/.github/workflows/record-integration-tests.yml
@@ -46,7 +46,7 @@ jobs:
         echo "::endgroup::"

       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           fetch-depth: 0
diff --git a/.github/workflows/stainless-builds.yml b/.github/workflows/stainless-builds.yml
new file mode 100644
index 000000000..9217d5b3f
--- /dev/null
+++ b/.github/workflows/stainless-builds.yml
@@ -0,0 +1,146 @@
+name: Stainless SDK Builds
+run-name: Build Stainless SDK from OpenAPI spec changes
diff --git a/.github/workflows/stainless-builds.yml b/.github/workflows/stainless-builds.yml
new file mode 100644
index 000000000..9217d5b3f
--- /dev/null
+++ b/.github/workflows/stainless-builds.yml
@@ -0,0 +1,146 @@
+name: Stainless SDK Builds
+run-name: Build Stainless SDK from OpenAPI spec changes
+
+# This workflow uses pull_request_target, which allows it to run on pull requests
+# from forks with access to secrets. This is safe because the workflow definition
+# comes from the base branch (trusted), and the action only reads OpenAPI spec
+# files without executing any code from the PR.
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - closed
+    paths:
+      - "client-sdks/stainless/**"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+env:
+  # Stainless organization name.
+  STAINLESS_ORG: llamastack
+
+  # Stainless project name.
+  STAINLESS_PROJECT: llama-stack-client
+
+  # Path to your OpenAPI spec.
+  OAS_PATH: ./client-sdks/stainless/openapi.yml
+
+  # Path to your Stainless config. Optional; only provide this if you prefer
+  # to maintain the ground truth Stainless config in your own repo.
+  CONFIG_PATH: ./client-sdks/stainless/config.yml
+
+  # When to fail the job based on build conclusion.
+  # Options: "never" | "note" | "warning" | "error" | "fatal".
+  FAIL_ON: error
+
+  # In your repo secrets, configure:
+  # - STAINLESS_API_KEY: a Stainless API key, which you can generate on the
+  #   Stainless organization dashboard
+
+jobs:
+  compute-branch:
+    runs-on: ubuntu-latest
+    outputs:
+      preview_branch: ${{ steps.compute.outputs.preview_branch }}
+      base_branch: ${{ steps.compute.outputs.base_branch }}
+      merge_branch: ${{ steps.compute.outputs.merge_branch }}
+    steps:
+      - name: Compute branch names
+        id: compute
+        run: |
+          HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
+          BASE_REPO="${{ github.repository }}"
+          BRANCH_NAME="${{ github.event.pull_request.head.ref }}"
+          FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}"
+
+          if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
+            # Fork PR: prefix with fork owner for isolation
+            if [ -z "$FORK_OWNER" ]; then
+              echo "Error: Fork PR detected but fork owner is empty" >&2
+              exit 1
+            fi
+            PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
+            BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"
+          else
+            # Same-repo PR
+            PREVIEW_BRANCH="preview/${BRANCH_NAME}"
+            BASE_BRANCH="preview/base/${BRANCH_NAME}"
+          fi
+
+          echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+          echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT
+          echo "merge_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+
+  preview:
+    needs: compute-branch
+    if: github.event.action != 'closed'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      # Checkout the PR's code to access the OpenAPI spec and config files.
+      # This is necessary to read the spec/config from the PR (including from forks).
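+      # The preview build that follows publishes to the branch computed by the
+      # compute-branch job; with hypothetical names, a PR from fork owner
+      # "alice" on branch "fix-spec" previews on "preview/alice/fix-spec",
+      # while the same branch in this repo previews on "preview/fix-spec",
+      # keeping fork and same-repo previews isolated from each other.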
+      - name: Checkout repository
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 2
+
+      - name: Run preview builds
+        uses: stainless-api/upload-openapi-spec-action/preview@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0
+        with:
+          stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
+          org: ${{ env.STAINLESS_ORG }}
+          project: ${{ env.STAINLESS_PROJECT }}
+          oas_path: ${{ env.OAS_PATH }}
+          config_path: ${{ env.CONFIG_PATH }}
+          fail_on: ${{ env.FAIL_ON }}
+          base_sha: ${{ github.event.pull_request.base.sha }}
+          base_ref: ${{ github.event.pull_request.base.ref }}
+          head_sha: ${{ github.event.pull_request.head.sha }}
+          branch: ${{ needs.compute-branch.outputs.preview_branch }}
+          base_branch: ${{ needs.compute-branch.outputs.base_branch }}
+
+  merge:
+    needs: compute-branch
+    if: github.event.action == 'closed' && github.event.pull_request.merged == true
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      # Checkout the PR's code to access the OpenAPI spec and config files.
+      # This is necessary to read the spec/config from the PR (including from forks).
+      - name: Checkout repository
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 2
+
+      # Note that this only merges in changes from the last build on the
+      # computed preview branch. It's possible that some OAS/config changes
+      # haven't been built yet, if the preview job didn't finish before this
+      # step starts. Ideally we would wait for all builds against the preview
+      # branch to complete, but as long as the preview job finishes before
+      # the PR is merged, this is fine.
+      - name: Run merge build
+        uses: stainless-api/upload-openapi-spec-action/merge@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0
+        with:
+          stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
+          org: ${{ env.STAINLESS_ORG }}
+          project: ${{ env.STAINLESS_PROJECT }}
+          oas_path: ${{ env.OAS_PATH }}
+          config_path: ${{ env.CONFIG_PATH }}
+          fail_on: ${{ env.FAIL_ON }}
+          base_sha: ${{ github.event.pull_request.base.sha }}
+          base_ref: ${{ github.event.pull_request.base.ref }}
+          head_sha: ${{ github.event.pull_request.head.sha }}
+          merge_branch: ${{ needs.compute-branch.outputs.merge_branch }}
diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml
index 21fedd06f..e2dbe00e6 100644
--- a/.github/workflows/test-external-provider-module.yml
+++ b/.github/workflows/test-external-provider-module.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
+      - 'src/llama_stack/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -27,7 +27,7 @@ jobs:
     # container and point 'uv pip install' to the correct path...
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -78,7 +78,7 @@ jobs:
       - name: Upload all logs to artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-provider-module-test
           path: |
diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml
index 3ae6793ea..7a306643d 100644
--- a/.github/workflows/test-external.yml
+++ b/.github/workflows/test-external.yml
@@ -8,8 +8,8 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack_ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -27,7 +27,7 @@ jobs:
     # container and point 'uv pip install' to the correct path...
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -84,7 +84,7 @@ jobs:
       - name: Upload all logs to artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-test
           path: |
diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml
index e8f318b8e..0b8b0ae78 100644
--- a/.github/workflows/ui-unit-tests.yml
+++ b/.github/workflows/ui-unit-tests.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/ui/**'
+      - 'src/llama_stack_ui/**'
       - '.github/workflows/ui-unit-tests.yml' # This workflow
   workflow_dispatch:
@@ -26,29 +26,29 @@ jobs:

     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Setup Node.js
         uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/package-lock.json'
+          cache-dependency-path: 'src/llama_stack_ui/package-lock.json'

       - name: Install dependencies
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack_ui
         run: npm ci

       - name: Run linting
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack_ui
         run: npm run lint

       - name: Run format check
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack_ui
         run: npm run format:check

       - name: Run unit tests
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack_ui
         env:
           CI: true
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index dd2097a45..dde129870 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -4,12 +4,16 @@ run-name: Run the unit test suite
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack_ui/**'
       - 'tests/unit/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -32,7 +36,7 @@ jobs:
           - "3.13"
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
@@ -45,7 +49,7 @@ jobs:
       - name: Upload test results
         if: always()
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: test-results-${{ matrix.python }}
           path: |
diff --git a/.gitignore b/.gitignore
index ca210db9a..0d8fd5a2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,9 @@ CLAUDE.md
 .claude/
 docs/.docusaurus/
 docs/node_modules/
+docs/static/imported-files/
+docs/docs/api-deprecated/
+docs/docs/api-experimental/
+docs/docs/api/
+tests/integration/client-typescript/node_modules/
+.ts-client-checkout/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b7880a9fc..cc3d531ec 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,5 @@
 exclude: 'build/'
-
+minimum_pre_commit_version: 4.4.0
 default_language_version:
   python: python3.12
   node: "22"
@@ -19,6 +19,7 @@ repos:
       - id: no-commit-to-branch
       - id: check-yaml
         args: ["--unsafe"]
+        exclude: 'docs/static/openai-spec-2.3.0.yml'
       - id: detect-private-key
       - id: mixed-line-ending
         args: [--fix=lf] # Forces to replace line ending by LF (line feed)
@@ -42,7 +43,6 @@ repos:
   hooks:
     - id: ruff
      args: [ --fix ]
-      exclude: ^llama_stack/strong_typing/.*$
     - id: ruff-format

 - repo: https://github.com/adamchainz/blacken-docs
@@ -52,13 +52,9 @@ hooks:
       additional_dependencies:
         - black==24.3.0

-- repo: https://github.com/astral-sh/uv-pre-commit
-  rev: 0.7.20
-  hooks:
-    - id: uv-lock

 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.16.1
+  rev: v1.18.2
   hooks:
     - id: mypy
       additional_dependencies:
@@ -78,33 +74,48 @@ repos:
 - repo: local
   hooks:
+    - id: uv-lock
+      name: uv-lock
+      additional_dependencies:
+        - uv==0.7.20
+      entry: ./scripts/uv-run-with-index.sh lock
+      language: python
+      pass_filenames: false
+      require_serial: true
+      files: ^(pyproject\.toml|uv\.lock)$
+    - id: mypy-full
+      name: mypy (full type_checking)
+      entry: ./scripts/uv-run-with-index.sh run --group dev --group type_checking mypy
+      language: system
+      pass_filenames: false
+      stages: [manual]
     - id: distro-codegen
       name: Distribution Template Codegen
       additional_dependencies:
         - uv==0.7.8
-      entry: uv run --group codegen ./scripts/distro_codegen.py
+      entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/distro_codegen.py
       language: python
       pass_filenames: false
       require_serial: true
-      files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+      files: ^src/llama_stack/distributions/.*$|^src/llama_stack/providers/.*/inference/.*/models\.py$
     - id: provider-codegen
       name: Provider Codegen
       additional_dependencies:
         - uv==0.7.8
-      entry: uv run --group codegen ./scripts/provider_codegen.py
+      entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/provider_codegen.py
       language: python
       pass_filenames: false
       require_serial: true
-      files: ^llama_stack/providers/.*$
+      files: ^src/llama_stack/providers/.*$|^scripts/run_openapi_generator\.sh$
     - id: openapi-codegen
       name: API Spec Codegen
       additional_dependencies:
        - uv==0.7.8
-      entry: sh -c 'uv run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null'
+      entry: sh -c './scripts/uv-run-with-index.sh run scripts/run_openapi_generator.sh'
       language: python
       pass_filenames: false
       require_serial: true
-      files: ^llama_stack/apis/|^docs/openapi_generator/
+      files: ^src/llama_stack_api/.*$
     - id: check-workflows-use-hashes
       name: Check GitHub Actions use SHA-pinned actions
       entry: ./scripts/check-workflows-use-hashes.sh
@@ -120,7 +131,7 @@
       pass_filenames: false
       require_serial: true
       always_run: true
-      files: ^llama_stack/.*$
+      files: ^src/llama_stack/.*$
     - id: forbid-pytest-asyncio
       name: Block @pytest.mark.asyncio and @pytest_asyncio.fixture
       entry: bash
@@ -141,7 +152,7 @@
       name: Generate CI documentation
       additional_dependencies:
         - uv==0.7.8
-      entry: uv run ./scripts/gen-ci-docs.py
+      entry: ./scripts/uv-run-with-index.sh run ./scripts/gen-ci-docs.py
       language: python
       pass_filenames: false
       require_serial: true
@@ -150,7 +161,7 @@
       name: Format & Lint UI
       entry: bash ./scripts/run-ui-linter.sh
       language: system
-      files: ^llama_stack/ui/.*\.(ts|tsx)$
+      files: ^src/llama_stack_ui/.*\.(ts|tsx)$
       pass_filenames: false
       require_serial: true
@@ -172,6 +183,44 @@
             exit 1
           fi
           exit 0
+    - id: fips-compliance
+      name: Ensure llama-stack remains FIPS compliant
+      entry: bash
+      language: system
+      types: [python]
+      pass_filenames: true
+      exclude: '^tests/.*$' # Exclude test dir as some safety tests used MD5
+      args:
+        - -c
+        - |
+          grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$0" "$@" && { # "$0" holds the first filename: bash -c assigns it to $0, not $@
+            echo;
+            echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
+            echo "   These functions are not FIPS-compliant"
+            echo;
+            exit 1;
+          } || true
+    - id: check-api-independence
+      name: Ensure llama_stack_api does not import llama_stack
+      entry: bash
+      language: system
+      pass_filenames: false
+      require_serial: true
+      always_run: true
+      files: ^src/llama_stack_api/.*$
+      args:
+        - -c
+        - |
+          API_DIR="src/llama_stack_api"
+          grep -rn --include="*.py" -E '^[^#]*(import llama_stack\b|from llama_stack\b)' "$API_DIR" 2>/dev/null && {
+            echo "llama_stack_api must not import llama_stack";
+            exit 1;
+          }
+          [ -f "$API_DIR/pyproject.toml" ] && grep -n 'llama_stack[^_]' "$API_DIR/pyproject.toml" && {
+            echo "llama_stack_api must not depend on llama_stack in pyproject.toml";
+            exit 1;
+          }
+          exit 0

 ci:
   autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
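The two new grep-based guards are plain shell and can be exercised outside pre-commit; a minimal sketch (the temp file name is hypothetical):

```bash
# FIPS guard: flags non-FIPS hash/uuid helpers outside of comments.
echo 'digest = hashlib.md5(data).hexdigest()' > /tmp/bad.py
grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' /tmp/bad.py   # prints the offending line

# API-independence guard: llama_stack_api must not import llama_stack.
grep -rn --include="*.py" -E '^[^#]*(import llama_stack\b|from llama_stack\b)' src/llama_stack_api \
  && echo "llama_stack_api must not import llama_stack"
```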
diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index c51a1b2aa..000000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,614 +0,0 @@
-# Changelog
-
-# v0.2.20
-Published on: 2025-08-29T22:25:32Z
-
-Here are some key changes that are coming as part of this release.
-
-### Build and Environment
-
-- Environment improvements: fixed env var replacement to preserve types.
-- Docker stability: fixed container startup failures for Fireworks AI provider.
-- Removed absolute paths in build for better portability.
-
-### Features
-
-- UI Enhancements: Implemented file upload and VectorDB creation/configuration directly in UI.
-- Vector Store Improvements: Added keyword, vector, and hybrid search inside vector store.
-- Added S3 authorization support for file providers.
-- SQL Store: Added inequality support to where clause.
-
-### Documentation
-
-- Fixed post-training docs.
-- Added Contributor Guidelines for creating Internal vs. External providers.
-
-### Fixes
-
-- Removed unsupported bfcl scoring function.
-- Multiple reliability and configuration fixes for providers and environment handling.
- -### Engineering / Chores - -- Cleaner internal development setup with consistent paths. -- Incremental improvements to provider integration and vector store behavior. - - -### New Contributors -- @omertuc made their first contribution in #3270 -- @r3v5 made their first contribution in vector store hybrid search - ---- - -# v0.2.19 -Published on: 2025-08-26T22:06:55Z - -## Highlights -* feat: Add CORS configuration support for server by @skamenan7 in https://github.com/llamastack/llama-stack/pull/3201 -* feat(api): introduce /rerank by @ehhuang in https://github.com/llamastack/llama-stack/pull/2940 -* feat: Add S3 Files Provider by @mattf in https://github.com/llamastack/llama-stack/pull/3202 - - ---- - -# v0.2.18 -Published on: 2025-08-20T01:09:27Z - -## Highlights -* Add moderations create API -* Hybrid search in Milvus -* Numerous Responses API improvements -* Documentation updates - - ---- - -# v0.2.17 -Published on: 2025-08-05T01:51:14Z - -## Highlights - -* feat(tests): introduce inference record/replay to increase test reliability by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2941 -* fix(library_client): improve initialization error handling and prevent AttributeError by @mattf in https://github.com/meta-llama/llama-stack/pull/2944 -* fix: use OLLAMA_URL to activate Ollama provider in starter by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2963 -* feat(UI): adding MVP playground UI by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2828 -* Standardization of errors (@nathan-weinberg) -* feat: Enable DPO training with HuggingFace inline provider by @Nehanth in https://github.com/meta-llama/llama-stack/pull/2825 -* chore: rename templates to distributions by @ashwinb in https://github.com/meta-llama/llama-stack/pull/3035 - - ---- - -# v0.2.16 -Published on: 2025-07-28T23:35:23Z - -## Highlights - -* Automatic model registration for self-hosted providers (ollama and vllm currently). No need for `INFERENCE_MODEL` environment variables which need to be updated, etc. -* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [run.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/run.yaml) for more details. -* All tests migrated to pytest now (thanks @Elbehery) -* DPO implementation in the post-training provider (thanks @Nehanth) -* (Huge!) Support for external APIs and providers thereof (thanks @leseb, @cdoern and others). This is a really big deal -- you can now add more APIs completely out of tree and experiment with them before (optionally) wanting to contribute back. -* `inline::vllm` provider is gone thank you very much -* several improvements to OpenAI inference implementations and LiteLLM backend (thanks @mattf) -* Chroma now supports Vector Store API (thanks @franciscojavierarceo). -* Authorization improvements: Vector Store/File APIs now supports access control (thanks @franciscojavierarceo); Telemetry read APIs are gated according to logged-in user's roles. 
- - - ---- - -# v0.2.15 -Published on: 2025-07-16T03:30:01Z - - - ---- - -# v0.2.14 -Published on: 2025-07-04T16:06:48Z - -## Highlights - -* Support for Llama Guard 4 -* Added Milvus support to vector-stores API -* Documentation and zero-to-hero updates for latest APIs - - ---- - -# v0.2.13 -Published on: 2025-06-28T04:28:11Z - -## Highlights -* search_mode support in OpenAI vector store API -* Security fixes - - ---- - -# v0.2.12 -Published on: 2025-06-20T22:52:12Z - -## Highlights -* Filter support in file search -* Support auth attributes in inference and response stores - - ---- - -# v0.2.11 -Published on: 2025-06-17T20:26:26Z - -## Highlights -* OpenAI-compatible vector store APIs -* Hybrid Search in Sqlite-vec -* File search tool in Responses API -* Pagination in inference and response stores -* Added `suffix` to completions API for fill-in-the-middle tasks - - ---- - -# v0.2.10.1 -Published on: 2025-06-06T20:11:02Z - -## Highlights -* ChromaDB provider fix - - ---- - -# v0.2.10 -Published on: 2025-06-05T23:21:45Z - -## Highlights - -* OpenAI-compatible embeddings API -* OpenAI-compatible Files API -* Postgres support in starter distro -* Enable ingestion of precomputed embeddings -* Full multi-turn support in Responses API -* Fine-grained access control policy - - ---- - -# v0.2.9 -Published on: 2025-05-30T20:01:56Z - -## Highlights -* Added initial streaming support in Responses API -* UI view for Responses -* Postgres inference store support - - ---- - -# v0.2.8 -Published on: 2025-05-27T21:03:47Z - -# Release v0.2.8 - -## Highlights - -* Server-side MCP with auth firewalls now works in the Stack - both for Agents and Responses -* Get chat completions APIs and UI to show chat completions -* Enable keyword search for sqlite-vec - - ---- - -# v0.2.7 -Published on: 2025-05-16T20:38:10Z - -## Highlights - -This is a small update. But a couple highlights: - -* feat: function tools in OpenAI Responses by @bbrowning in https://github.com/meta-llama/llama-stack/pull/2094, getting closer to ready. Streaming is the next missing piece. -* feat: Adding support for customizing chunk context in RAG insertion and querying by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2134 -* feat: scaffolding for Llama Stack UI by @ehhuang in https://github.com/meta-llama/llama-stack/pull/2149, more to come in the coming releases. - - ---- - -# v0.2.6 -Published on: 2025-05-12T18:06:52Z - - - ---- - -# v0.2.5 -Published on: 2025-05-04T20:16:49Z - - - ---- - -# v0.2.4 -Published on: 2025-04-29T17:26:01Z - -## Highlights - -* One-liner to install and run Llama Stack yay! by @reluctantfuturist in https://github.com/meta-llama/llama-stack/pull/1383 -* support for NVIDIA NeMo datastore by @raspawar in https://github.com/meta-llama/llama-stack/pull/1852 -* (yuge!) Kubernetes authentication by @leseb in https://github.com/meta-llama/llama-stack/pull/1778 -* (yuge!) OpenAI Responses API by @bbrowning in https://github.com/meta-llama/llama-stack/pull/1989 -* add api.llama provider, llama-guard-4 model by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2058 - - ---- - -# v0.2.3 -Published on: 2025-04-25T22:46:21Z - -## Highlights - -* OpenAI compatible inference endpoints and client-SDK support. `client.chat.completions.create()` now works. -* significant improvements and functionality added to the nVIDIA distribution -* many improvements to the test verification suite. 
-* new inference providers: Ramalama, IBM WatsonX -* many improvements to the Playground UI - - ---- - -# v0.2.2 -Published on: 2025-04-13T01:19:49Z - -## Main changes - -- Bring Your Own Provider (@leseb) - use out-of-tree provider code to execute the distribution server -- OpenAI compatible inference API in progress (@bbrowning) -- Provider verifications (@ehhuang) -- Many updates and fixes to playground -- Several llama4 related fixes - - ---- - -# v0.2.1 -Published on: 2025-04-05T23:13:00Z - - - ---- - -# v0.2.0 -Published on: 2025-04-05T19:04:29Z - -## Llama 4 Support - -Checkout more at https://www.llama.com - - - ---- - -# v0.1.9 -Published on: 2025-03-29T00:52:23Z - -### Build and Test Agents -* Agents: Entire document context with attachments -* RAG: Documentation with sqlite-vec faiss comparison -* Getting started: Fixes to getting started notebook. - -### Agent Evals and Model Customization -* (**New**) Post-training: Add nemo customizer - -### Better Engineering -* Moved sqlite-vec to non-blocking calls -* Don't return a payload on file delete - - - ---- - -# v0.1.8 -Published on: 2025-03-24T01:28:50Z - -# v0.1.8 Release Notes - -### Build and Test Agents -* Safety: Integrated NVIDIA as a safety provider. -* VectorDB: Added Qdrant as an inline provider. -* Agents: Added support for multiple tool groups in agents. -* Agents: Simplified imports for Agents in client package - - -### Agent Evals and Model Customization -* Introduced DocVQA and IfEval benchmarks. - -### Deploying and Monitoring Agents -* Introduced a Containerfile and image workflow for the Playground. -* Implemented support for Bearer (API Key) authentication. -* Added attribute-based access control for resources. -* Fixes on docker deployments: use --pull always and standardized the default port to 8321 -* Deprecated: /v1/inspect/providers use /v1/providers/ instead - -### Better Engineering -* Consolidated scripts under the ./scripts directory. -* Addressed mypy violations in various modules. -* Added Dependabot scans for Python dependencies. -* Implemented a scheduled workflow to update the changelog automatically. -* Enforced concurrency to reduce CI loads. 
- - -### New Contributors -* @cmodi-meta made their first contribution in https://github.com/meta-llama/llama-stack/pull/1650 -* @jeffmaury made their first contribution in https://github.com/meta-llama/llama-stack/pull/1671 -* @derekhiggins made their first contribution in https://github.com/meta-llama/llama-stack/pull/1698 -* @Bobbins228 made their first contribution in https://github.com/meta-llama/llama-stack/pull/1745 - -**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.7...v0.1.8 - ---- - -# v0.1.7 -Published on: 2025-03-14T22:30:51Z - -## 0.1.7 Release Notes - -### Build and Test Agents -* Inference: ImageType is now refactored to LlamaStackImageType -* Inference: Added tests to measure TTFT -* Inference: Bring back usage metrics -* Agents: Added endpoint for get agent, list agents and list sessions -* Agents: Automated conversion of type hints in client tool for lite llm format -* Agents: Deprecated ToolResponseMessage in agent.resume API -* Added Provider API for listing and inspecting provider info - -### Agent Evals and Model Customization -* Eval: Added new eval benchmarks Math 500 and BFCL v3 -* Deploy and Monitoring of Agents -* Telemetry: Fix tracing to work across coroutines - -### Better Engineering -* Display code coverage for unit tests -* Updated call sites (inference, tool calls, agents) to move to async non blocking calls -* Unit tests also run on Python 3.11, 3.12, and 3.13 -* Added ollama inference to Integration tests CI -* Improved documentation across examples, testing, CLI, updated providers table ) - - - - ---- - -# v0.1.6 -Published on: 2025-03-08T04:35:08Z - -## 0.1.6 Release Notes - -### Build and Test Agents -* Inference: Fixed support for inline vllm provider -* (**New**) Agent: Build & Monitor Agent Workflows with Llama Stack + Anthropic's Best Practice [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb) -* (**New**) Agent: Revamped agent [documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html) with more details and examples -* Agent: Unify tools and Python SDK Agents API -* Agent: AsyncAgent Python SDK wrapper supporting async client tool calls -* Agent: Support python functions without @client_tool decorator as client tools -* Agent: deprecation for allow_resume_turn flag, and remove need to specify tool_prompt_format -* VectorIO: MilvusDB support added - -### Agent Evals and Model Customization -* (**New**) Agent: Llama Stack RAG Lifecycle [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb) -* Eval: Documentation for eval, scoring, adding new benchmarks -* Eval: Distribution template to run benchmarks on llama & non-llama models -* Eval: Ability to register new custom LLM-as-judge scoring functions -* (**New**) Looking for contributors for open benchmarks. See [documentation](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) for details. 
- -### Deploy and Monitoring of Agents -* Better support for different log levels across all components for better monitoring - -### Better Engineering -* Enhance OpenAPI spec to include Error types across all APIs -* Moved all tests to /tests and created unit tests to run on each PR -* Removed all dependencies on llama-models repo - - ---- - -# v0.1.5.1 -Published on: 2025-02-28T22:37:44Z - -## 0.1.5.1 Release Notes -* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328 - -**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.5...v0.1.5.1 - ---- - -# v0.1.5 -Published on: 2025-02-28T18:14:01Z - -## 0.1.5 Release Notes -### Build Agents -* Inference: Support more non-llama models (openai, anthropic, gemini) -* Inference: Can use the provider's model name in addition to the HF alias -* Inference: Fixed issues with calling tools that weren't specified in the prompt -* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling -* Embeddings: Added support for Nemo retriever embedding models -* Tools: Added support for MCP tools in Ollama Distribution -* Distributions: Added new Groq distribution - -### Customize Models -* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model - -### Monitor agents -* More comprehensive logging of agent steps including client tools -* Telemetry inputs/outputs are now structured and queryable -* Ability to retrieve agents session, turn, step by ids - -### Better Engineering -* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin -* Move most logging to use logger instead of prints -* Completed text /chat-completion and /completion tests - - ---- - -# v0.1.4 -Published on: 2025-02-25T00:02:43Z - -## v0.1.4 Release Notes -Here are the key changes coming as part of this release: - -### Build and Test Agents -* Inference: Added support for non-llama models -* Inference: Added option to list all downloaded models and remove models -* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn -* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides -* Agent: Added logging for agent step start and completion times -* Agent: Added support for logging for tool execution metadata -* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs -* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults -* VectorIO: Improved performance of sqlite-vec using chunked writes -### Agent Evals and Model Customization -* Deprecated api /eval-tasks. Use /eval/benchmark instead -* Added CPU training support for TorchTune -### Deploy and Monitoring of Agents -* Consistent view of client and server tool calls in telemetry -### Better Engineering -* Made tests more data-driven for consistent evaluation -* Fixed documentation links and improved API reference generation -* Various small fixes for build scripts and system reliability - - - ---- - -# v0.1.3 -Published on: 2025-02-14T20:24:32Z - -## v0.1.3 Release - -Here are some key changes that are coming as part of this release. 
- -### Build and Test Agents -Streamlined the initial development experience -- Added support for llama stack run --image-type venv -- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration -- vLLM improvements for tool calling and logprobs -- Better handling of sporadic code_interpreter tool calls - -### Agent Evals -Better benchmarking and Agent performance assessment -- Renamed eval API /eval-task to /benchmarks -- Improved documentation and notebooks for RAG and evals - -### Deploy and Monitoring of Agents -Improved production readiness -- Added usage metrics collection for chat completions -- CLI improvements for provider information -- Improved error handling and system reliability -- Better model endpoint handling and accessibility -- Improved signal handling on distro server - -### Better Engineering -Infrastructure and code quality improvements -- Faster text-based chat completion tests -- Improved testing for non-streaming agent apis -- Standardized import formatting with ruff linter -- Added conventional commits standard -- Fixed documentation parsing issues - - ---- - -# v0.1.2 -Published on: 2025-02-07T22:06:49Z - -# TL;DR -- Several stabilizations to development flows after the switch to `uv` -- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops) -- Added automated rebuilds for ReadTheDocs -- Llama Stack server supports HTTPS -- Added system prompt overrides support -- Several bug fixes and improvements to documentation (check out Kubernetes deployment guide by @terrytangyuan ) - - ---- - -# v0.1.1 -Published on: 2025-02-02T02:29:24Z - -A bunch of small / big improvements everywhere including support for Windows, switching to `uv` and many provider improvements. - - ---- - -# v0.1.0 -Published on: 2025-01-24T17:47:47Z - -We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor and those agents with telemetry, and evaluate the agent with scoring functions. - -## Context -GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open. - -Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety. - -With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. 
And with Llama Stack’s plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience. - -## Release -After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages(v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements. - -There are example standalone apps in llama-stack-apps. - - -## Key Features of this release - -- **Unified API Layer** - - Inference: Run LLM models - - RAG: Store and retrieve knowledge for RAG - - Agents: Build multi-step agentic workflows - - Tools: Register tools that can be called by the agent - - Safety: Apply content filtering and safety policies - - Evaluation: Test model and agent quality - - Telemetry: Collect and analyze usage data and complex agentic traces - - Post Training ( Coming Soon ): Fine tune models for specific use cases - -- **Rich Provider Ecosystem** - - Local Development: Meta's Reference, Ollama - - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras - - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI - - On-device: iOS and Android support - -- **Built for Production** - - Pre-packaged distributions for common deployment scenarios - - Backwards compatibility across model versions - - Comprehensive evaluation capabilities - - Full observability and monitoring - -- **Multiple developer interfaces** - - CLI: Command line interface - - Python SDK - - Swift iOS SDK - - Kotlin Android SDK - -- **Sample llama stack applications** - - Python - - iOS - - Android - - - ---- - -# v0.1.0rc12 -Published on: 2025-01-22T22:24:01Z - - - ---- - -# v0.0.63 -Published on: 2024-12-18T07:17:43Z - -A small but important bug-fix release to update the URL datatype for the client-SDKs. The issue affected multimodal agentic turns especially. - -**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.0.62...v0.0.63 - ---- - diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c869b4f5c..ba6c2eaf2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues with that the pre-commit checks identify. +To run the expanded mypy configuration that CI enforces, use: + +```bash +uv run pre-commit run mypy-full --hook-stage manual --all-files +``` + +or invoke mypy directly with all optional dependencies: + +```bash +uv run --group dev --group type_checking mypy +``` + ```{caution} Before pushing your changes, make sure that the pre-commit hooks have passed successfully. ``` @@ -219,7 +231,7 @@ npm run serve If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command: ```bash -uv run ./docs/openapi_generator/run_openapi_generator.sh +uv run ./scripts/run_openapi_generator.sh ``` The generated API schema will be available in `docs/static/`. Make sure to review the changes before committing. 
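Since the spec and the Stainless config are both generated artifacts, a simple way to confirm a branch is clean before review is to regenerate and diff (a sketch; it assumes the output paths referenced elsewhere in this change):

```bash
# Regenerate the OpenAPI spec and Stainless config, then fail on drift.
uv run ./scripts/run_openapi_generator.sh
git diff --exit-code docs/static/ client-sdks/stainless/ \
  || echo "Generated API artifacts are stale; commit the regenerated files."
```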
diff --git a/MANIFEST.in b/MANIFEST.in index b10795c92..09206f2fb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,11 +1,11 @@ include pyproject.toml -include llama_stack/models/llama/llama3/tokenizer.model -include llama_stack/models/llama/llama4/tokenizer.model -include llama_stack/core/*.sh -include llama_stack/cli/scripts/*.sh -include llama_stack/distributions/*/*.yaml -exclude llama_stack/distributions/ci-tests +include src/llama_stack/models/llama/llama3/tokenizer.model +include src/llama_stack/models/llama/llama4/tokenizer.model +include src/llama_stack/core/*.sh +include src/llama_stack/cli/scripts/*.sh +include src/llama_stack/distributions/*/*.yaml +exclude src/llama_stack/distributions/ci-tests include tests/integration/test_cases/inference/*.json -include llama_stack/models/llama/*/*.md -include llama_stack/tests/integration/*.jpg -prune llama_stack/distributions/ci-tests +include src/llama_stack/models/llama/*/*.md +include src/llama_stack/tests/integration/*.jpg +prune src/llama_stack/distributions/ci-tests diff --git a/README.md b/README.md index bb8587855..5360f4ff0 100644 --- a/README.md +++ b/README.md @@ -10,83 +10,6 @@ [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack) -### ✨🎉 Llama 4 Support 🎉✨ -We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta. - -
- -👋 Click here to see how to run Llama 4 models on Llama Stack - -\ -*Note you need 8xH100 GPU-host to run these models* - -```bash -pip install -U llama_stack - -MODEL="Llama-4-Scout-17B-16E-Instruct" -# get meta url from llama.com -huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL - -# install dependencies for the distribution -llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install - -# start a llama stack server -INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu - -# install client to interact with the server -pip install llama-stack-client -``` -### CLI -```bash -# Run a chat completion -MODEL="Llama-4-Scout-17B-16E-Instruct" - -llama-stack-client --endpoint http://localhost:8321 \ -inference chat-completion \ ---model-id meta-llama/$MODEL \ ---message "write a haiku for meta's llama 4 models" - -OpenAIChatCompletion( - ... - choices=[ - OpenAIChatCompletionChoice( - finish_reason='stop', - index=0, - message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam( - role='assistant', - content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*', - ... - ), - ... - ) - ], - ... -) -``` -### Python SDK -```python -from llama_stack_client import LlamaStackClient - -client = LlamaStackClient(base_url=f"http://localhost:8321") - -model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct" -prompt = "Write a haiku about coding" - -print(f"User> {prompt}") -response = client.chat.completions.create( - model=model_id, - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": prompt}, - ], -) -print(f"Assistant> {response.choices[0].message.content}") -``` -As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned! - - -
- ### 🚀 One-Line Installer 🚀 To try Llama Stack locally, run: @@ -99,7 +22,7 @@ curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides -- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry. +- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals. - **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile. - **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment. - **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android. @@ -125,34 +48,34 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack. Please checkout for [full list](https://llamastack.github.io/docs/providers) -| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO | -|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:| -| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| SambaNova | Hosted | | ✅ | | ✅ | | | | | -| Cerebras | Hosted | | ✅ | | | | | | | -| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | | -| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | | -| Together | Hosted | ✅ | ✅ | | ✅ | | | | | -| Groq | Hosted | | ✅ | | | | | | | -| Ollama | Single Node | | ✅ | | | | | | | -| TGI | Hosted/Single Node | | ✅ | | | | | | | -| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | | -| ChromaDB | Hosted/Single Node | | | ✅ | | | | | | -| Milvus | Hosted/Single Node | | | ✅ | | | | | | -| Qdrant | Hosted/Single Node | | | ✅ | | | | | | -| Weaviate | Hosted/Single Node | | | ✅ | | | | | | -| SQLite-vec | Single Node | | | ✅ | | | | | | -| PG Vector | Single Node | | | ✅ | | | | | | -| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | | -| vLLM | Single Node | | ✅ | | | | | | | -| OpenAI | Hosted | | ✅ | | | | | | | -| Anthropic | Hosted | | ✅ | | | | | | | -| Gemini | Hosted | | ✅ | | | | | | | -| WatsonX | Hosted | | ✅ | | | | | | | -| HuggingFace | Single Node | | | | | | ✅ | | ✅ | -| TorchTune | Single Node | | | | | | ✅ | | | -| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ | -| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ | +| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Post Training | Eval | DatasetIO | +|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:-------------:|:----:|:--------:| +| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| SambaNova | Hosted | | ✅ | | ✅ | | | | +| Cerebras | Hosted | | ✅ | | | | | | +| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | +| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | +| Together | Hosted | ✅ | ✅ | | ✅ | | | | +| Groq | Hosted | | ✅ | | | | | | +| Ollama | Single Node | | ✅ | | | | | | +| TGI | Hosted/Single Node | | ✅ | | | | | | +| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | +| ChromaDB | Hosted/Single Node | | | ✅ | | | | | +| Milvus | Hosted/Single Node | | | ✅ | | | | | +| Qdrant | Hosted/Single Node | | | ✅ | | | | | 
+| Weaviate | Hosted/Single Node | | | ✅ | | | | | +| SQLite-vec | Single Node | | | ✅ | | | | | +| PG Vector | Single Node | | | ✅ | | | | | +| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | +| vLLM | Single Node | | ✅ | | | | | | +| OpenAI | Hosted | | ✅ | | | | | | +| Anthropic | Hosted | | ✅ | | | | | | +| Gemini | Hosted | | ✅ | | | | | | +| WatsonX | Hosted | | ✅ | | | | | | +| HuggingFace | Single Node | | | | | ✅ | | ✅ | +| TorchTune | Single Node | | | | | ✅ | | | +| NVIDIA NEMO | Hosted | | ✅ | ✅ | | ✅ | ✅ | ✅ | +| NVIDIA | Hosted | | | | | ✅ | ✅ | ✅ | > **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation. diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml index e1ca170f5..aed3b97c2 100644 --- a/benchmarking/k8s-benchmark/stack-configmap.yaml +++ b/benchmarking/k8s-benchmark/stack-configmap.yaml @@ -9,7 +9,6 @@ data: - inference - files - safety - - telemetry - tool_runtime - vector_io providers: @@ -44,14 +43,6 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} - metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -75,12 +66,6 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -115,13 +100,21 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - references: + stores: metadata: backend: kv_default namespace: registry inference: backend: sql_default table_name: inference_store + max_write_queue_size: 10000 + num_writers: 4 + conversations: + backend: sql_default + table_name: openai_conversations + prompts: + backend: kv_default + namespace: prompts models: - metadata: embedding_dimension: 768 diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml index 2ccaa21aa..a0d636e09 100644 --- a/benchmarking/k8s-benchmark/stack_run_config.yaml +++ b/benchmarking/k8s-benchmark/stack_run_config.yaml @@ -6,7 +6,6 @@ apis: - inference - files - safety -- telemetry - tool_runtime - vector_io providers: @@ -27,28 +26,16 @@ providers: config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + table_name: files_metadata + backend: sql_default vector_io: - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - files: - - provider_id: 
meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} - metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -58,26 +45,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -112,32 +88,46 @@ storage: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - references: + stores: metadata: - backend: kv_default namespace: registry + backend: kv_default inference: - backend: sql_default table_name: inference_store -models: -- metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding -- model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm -shields: -- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + - model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm + shields: + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: port: 8323 +vector_stores: + default_provider_id: chromadb + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/client-sdks/stainless/README.md b/client-sdks/stainless/README.md index 5d391f14c..54ff3d3d1 100644 --- a/client-sdks/stainless/README.md +++ b/client-sdks/stainless/README.md @@ -1,8 +1,11 @@ These are the source-of-truth configuration files used to generate the Stainless client SDKs via Stainless. - `openapi.yml`: this is the OpenAPI specification for the Llama Stack API. 
-- `openapi.stainless.yml`: this is the Stainless _configuration_ which instructs Stainless how to generate the client SDKs. +- `config.yml`: this is the Stainless _configuration_ which instructs Stainless how to generate the client SDKs. A small side note: notice the `.yml` suffixes since Stainless uses that suffix typically for its configuration files. -These files go hand-in-hand. As of now, only the `openapi.yml` file is automatically generated using the `run_openapi_generator.sh` script. \ No newline at end of file +These files go hand-in-hand. Both `openapi.yml` and `config.yml` are generated by `scripts/run_openapi_generator.sh`: + +- `openapi.yml` comes from the FastAPI-based generator. +- `config.yml` is rendered from `scripts/openapi_generator/stainless_config/config_data.py` so the Stainless config stays in lock-step with the spec. diff --git a/client-sdks/stainless/config.yml b/client-sdks/stainless/config.yml new file mode 100644 index 000000000..212b2b54a --- /dev/null +++ b/client-sdks/stainless/config.yml @@ -0,0 +1,490 @@ +# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json + +organization: + name: llama-stack-client + docs: https://llama-stack.readthedocs.io/en/latest/ + contact: llamastack@meta.com +security: +- {} +- BearerAuth: [] +security_schemes: + BearerAuth: + type: http + scheme: bearer +targets: + node: + package_name: llama-stack-client + production_repo: llamastack/llama-stack-client-typescript + publish: + npm: false + python: + package_name: llama_stack_client + production_repo: llamastack/llama-stack-client-python + options: + use_uv: true + publish: + pypi: true + project_name: llama_stack_client + kotlin: + reverse_domain: com.llama_stack_client.api + production_repo: null + publish: + maven: false + go: + package_name: llama-stack-client + production_repo: llamastack/llama-stack-client-go + options: + enable_v2: true + back_compat_use_shared_package: false +client_settings: + default_env_prefix: LLAMA_STACK_CLIENT + opts: + api_key: + type: string + read_env: LLAMA_STACK_CLIENT_API_KEY + auth: + security_scheme: BearerAuth + nullable: true +environments: + production: http://any-hosted-llama-stack.com +pagination: +- name: datasets_iterrows + type: offset + request: + dataset_id: + type: string + start_index: + type: integer + x-stainless-pagination-property: + purpose: offset_count_param + limit: + type: integer + response: + data: + type: array + items: + type: object + next_index: + type: integer + x-stainless-pagination-property: + purpose: offset_count_start_field +- name: openai_cursor_page + type: cursor + request: + limit: + type: integer + after: + type: string + x-stainless-pagination-property: + purpose: next_cursor_param + response: + data: + type: array + items: {} + has_more: + type: boolean + last_id: + type: string + x-stainless-pagination-property: + purpose: next_cursor_field +settings: + license: MIT + unwrap_response_fields: + - data + file_header: 'Copyright (c) Meta Platforms, Inc. and affiliates. + + All rights reserved. + + + This source code is licensed under the terms described in the LICENSE file in + + the root directory of this source tree. 
+ + ' +openapi: + transformations: + - command: mergeObject + reason: Better return_type using enum + args: + target: + - $.components.schemas + object: + ReturnType: + additionalProperties: false + properties: + type: + enum: + - string + - number + - boolean + - array + - object + - json + - union + - chat_completion_input + - completion_input + - agent_turn_input + required: + - type + type: object + - command: replaceProperties + reason: Replace return type properties with better model (see above) + args: + filter: + only: + - $.components.schemas.ScoringFn.properties.return_type + - $.components.schemas.RegisterScoringFunctionRequest.properties.return_type + value: + $ref: '#/components/schemas/ReturnType' + - command: oneOfToAnyOf + reason: Prism (mock server) doesn't like one of our requests as it technically + matches multiple variants +readme: + example_requests: + default: + type: request + endpoint: post /v1/chat/completions + params: {} + headline: + type: request + endpoint: get /v1/models + params: {} + pagination: + type: request + endpoint: post /v1/chat/completions + params: {} +resources: + $shared: + models: + interleaved_content_item: InterleavedContentItem + interleaved_content: InterleavedContent + param_type: ParamType + safety_violation: SafetyViolation + sampling_params: SamplingParams + scoring_result: ScoringResult + system_message: SystemMessage + toolgroups: + models: + tool_group: ToolGroup + list_tool_groups_response: ListToolGroupsResponse + methods: + register: post /v1/toolgroups + get: get /v1/toolgroups/{toolgroup_id} + list: get /v1/toolgroups + unregister: delete /v1/toolgroups/{toolgroup_id} + tools: + methods: + get: get /v1/tools/{tool_name} + list: + paginated: false + endpoint: get /v1/tools + tool_runtime: + models: + tool_def: ToolDef + tool_invocation_result: ToolInvocationResult + methods: + list_tools: + paginated: false + endpoint: get /v1/tool-runtime/list-tools + invoke_tool: post /v1/tool-runtime/invoke + responses: + models: + response_object_stream: OpenAIResponseObjectStream + response_object: OpenAIResponseObject + methods: + create: + type: http + streaming: + stream_event_model: responses.response_object_stream + param_discriminator: stream + endpoint: post /v1/responses + retrieve: get /v1/responses/{response_id} + list: + type: http + endpoint: get /v1/responses + delete: + type: http + endpoint: delete /v1/responses/{response_id} + subresources: + input_items: + methods: + list: + type: http + paginated: false + endpoint: get /v1/responses/{response_id}/input_items + prompts: + models: + prompt: Prompt + list_prompts_response: ListPromptsResponse + methods: + create: post /v1/prompts + list: + paginated: false + endpoint: get /v1/prompts + retrieve: get /v1/prompts/{prompt_id} + update: post /v1/prompts/{prompt_id} + delete: delete /v1/prompts/{prompt_id} + set_default_version: post /v1/prompts/{prompt_id}/set-default-version + subresources: + versions: + methods: + list: + paginated: false + endpoint: get /v1/prompts/{prompt_id}/versions + conversations: + models: + conversation_object: Conversation + methods: + create: + type: http + endpoint: post /v1/conversations + retrieve: get /v1/conversations/{conversation_id} + update: + type: http + endpoint: post /v1/conversations/{conversation_id} + delete: + type: http + endpoint: delete /v1/conversations/{conversation_id} + subresources: + items: + methods: + get: + type: http + endpoint: get /v1/conversations/{conversation_id}/items/{item_id} + list: + type: http + endpoint: get 
/v1/conversations/{conversation_id}/items + create: + type: http + endpoint: post /v1/conversations/{conversation_id}/items + delete: + type: http + endpoint: delete /v1/conversations/{conversation_id}/items/{item_id} + inspect: + models: + healthInfo: HealthInfo + providerInfo: ProviderInfo + routeInfo: RouteInfo + versionInfo: VersionInfo + methods: + health: get /v1/health + version: get /v1/version + embeddings: + models: + create_embeddings_response: OpenAIEmbeddingsResponse + methods: + create: post /v1/embeddings + chat: + models: + chat_completion_chunk: OpenAIChatCompletionChunk + subresources: + completions: + methods: + create: + type: http + streaming: + stream_event_model: chat.chat_completion_chunk + param_discriminator: stream + endpoint: post /v1/chat/completions + list: + type: http + paginated: false + endpoint: get /v1/chat/completions + retrieve: + type: http + endpoint: get /v1/chat/completions/{completion_id} + completions: + methods: + create: + type: http + streaming: + param_discriminator: stream + endpoint: post /v1/completions + vector_io: + models: + queryChunksResponse: QueryChunksResponse + methods: + insert: post /v1/vector-io/insert + query: post /v1/vector-io/query + vector_stores: + models: + vector_store: VectorStoreObject + list_vector_stores_response: VectorStoreListResponse + vector_store_delete_response: VectorStoreDeleteResponse + vector_store_search_response: VectorStoreSearchResponsePage + methods: + create: post /v1/vector_stores + list: get /v1/vector_stores + retrieve: get /v1/vector_stores/{vector_store_id} + update: post /v1/vector_stores/{vector_store_id} + delete: delete /v1/vector_stores/{vector_store_id} + search: post /v1/vector_stores/{vector_store_id}/search + subresources: + files: + models: + vector_store_file: VectorStoreFileObject + methods: + list: get /v1/vector_stores/{vector_store_id}/files + retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id} + update: post /v1/vector_stores/{vector_store_id}/files/{file_id} + delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id} + create: post /v1/vector_stores/{vector_store_id}/files + content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content + file_batches: + models: + vector_store_file_batches: VectorStoreFileBatchObject + list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse + methods: + create: post /v1/vector_stores/{vector_store_id}/file_batches + retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id} + list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files + cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel + models: + models: + model: OpenAIModel + list_models_response: OpenAIListModelsResponse + methods: + list: + paginated: false + endpoint: get /v1/models + retrieve: get /v1/models/{model_id} + register: post /v1/models + unregister: delete /v1/models/{model_id} + subresources: + openai: + methods: + list: + paginated: false + endpoint: get /v1/models + providers: + models: + list_providers_response: ListProvidersResponse + methods: + list: + paginated: false + endpoint: get /v1/providers + retrieve: get /v1/providers/{provider_id} + routes: + models: + list_routes_response: ListRoutesResponse + methods: + list: + paginated: false + endpoint: get /v1/inspect/routes + moderations: + models: + create_response: ModerationObject + methods: + create: post /v1/moderations + safety: + models: + run_shield_response: RunShieldResponse + methods: + 
run_shield: post /v1/safety/run-shield + shields: + models: + shield: Shield + list_shields_response: ListShieldsResponse + methods: + retrieve: get /v1/shields/{identifier} + list: + paginated: false + endpoint: get /v1/shields + register: post /v1/shields + delete: delete /v1/shields/{identifier} + scoring: + methods: + score: post /v1/scoring/score + score_batch: post /v1/scoring/score-batch + scoring_functions: + models: + scoring_fn: ScoringFn + scoring_fn_params: ScoringFnParams + list_scoring_functions_response: ListScoringFunctionsResponse + methods: + retrieve: get /v1/scoring-functions/{scoring_fn_id} + list: + paginated: false + endpoint: get /v1/scoring-functions + register: post /v1/scoring-functions + unregister: delete /v1/scoring-functions/{scoring_fn_id} + files: + models: + file: OpenAIFileObject + list_files_response: ListOpenAIFileResponse + delete_file_response: OpenAIFileDeleteResponse + methods: + create: post /v1/files + list: get /v1/files + retrieve: get /v1/files/{file_id} + delete: delete /v1/files/{file_id} + content: get /v1/files/{file_id}/content + batches: + methods: + create: post /v1/batches + list: get /v1/batches + retrieve: get /v1/batches/{batch_id} + cancel: post /v1/batches/{batch_id}/cancel + alpha: + subresources: + inference: + methods: + rerank: post /v1alpha/inference/rerank + post_training: + models: + algorithm_config: AlgorithmConfig + post_training_job: PostTrainingJob + list_post_training_jobs_response: ListPostTrainingJobsResponse + methods: + preference_optimize: post /v1alpha/post-training/preference-optimize + supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune + subresources: + job: + methods: + artifacts: get /v1alpha/post-training/job/artifacts + cancel: post /v1alpha/post-training/job/cancel + status: get /v1alpha/post-training/job/status + list: + paginated: false + endpoint: get /v1alpha/post-training/jobs + benchmarks: + models: + benchmark: Benchmark + list_benchmarks_response: ListBenchmarksResponse + methods: + retrieve: get /v1alpha/eval/benchmarks/{benchmark_id} + list: + paginated: false + endpoint: get /v1alpha/eval/benchmarks + register: post /v1alpha/eval/benchmarks + unregister: delete /v1alpha/eval/benchmarks/{benchmark_id} + eval: + models: + evaluate_response: EvaluateResponse + benchmark_config: BenchmarkConfig + job: Job + methods: + evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations + run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs + evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations + run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs + subresources: + jobs: + methods: + cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id} + status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id} + retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result + beta: + subresources: + datasets: + models: + list_datasets_response: ListDatasetsResponse + methods: + register: post /v1beta/datasets + retrieve: get /v1beta/datasets/{dataset_id} + list: + paginated: false + endpoint: get /v1beta/datasets + unregister: delete /v1beta/datasets/{dataset_id} + iterrows: get /v1beta/datasetio/iterrows/{dataset_id} + appendrows: post /v1beta/datasetio/append-rows/{dataset_id} diff --git a/client-sdks/stainless/openapi.stainless.yml b/client-sdks/stainless/openapi.stainless.yml deleted file mode 100644 index 9461be996..000000000 --- a/client-sdks/stainless/openapi.stainless.yml +++ /dev/null @@ -1,610 +0,0 @@ 
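The `resources` mapping that closes above is what turns raw endpoints into SDK surface area: each `methods` entry binds an SDK method name to an HTTP route. As a hedged illustration of what `models.methods.list: get /v1/models` amounts to on the wire (the host and port are assumptions for a local server, not taken from this diff):

    # models.list in any generated SDK reduces to this request;
    # localhost:8321 is an assumed local Llama Stack server.
    curl -s http://localhost:8321/v1/models \
      -H "Authorization: Bearer $LLAMA_STACK_CLIENT_API_KEY"

The bearer header matches the `BearerAuth` scheme and the `LLAMA_STACK_CLIENT_API_KEY` environment variable declared under `client_settings` earlier in the config.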
-# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json - -organization: - # Name of your organization or company, used to determine the name of the client - # and headings. - name: llama-stack-client - docs: https://llama-stack.readthedocs.io/en/latest/ - contact: llamastack@meta.com -security: - - {} - - BearerAuth: [] -security_schemes: - BearerAuth: - type: http - scheme: bearer -# `targets` define the output targets and their customization options, such as -# whether to emit the Node SDK and what it's package name should be. -targets: - node: - package_name: llama-stack-client - production_repo: llamastack/llama-stack-client-typescript - publish: - npm: false - python: - package_name: llama_stack_client - production_repo: llamastack/llama-stack-client-python - options: - use_uv: true - publish: - pypi: true - project_name: llama_stack_client - kotlin: - reverse_domain: com.llama_stack_client.api - production_repo: null - publish: - maven: false - go: - package_name: llama-stack-client - production_repo: llamastack/llama-stack-client-go - options: - enable_v2: true - back_compat_use_shared_package: false - -# `client_settings` define settings for the API client, such as extra constructor -# arguments (used for authentication), retry behavior, idempotency, etc. -client_settings: - default_env_prefix: LLAMA_STACK_CLIENT - opts: - api_key: - type: string - read_env: LLAMA_STACK_CLIENT_API_KEY - auth: { security_scheme: BearerAuth } - nullable: true - -# `environments` are a map of the name of the environment (e.g. "sandbox", -# "production") to the corresponding url to use. -environments: - production: http://any-hosted-llama-stack.com - -# `pagination` defines [pagination schemes] which provides a template to match -# endpoints and generate next-page and auto-pagination helpers in the SDKs. -pagination: - - name: datasets_iterrows - type: offset - request: - dataset_id: - type: string - start_index: - type: integer - x-stainless-pagination-property: - purpose: offset_count_param - limit: - type: integer - response: - data: - type: array - items: - type: object - next_index: - type: integer - x-stainless-pagination-property: - purpose: offset_count_start_field - - name: openai_cursor_page - type: cursor - request: - limit: - type: integer - after: - type: string - x-stainless-pagination-property: - purpose: next_cursor_param - response: - data: - type: array - items: {} - has_more: - type: boolean - last_id: - type: string - x-stainless-pagination-property: - purpose: next_cursor_field -# `resources` define the structure and organziation for your API, such as how -# methods and models are grouped together and accessed. See the [configuration -# guide] for more information. 
-# -# [configuration guide]: -# https://app.stainlessapi.com/docs/guides/configure#resources -resources: - $shared: - models: - agent_config: AgentConfig - interleaved_content_item: InterleavedContentItem - interleaved_content: InterleavedContent - param_type: ParamType - safety_violation: SafetyViolation - sampling_params: SamplingParams - scoring_result: ScoringResult - message: Message - user_message: UserMessage - completion_message: CompletionMessage - tool_response_message: ToolResponseMessage - system_message: SystemMessage - tool_call: ToolCall - query_result: RAGQueryResult - document: RAGDocument - query_config: RAGQueryConfig - response_format: ResponseFormat - toolgroups: - models: - tool_group: ToolGroup - list_tool_groups_response: ListToolGroupsResponse - methods: - register: post /v1/toolgroups - get: get /v1/toolgroups/{toolgroup_id} - list: get /v1/toolgroups - unregister: delete /v1/toolgroups/{toolgroup_id} - tools: - methods: - get: get /v1/tools/{tool_name} - list: - endpoint: get /v1/tools - paginated: false - - tool_runtime: - models: - tool_def: ToolDef - tool_invocation_result: ToolInvocationResult - methods: - list_tools: - endpoint: get /v1/tool-runtime/list-tools - paginated: false - invoke_tool: post /v1/tool-runtime/invoke - subresources: - rag_tool: - methods: - insert: post /v1/tool-runtime/rag-tool/insert - query: post /v1/tool-runtime/rag-tool/query - - responses: - models: - response_object_stream: OpenAIResponseObjectStream - response_object: OpenAIResponseObject - methods: - create: - type: http - endpoint: post /v1/responses - streaming: - stream_event_model: responses.response_object_stream - param_discriminator: stream - retrieve: get /v1/responses/{response_id} - list: - type: http - endpoint: get /v1/responses - delete: - type: http - endpoint: delete /v1/responses/{response_id} - subresources: - input_items: - methods: - list: - type: http - endpoint: get /v1/responses/{response_id}/input_items - - conversations: - models: - conversation_object: Conversation - methods: - create: - type: http - endpoint: post /v1/conversations - retrieve: get /v1/conversations/{conversation_id} - update: - type: http - endpoint: post /v1/conversations/{conversation_id} - delete: - type: http - endpoint: delete /v1/conversations/{conversation_id} - subresources: - items: - methods: - get: - type: http - endpoint: get /v1/conversations/{conversation_id}/items/{item_id} - list: - type: http - endpoint: get /v1/conversations/{conversation_id}/items - create: - type: http - endpoint: post /v1/conversations/{conversation_id}/items - - inspect: - models: - healthInfo: HealthInfo - providerInfo: ProviderInfo - routeInfo: RouteInfo - versionInfo: VersionInfo - methods: - health: get /v1/health - version: get /v1/version - - embeddings: - models: - create_embeddings_response: OpenAIEmbeddingsResponse - methods: - create: post /v1/embeddings - - chat: - models: - chat_completion_chunk: OpenAIChatCompletionChunk - subresources: - completions: - methods: - create: - type: http - endpoint: post /v1/chat/completions - streaming: - stream_event_model: chat.chat_completion_chunk - param_discriminator: stream - list: - type: http - endpoint: get /v1/chat/completions - retrieve: - type: http - endpoint: get /v1/chat/completions/{completion_id} - completions: - methods: - create: - type: http - endpoint: post /v1/completions - streaming: - param_discriminator: stream - - vector_io: - models: - queryChunksResponse: QueryChunksResponse - methods: - insert: post /v1/vector-io/insert - 
query: post /v1/vector-io/query - - vector_stores: - models: - vector_store: VectorStoreObject - list_vector_stores_response: VectorStoreListResponse - vector_store_delete_response: VectorStoreDeleteResponse - vector_store_search_response: VectorStoreSearchResponsePage - methods: - create: post /v1/vector_stores - list: - endpoint: get /v1/vector_stores - retrieve: get /v1/vector_stores/{vector_store_id} - update: post /v1/vector_stores/{vector_store_id} - delete: delete /v1/vector_stores/{vector_store_id} - search: post /v1/vector_stores/{vector_store_id}/search - subresources: - files: - models: - vector_store_file: VectorStoreFileObject - methods: - list: get /v1/vector_stores/{vector_store_id}/files - retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id} - update: post /v1/vector_stores/{vector_store_id}/files/{file_id} - delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id} - create: post /v1/vector_stores/{vector_store_id}/files - content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content - file_batches: - models: - vector_store_file_batches: VectorStoreFileBatchObject - list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse - methods: - create: post /v1/vector_stores/{vector_store_id}/file_batches - retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id} - list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files - cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel - - models: - models: - model: Model - list_models_response: ListModelsResponse - methods: - retrieve: get /v1/models/{model_id} - list: - endpoint: get /v1/models - paginated: false - register: post /v1/models - unregister: delete /v1/models/{model_id} - subresources: - openai: - methods: - list: - endpoint: get /v1/models - paginated: false - - providers: - models: - list_providers_response: ListProvidersResponse - methods: - list: - endpoint: get /v1/providers - paginated: false - retrieve: get /v1/providers/{provider_id} - - routes: - models: - list_routes_response: ListRoutesResponse - methods: - list: - endpoint: get /v1/inspect/routes - paginated: false - - - moderations: - models: - create_response: ModerationObject - methods: - create: post /v1/moderations - - - safety: - models: - run_shield_response: RunShieldResponse - methods: - run_shield: post /v1/safety/run-shield - - - shields: - models: - shield: Shield - list_shields_response: ListShieldsResponse - methods: - retrieve: get /v1/shields/{identifier} - list: - endpoint: get /v1/shields - paginated: false - register: post /v1/shields - delete: delete /v1/shields/{identifier} - - synthetic_data_generation: - models: - syntheticDataGenerationResponse: SyntheticDataGenerationResponse - methods: - generate: post /v1/synthetic-data-generation/generate - - telemetry: - models: - span_with_status: SpanWithStatus - trace: Trace - query_spans_response: QuerySpansResponse - event: Event - query_condition: QueryCondition - methods: - query_traces: - endpoint: post /v1alpha/telemetry/traces - skip_test_reason: 'unsupported query params in java / kotlin' - get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree - query_spans: - endpoint: post /v1alpha/telemetry/spans - skip_test_reason: 'unsupported query params in java / kotlin' - query_metrics: - endpoint: post /v1alpha/telemetry/metrics/{metric_name} - skip_test_reason: 'unsupported query params in java / kotlin' - # log_event: post /v1alpha/telemetry/events - 
save_spans_to_dataset: post /v1alpha/telemetry/spans/export - get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id} - get_trace: get /v1alpha/telemetry/traces/{trace_id} - - scoring: - methods: - score: post /v1/scoring/score - score_batch: post /v1/scoring/score-batch - scoring_functions: - methods: - retrieve: get /v1/scoring-functions/{scoring_fn_id} - list: - endpoint: get /v1/scoring-functions - paginated: false - register: post /v1/scoring-functions - models: - scoring_fn: ScoringFn - scoring_fn_params: ScoringFnParams - list_scoring_functions_response: ListScoringFunctionsResponse - - benchmarks: - methods: - retrieve: get /v1alpha/eval/benchmarks/{benchmark_id} - list: - endpoint: get /v1alpha/eval/benchmarks - paginated: false - register: post /v1alpha/eval/benchmarks - models: - benchmark: Benchmark - list_benchmarks_response: ListBenchmarksResponse - - files: - methods: - create: post /v1/files - list: get /v1/files - retrieve: get /v1/files/{file_id} - delete: delete /v1/files/{file_id} - content: get /v1/files/{file_id}/content - models: - file: OpenAIFileObject - list_files_response: ListOpenAIFileResponse - delete_file_response: OpenAIFileDeleteResponse - - alpha: - subresources: - inference: - methods: - rerank: post /v1alpha/inference/rerank - - post_training: - models: - algorithm_config: AlgorithmConfig - post_training_job: PostTrainingJob - list_post_training_jobs_response: ListPostTrainingJobsResponse - methods: - preference_optimize: post /v1alpha/post-training/preference-optimize - supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune - subresources: - job: - methods: - artifacts: get /v1alpha/post-training/job/artifacts - cancel: post /v1alpha/post-training/job/cancel - status: get /v1alpha/post-training/job/status - list: - endpoint: get /v1alpha/post-training/jobs - paginated: false - - eval: - methods: - evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations - run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs - evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations - run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs - - subresources: - jobs: - methods: - cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id} - status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id} - retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result - models: - evaluate_response: EvaluateResponse - benchmark_config: BenchmarkConfig - job: Job - - agents: - methods: - create: post /v1alpha/agents - list: get /v1alpha/agents - retrieve: get /v1alpha/agents/{agent_id} - delete: delete /v1alpha/agents/{agent_id} - models: - inference_step: InferenceStep - tool_execution_step: ToolExecutionStep - tool_response: ToolResponse - shield_call_step: ShieldCallStep - memory_retrieval_step: MemoryRetrievalStep - subresources: - session: - models: - session: Session - methods: - list: get /v1alpha/agents/{agent_id}/sessions - create: post /v1alpha/agents/{agent_id}/session - delete: delete /v1alpha/agents/{agent_id}/session/{session_id} - retrieve: get /v1alpha/agents/{agent_id}/session/{session_id} - steps: - methods: - retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id} - turn: - models: - turn: Turn - turn_response_event: AgentTurnResponseEvent - agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk - methods: - create: - type: http - endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn - streaming: - 
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk - param_discriminator: stream - retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id} - resume: - type: http - endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume - streaming: - stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk - param_discriminator: stream - - beta: - subresources: - datasets: - models: - list_datasets_response: ListDatasetsResponse - methods: - register: post /v1beta/datasets - retrieve: get /v1beta/datasets/{dataset_id} - list: - endpoint: get /v1beta/datasets - paginated: false - unregister: delete /v1beta/datasets/{dataset_id} - iterrows: get /v1beta/datasetio/iterrows/{dataset_id} - appendrows: post /v1beta/datasetio/append-rows/{dataset_id} - - -settings: - license: MIT - unwrap_response_fields: [ data ] - -openapi: - transformations: - - command: renameValue - reason: pydantic reserved name - args: - filter: - only: - - '$.components.schemas.InferenceStep.properties.model_response' - rename: - python: - property_name: 'inference_model_response' - - # - command: renameValue - # reason: pydantic reserved name - # args: - # filter: - # only: - # - '$.components.schemas.Model.properties.model_type' - # rename: - # python: - # property_name: 'type' - - command: mergeObject - reason: Better return_type using enum - args: - target: - - '$.components.schemas' - object: - ReturnType: - additionalProperties: false - properties: - type: - enum: - - string - - number - - boolean - - array - - object - - json - - union - - chat_completion_input - - completion_input - - agent_turn_input - required: - - type - type: object - - command: replaceProperties - reason: Replace return type properties with better model (see above) - args: - filter: - only: - - '$.components.schemas.ScoringFn.properties.return_type' - - '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type' - value: - $ref: '#/components/schemas/ReturnType' - - command: oneOfToAnyOf - reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants - - reason: For better names - command: extractToRefs - args: - ref: - target: '$.components.schemas.ToolCallDelta.properties.tool_call' - name: '#/components/schemas/ToolCallOrString' - -# `readme` is used to configure the code snippets that will be rendered in the -# README.md of various SDKs. In particular, you can change the `headline` -# snippet's endpoint and the arguments to call it with. -readme: - example_requests: - default: - type: request - endpoint: post /v1/chat/completions - params: &ref_0 {} - headline: - type: request - endpoint: post /v1/models - params: *ref_0 - pagination: - type: request - endpoint: post /v1/chat/completions - params: {} diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 93049a14a..51607d92d 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -1,20 +1,158 @@ openapi: 3.1.0 info: - title: >- - Llama Stack Specification - Stable & Experimental APIs - version: v1 - description: >- + title: Llama Stack Specification - Stable & Experimental APIs + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. 
- **🔗 COMBINED**: This specification includes both stable production-ready APIs - and experimental pre-release APIs. Use stable APIs for production deployments - and experimental APIs for testing new features. + **🔗 COMBINED**: This specification includes both stable production-ready APIs + and experimental pre-release APIs. Use stable APIs for production deployments + and experimental APIs for testing new features. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: + /v1/batches: + get: + responses: + '200': + description: A list of batch objects. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBatchesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Batches + summary: List Batches + description: List all batches for the current user. + operationId: list_batches_v1_batches_get + parameters: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + type: integer + default: 20 + title: Limit + post: + responses: + '200': + description: The created batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Batches + summary: Create Batch + description: Create a new batch for processing multiple API requests. + operationId: create_batch_v1_batches_post + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' + /v1/batches/{batch_id}: + get: + responses: + '200': + description: The batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Retrieve Batch + description: Retrieve information about a specific batch. + operationId: retrieve_batch_v1_batches__batch_id__get + parameters: + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' + /v1/batches/{batch_id}/cancel: + post: + responses: + '200': + description: The updated batch object. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Cancel Batch + description: Cancel a batch that is in progress. + operationId: cancel_batch_v1_batches__batch_id__cancel_post + parameters: + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/chat/completions: get: responses: @@ -26,48 +164,56 @@ paths: $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: List chat completions. + - Inference + summary: List Chat Completions description: List chat completions. + operationId: list_chat_completions_v1_chat_completions_get parameters: - - name: after - in: query - description: >- - The ID of the last chat completion to return. - required: false - schema: - type: string - - name: limit - in: query - description: >- - The maximum number of chat completions to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort the chat completions by: "asc" or "desc". Defaults to - "desc". - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -75,35 +221,36 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletion' - - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + $ref: '#/components/schemas/OpenAIChatCompletion' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIChatCompletionChunk' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: Create chat completions. 
- description: >- + - Inference + summary: Openai Chat Completion + description: |- Create chat completions. - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] + Generate an OpenAI-compatible chat completion for the given messages using the specified model. + operationId: openai_chat_completion_v1_chat_completions_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody' - required: true - deprecated: false /v1/chat/completions/{completion_id}: get: responses: @@ -114,30 +261,32 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletionWithInputMessages' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Get chat completion. - description: >- + - Inference + summary: Get Chat Completion + description: |- Get chat completion. Describe a chat completion by its ID. + operationId: get_chat_completion_v1_chat_completions__completion_id__get parameters: - - name: completion_id - in: path - description: ID of the chat completion. - required: true - schema: - type: string - deprecated: false + - name: completion_id + in: path + required: true + schema: + type: string + description: 'Path parameter: completion_id' /v1/completions: post: responses: @@ -148,31 +297,31 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletion' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create completion. - description: >- + - Inference + summary: Openai Completion + description: |- Create completion. - Generate an OpenAI-compatible completion for the given prompt using the specified - model. - parameters: [] + Generate an OpenAI-compatible completion for the given prompt using the specified model. + operationId: openai_completion_v1_completions_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody' required: true - deprecated: false /v1/conversations: post: responses: @@ -183,30 +332,31 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Create a conversation. 
- description: >- + - Conversations + summary: Create Conversation + description: |- Create a conversation. Create a conversation. - parameters: [] + operationId: create_conversation_v1_conversations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreateConversationRequest' required: true - deprecated: false /v1/conversations/{conversation_id}: get: responses: @@ -217,30 +367,32 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve a conversation. - description: >- + - Conversations + summary: Get Conversation + description: |- Retrieve a conversation. Get a conversation with the given ID. + operationId: get_conversation_v1_conversations__conversation_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' post: responses: '200': @@ -250,36 +402,38 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Update a conversation. - description: >- + - Conversations + summary: Update Conversation + description: |- Update a conversation. Update a conversation's metadata with the given ID. + operationId: update_conversation_v1_conversations__conversation_id__post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/UpdateConversationRequest' required: true - deprecated: false delete: responses: '200': @@ -289,30 +443,32 @@ paths: schema: $ref: '#/components/schemas/ConversationDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete a conversation. - description: >- + - Conversations + summary: Openai Delete Conversation + description: |- Delete a conversation. 
Delete a conversation with the given ID. + operationId: openai_delete_conversation_v1_conversations__conversation_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' /v1/conversations/{conversation_id}/items: get: responses: @@ -324,173 +480,68 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: List items. - description: >- + - Conversations + summary: List Items + description: |- List items. List items in the conversation. + operationId: list_items_v1_conversations__conversation_id__items_get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - enum: + - asc + - desc type: string - - name: after - in: query - description: >- - An item ID to list items after, used in pagination. - required: true - schema: - oneOf: - - type: string - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: include - in: query - description: >- - Specify additional output data to include in the response. - required: true - schema: - oneOf: - - type: array - items: - type: string - enum: - - code_interpreter_call.outputs - - computer_call_output.output.image_url - - file_search_call.results - - message.input_image.image_url - - message.output_text.logprobs - - reasoning.encrypted_content - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: limit - in: query - description: >- - A limit on the number of objects to be returned (1-100, default 20). 
- required: true - schema: - oneOf: - - type: integer - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: order - in: query - description: >- - The order to return items in (asc or desc, default desc). - required: true - schema: - oneOf: - - type: string - enum: - - asc - - desc - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - deprecated: false + - type: 'null' + title: Order + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array + items: + $ref: '#/components/schemas/ConversationItemInclude' + - type: 'null' + title: Include post: responses: '200': @@ -501,35 +552,37 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: Create items. - description: >- + - Conversations + summary: Add Items + description: |- Create items. Create items in the conversation. + operationId: add_items_v1_conversations__conversation_id__items_post parameters: - - name: conversation_id - in: path - description: The conversation identifier. 
- required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/AddItemsRequest' - required: true - deprecated: false /v1/conversations/{conversation_id}/items/{item_id}: get: responses: @@ -538,38 +591,40 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ConversationItem' + $ref: '#/components/schemas/OpenAIResponseMessage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve an item. - description: >- + - Conversations + summary: Retrieve + description: |- Retrieve an item. Retrieve a conversation item. + operationId: retrieve_v1_conversations__conversation_id__items__item_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' delete: responses: '200': @@ -579,375 +634,378 @@ paths: schema: $ref: '#/components/schemas/ConversationItemDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete an item. - description: >- + - Conversations + summary: Openai Delete Conversation Item + description: |- Delete an item. Delete a conversation item. + operationId: openai_delete_conversation_item_v1_conversations__conversation_id__items__item_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' /v1/embeddings: post: responses: '200': - description: >- - An OpenAIEmbeddingsResponse containing the embeddings. + description: An OpenAIEmbeddingsResponse containing the embeddings. 
content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create embeddings. - description: >- + - Inference + summary: Openai Embeddings + description: |- Create embeddings. - Generate OpenAI-compatible embeddings for the given input using the specified - model. - parameters: [] + Generate OpenAI-compatible embeddings for the given input using the specified model. + operationId: openai_embeddings_v1_embeddings_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true - deprecated: false /v1/files: get: responses: '200': - description: >- - An ListOpenAIFileResponse containing the list of files. + description: An ListOpenAIFileResponse containing the list of files. content: application/json: schema: $ref: '#/components/schemas/ListOpenAIFileResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: List files. - description: >- + - Files + summary: Openai List Files + description: |- List files. Returns a list of files that belong to the user's organization. + operationId: openai_list_files_v1_files_get parameters: - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. For instance, if you make a list request and receive - 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo - in order to fetch the next page of the list. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 10,000, and the default is 10,000. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - $ref: '#/components/schemas/Order' - - name: purpose - in: query - description: >- - Only return files with the given purpose. 
- required: false - schema: - $ref: '#/components/schemas/OpenAIFilePurpose' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 10000 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: purpose + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/OpenAIFilePurpose' + - type: 'null' + title: Purpose post: responses: '200': - description: >- - An OpenAIFileObject representing the uploaded file. + description: An OpenAIFileObject representing the uploaded file. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: Upload file. - description: >- + - Files + summary: Openai Upload File + description: |- Upload file. Upload a file that can be used across various endpoints. - The file upload should be a multipart form request with: - - file: The File object (not file name) to be uploaded. - - purpose: The intended purpose of the uploaded file. - - expires_after: Optional form values describing expiration for the file. - parameters: [] + operationId: openai_upload_file_v1_files_post requestBody: + required: true content: multipart/form-data: schema: - type: object - properties: - file: - type: string - format: binary - purpose: - $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after: - $ref: '#/components/schemas/ExpiresAfter' - required: - - file - - purpose - required: true - deprecated: false + $ref: '#/components/schemas/Body_openai_upload_file_v1_files_post' /v1/files/{file_id}: get: responses: '200': - description: >- - An OpenAIFileObject containing file information. + description: An OpenAIFileObject containing file information. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file. - description: >- + - Files + summary: Openai Retrieve File + description: |- Retrieve file. Returns information about a specific file. + operationId: openai_retrieve_file_v1_files__file_id__get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. 
- required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' delete: responses: '200': - description: >- - An OpenAIFileDeleteResponse indicating successful deletion. + description: An OpenAIFileDeleteResponse indicating successful deletion. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Delete file. + - Files + summary: Openai Delete File description: Delete file. + operationId: openai_delete_file_v1_files__file_id__delete parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/files/{file_id}/content: get: responses: '200': - description: >- - The raw file content as a binary response. + description: The raw file content as a binary response. content: application/json: schema: $ref: '#/components/schemas/Response' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file content. - description: >- + - Files + summary: Openai Retrieve File Content + description: |- Retrieve file content. Returns the contents of the specified file. + operationId: openai_retrieve_file_content_v1_files__file_id__content_get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/health: get: responses: '200': - description: >- - Health information indicating if the service is operational. + description: Health information indicating if the service is operational. content: application/json: schema: $ref: '#/components/schemas/HealthInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get health status. - description: >- + - Inspect + summary: Health + description: |- Get health status. Get the current health status of the service. 
- parameters: [] - deprecated: false + operationId: health_v1_health_get /v1/inspect/routes: get: responses: '200': - description: >- - Response containing information about all available routes. + description: Response containing information about all available routes. content: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inspect - summary: List routes. - description: >- + - Inspect + summary: List Routes + description: |- List routes. List all available API routes with their methods and implementing providers. - parameters: [] - deprecated: false + operationId: list_routes_v1_inspect_routes_get + parameters: + - name: api_filter + in: query + required: false + schema: + anyOf: + - enum: + - v1 + - v1alpha + - v1beta + - deprecated + type: string + - type: 'null' + title: Api Filter /v1/models: get: responses: '200': - description: A ListModelsResponse. + description: A OpenAIListModelsResponse. content: application/json: schema: - $ref: '#/components/schemas/ListModelsResponse' + $ref: '#/components/schemas/OpenAIListModelsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: List all models. - description: List all models. - parameters: [] - deprecated: false + - Models + summary: Openai List Models + description: List models using the OpenAI API. + operationId: openai_list_models_v1_models_get post: responses: '200': @@ -957,30 +1015,32 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Register model. - description: >- + - Models + summary: Register Model + description: |- Register model. Register a model. 
-      parameters: []
+      operationId: register_model_v1_models_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/RegisterModelRequest'
         required: true
-      deprecated: false
+      deprecated: true
   /v1/models/{model_id}:
     get:
       responses:
@@ -991,60 +1051,64 @@ paths:
               schema:
                 $ref: '#/components/schemas/Model'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Models
-      summary: Get model.
-      description: >-
+      - Models
+      summary: Get Model
+      description: |-
         Get model.

         Get a model by its identifier.
+      operationId: get_model_v1_models__model_id__get
       parameters:
-        - name: model_id
-          in: path
-          description: The identifier of the model to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: model_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: model_id'
     delete:
       responses:
-        '200':
-          description: OK
         '400':
+          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - Models
-      summary: Unregister model.
-      description: >-
+      - Models
+      summary: Unregister Model
+      description: |-
         Unregister model.

         Unregister a model.
+      operationId: unregister_model_v1_models__model_id__delete
       parameters:
-        - name: model_id
-          in: path
-          description: >-
-            The identifier of the model to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: model_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: model_id'
+      deprecated: true
   /v1/moderations:
     post:
       responses:
@@ -1055,56 +1119,57 @@ paths:
               schema:
                 $ref: '#/components/schemas/ModerationObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Safety
-      summary: Create moderation.
-      description: >-
+      - Safety
+      summary: Run Moderation
+      description: |-
         Create moderation.

         Classifies if text and/or image inputs are potentially harmful.
-      parameters: []
+      operationId: run_moderation_v1_moderations_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/RunModerationRequest'
         required: true
-      deprecated: false
   /v1/prompts:
     get:
       responses:
         '200':
-          description: >-
-            A ListPromptsResponse containing all prompts.
+          description: A ListPromptsResponse containing all prompts.
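# Example: POST /v1/moderations is the OpenAI-compatible moderation call. The
# body fields below (`input`, `model`) follow the OpenAI convention and are an
# assumption here, as is the model id:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8321/v1/moderations",
#       json={"input": "some text to classify", "model": "llama-guard"},  # hypothetical model id
#   )
#   print(resp.json())  # ModerationObject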
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListPromptsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Prompts
-      summary: List all prompts.
+      - Prompts
+      summary: List Prompts
       description: List all prompts.
-      parameters: []
-      deprecated: false
+      operationId: list_prompts_v1_prompts_get
     post:
       responses:
         '200':
@@ -1114,30 +1179,31 @@ paths:
               schema:
                 $ref: '#/components/schemas/Prompt'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Prompts
-      summary: Create prompt.
-      description: >-
+      - Prompts
+      summary: Create Prompt
+      description: |-
         Create prompt.

         Create a new prompt.
-      parameters: []
+      operationId: create_prompt_v1_prompts_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/CreatePromptRequest'
         required: true
-      deprecated: false
   /v1/prompts/{prompt_id}:
     get:
       responses:
@@ -1149,246 +1215,254 @@ paths:
                 $ref: '#/components/schemas/Prompt'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Prompts
-      summary: Get prompt.
-      description: >-
+      - Prompts
+      summary: Get Prompt
+      description: |-
         Get prompt.

         Get a prompt by its identifier and optional version.
+      operationId: get_prompt_v1_prompts__prompt_id__get
       parameters:
-        - name: prompt_id
-          in: path
-          description: The identifier of the prompt to get.
-          required: true
-          schema:
-            type: string
-        - name: version
-          in: query
-          description: >-
-            The version of the prompt to get (defaults to latest).
-          required: false
-          schema:
-            type: integer
-      deprecated: false
+      - name: version
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          title: Version
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
     post:
       responses:
         '200':
-          description: >-
-            The updated Prompt resource with incremented version.
+          description: The updated Prompt resource with incremented version.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/Prompt'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Prompts
-      summary: Update prompt.
-      description: >-
+      - Prompts
+      summary: Update Prompt
+      description: |-
         Update prompt.

         Update an existing prompt (increments version).
+      operationId: update_prompt_v1_prompts__prompt_id__post
       parameters:
-        - name: prompt_id
-          in: path
-          description: The identifier of the prompt to update.
-          required: true
-          schema:
-            type: string
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
       requestBody:
+        required: true
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/UpdatePromptRequest'
-        required: true
-      deprecated: false
     delete:
       responses:
-        '200':
-          description: OK
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
+        '204':
+          description: Successful Response
       tags:
-        - Prompts
-      summary: Delete prompt.
-      description: >-
+      - Prompts
+      summary: Delete Prompt
+      description: |-
         Delete prompt.

         Delete a prompt.
+      operationId: delete_prompt_v1_prompts__prompt_id__delete
       parameters:
-        - name: prompt_id
-          in: path
-          description: The identifier of the prompt to delete.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
   /v1/prompts/{prompt_id}/set-default-version:
     post:
       responses:
         '200':
-          description: >-
-            The prompt with the specified version now set as default.
+          description: The prompt with the specified version now set as default.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/Prompt'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Prompts
-      summary: Set prompt version.
-      description: >-
+      - Prompts
+      summary: Set Default Version
+      description: |-
         Set prompt version.

         Set which version of a prompt should be the default in get_prompt (latest).
+      operationId: set_default_version_v1_prompts__prompt_id__set_default_version_post
       parameters:
-        - name: prompt_id
-          in: path
-          description: The identifier of the prompt.
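# Example: the `version` query parameter on GET /v1/prompts/{prompt_id} is now
# a nullable integer that defaults to the latest version. A sketch with a
# hypothetical prompt id:
#
#   import requests
#
#   base = "http://localhost:8321"
#   latest = requests.get(f"{base}/v1/prompts/my-prompt").json()
#   v1 = requests.get(f"{base}/v1/prompts/my-prompt", params={"version": 1}).json()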
-          required: true
-          schema:
-            type: string
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/SetDefaultVersionRequest'
         required: true
-      deprecated: false
   /v1/prompts/{prompt_id}/versions:
     get:
       responses:
         '200':
-          description: >-
-            A ListPromptsResponse containing all versions of the prompt.
+          description: A ListPromptsResponse containing all versions of the prompt.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListPromptsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Prompts
-      summary: List prompt versions.
-      description: >-
+      - Prompts
+      summary: List Prompt Versions
+      description: |-
         List prompt versions.

         List all versions of a specific prompt.
+      operationId: list_prompt_versions_v1_prompts__prompt_id__versions_get
       parameters:
-        - name: prompt_id
-          in: path
-          description: >-
-            The identifier of the prompt to list versions for.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
   /v1/providers:
     get:
       responses:
         '200':
-          description: >-
-            A ListProvidersResponse containing information about all providers.
+          description: A ListProvidersResponse containing information about all providers.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListProvidersResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Providers
-      summary: List providers.
-      description: >-
+      - Providers
+      summary: List Providers
+      description: |-
         List providers.

         List all available providers.
-      parameters: []
-      deprecated: false
+      operationId: list_providers_v1_providers_get
   /v1/providers/{provider_id}:
     get:
       responses:
         '200':
-          description: >-
-            A ProviderInfo object containing the provider's details.
+          description: A ProviderInfo object containing the provider's details.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ProviderInfo'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Providers
-      summary: Get provider.
-      description: >-
+      - Providers
+      summary: Inspect Provider
+      description: |-
         Get provider.

         Get detailed information about a specific provider.
+      operationId: inspect_provider_v1_providers__provider_id__get
       parameters:
-        - name: provider_id
-          in: path
-          description: The ID of the provider to inspect.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: provider_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: provider_id'
   /v1/responses:
     get:
       responses:
@@ -1400,45 +1474,56 @@ paths:
                 $ref: '#/components/schemas/ListOpenAIResponseObject'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Agents
-      summary: List all responses.
+      - Agents
+      summary: List Openai Responses
       description: List all responses.
+      operationId: list_openai_responses_v1_responses_get
       parameters:
-        - name: after
-          in: query
-          description: The ID of the last response to return.
-          required: false
-          schema:
-            type: string
-        - name: limit
-          in: query
-          description: The number of responses to return.
-          required: false
-          schema:
-            type: integer
-        - name: model
-          in: query
-          description: The model to filter responses by.
-          required: false
-          schema:
-            type: string
-        - name: order
-          in: query
-          description: >-
-            The order to sort responses by when sorted by created_at ('asc' or 'desc').
-          required: false
-          schema:
-            $ref: '#/components/schemas/Order'
-      deprecated: false
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 50
+          title: Limit
+      - name: model
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Model
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - $ref: '#/components/schemas/Order'
+          - type: 'null'
+          default: desc
+          title: Order
     post:
      responses:
         '200':
@@ -1452,38 +1537,51 @@ paths:
                 $ref: '#/components/schemas/OpenAIResponseObjectStream'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Agents
-      summary: Create a model response.
+      - Agents
+      summary: Create Openai Response
       description: Create a model response.
-      parameters: []
+      operationId: create_openai_response_v1_responses_post
       requestBody:
+        required: true
        content:
           application/json:
             schema:
               $ref: '#/components/schemas/CreateOpenaiResponseRequest'
-        required: true
-      deprecated: false
-      x-llama-stack-extra-body-params:
-        - name: guardrails
-          schema:
-            type: array
-            items:
-              oneOf:
+      x-llama-stack-extra-body-params:
+        guardrails:
+          $defs:
+            ResponseGuardrailSpec:
+              description: |-
+                Specification for a guardrail to apply during response generation.
+
+                :param type: The type/identifier of the guardrail.
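# Example: GET /v1/responses now documents server-side defaults (limit=50,
# order=desc), so the two calls below should be equivalent. A sketch:
#
#   import requests
#
#   base = "http://localhost:8321"
#   a = requests.get(f"{base}/v1/responses").json()
#   b = requests.get(f"{base}/v1/responses", params={"limit": 50, "order": "desc"}).json()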
+              properties:
+                type:
+                  title: Type
+                  type: string
+              required:
+              - type
+              title: ResponseGuardrailSpec
+              type: object
+          anyOf:
+          - items:
+              anyOf:
              - type: string
              - $ref: '#/components/schemas/ResponseGuardrailSpec'
-          description: >-
-            List of guardrails to apply during response generation. Guardrails provide
-            safety and content moderation.
-          required: false
+              type: array
+          - type: 'null'
+          description: List of guardrails to apply during response generation. Guardrails provide safety and content moderation.
   /v1/responses/{response_id}:
     get:
       responses:
@@ -1494,28 +1592,29 @@ paths:
              schema:
                 $ref: '#/components/schemas/OpenAIResponseObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Agents
-      summary: Get a model response.
+      - Agents
+      summary: Get Openai Response
       description: Get a model response.
+      operationId: get_openai_response_v1_responses__response_id__get
       parameters:
-        - name: response_id
-          in: path
-          description: >-
-            The ID of the OpenAI response to retrieve.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: response_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: response_id'
     delete:
       responses:
         '200':
@@ -1525,27 +1624,29 @@ paths:
              schema:
                 $ref: '#/components/schemas/OpenAIDeleteResponseObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Agents
-      summary: Delete a response.
+      - Agents
+      summary: Delete Openai Response
       description: Delete a response.
+      operationId: delete_openai_response_v1_responses__response_id__delete
       parameters:
-        - name: response_id
-          in: path
-          description: The ID of the OpenAI response to delete.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: response_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: response_id'
   /v1/responses/{response_id}/input_items:
     get:
       responses:
@@ -1557,65 +1658,72 @@ paths:
                 $ref: '#/components/schemas/ListOpenAIResponseInputItem'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Agents
-      summary: List input items.
+      - Agents
+      summary: List Openai Response Input Items
       description: List input items.
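# Example: `guardrails` is an extra body parameter on POST /v1/responses; per
# the schema above, each entry is either a guardrail id string or a
# ResponseGuardrailSpec object. The `model`/`input` fields follow the OpenAI
# Responses convention and, like the ids, are assumptions here:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8321/v1/responses",
#       json={
#           "model": "llama3.2:3b",  # hypothetical model id
#           "input": "Tell me a story",
#           "guardrails": ["llama-guard", {"type": "llama-guard"}],
#       },
#   )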
+      operationId: list_openai_response_input_items_v1_responses__response_id__input_items_get
       parameters:
-        - name: response_id
-          in: path
-          description: >-
-            The ID of the response to retrieve input items for.
-          required: true
-          schema:
-            type: string
-        - name: after
-          in: query
-          description: >-
-            An item ID to list items after, used for pagination.
-          required: false
-          schema:
-            type: string
-        - name: before
-          in: query
-          description: >-
-            An item ID to list items before, used for pagination.
-          required: false
-          schema:
-            type: string
-        - name: include
-          in: query
-          description: >-
-            Additional fields to include in the response.
-          required: false
-          schema:
-            type: array
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: before
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Before
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 20
+          title: Limit
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - $ref: '#/components/schemas/Order'
+          - type: 'null'
+          default: desc
+          title: Order
+      - name: response_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: response_id'
+      - name: include
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: array
             items:
               type: string
-        - name: limit
-          in: query
-          description: >-
-            A limit on the number of objects to be returned. Limit can range between
-            1 and 100, and the default is 20.
-          required: false
-          schema:
-            type: integer
-        - name: order
-          in: query
-          description: >-
-            The order to return the input items in. Default is desc.
-          required: false
-          schema:
-            $ref: '#/components/schemas/Order'
-      deprecated: false
+          - type: 'null'
+          title: Include
   /v1/safety/run-shield:
     post:
       responses:
@@ -1626,30 +1734,31 @@ paths:
              schema:
                 $ref: '#/components/schemas/RunShieldResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Safety
-      summary: Run shield.
-      description: >-
+      - Safety
+      summary: Run Shield
+      description: |-
         Run shield.

         Run a shield.
-      parameters: []
+      operationId: run_shield_v1_safety_run_shield_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/RunShieldRequest'
         required: true
-      deprecated: false
   /v1/scoring-functions:
     get:
       responses:
@@ -1660,47 +1769,50 @@ paths:
              schema:
                 $ref: '#/components/schemas/ListScoringFunctionsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - ScoringFunctions
-      summary: List all scoring functions.
+      - Scoring Functions
+      summary: List Scoring Functions
       description: List all scoring functions.
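# Example: the input-items listing keeps cursor pagination (`after`/`before`)
# and now documents limit=20 / order=desc defaults. A sketch with a
# hypothetical response id:
#
#   import requests
#
#   url = "http://localhost:8321/v1/responses/resp_123/input_items"
#   page = requests.get(url, params={"limit": 20, "order": "desc"}).json()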
-      parameters: []
-      deprecated: false
+      operationId: list_scoring_functions_v1_scoring_functions_get
     post:
       responses:
-        '200':
-          description: OK
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - ScoringFunctions
-      summary: Register a scoring function.
+      - Scoring Functions
+      summary: Register Scoring Function
       description: Register a scoring function.
-      parameters: []
+      operationId: register_scoring_function_v1_scoring_functions_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/RegisterScoringFunctionRequest'
         required: true
-      deprecated: false
+      deprecated: true
   /v1/scoring-functions/{scoring_fn_id}:
     get:
       responses:
@@ -1711,86 +1823,90 @@ paths:
              schema:
                 $ref: '#/components/schemas/ScoringFn'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - ScoringFunctions
-      summary: Get a scoring function by its ID.
+      - Scoring Functions
+      summary: Get Scoring Function
      description: Get a scoring function by its ID.
+      operationId: get_scoring_function_v1_scoring_functions__scoring_fn_id__get
       parameters:
-        - name: scoring_fn_id
-          in: path
-          description: The ID of the scoring function to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: scoring_fn_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: scoring_fn_id'
     delete:
       responses:
-        '200':
-          description: OK
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - ScoringFunctions
-      summary: Unregister a scoring function.
+      - Scoring Functions
+      summary: Unregister Scoring Function
       description: Unregister a scoring function.
+      operationId: unregister_scoring_function_v1_scoring_functions__scoring_fn_id__delete
       parameters:
-        - name: scoring_fn_id
-          in: path
-          description: >-
-            The ID of the scoring function to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: scoring_fn_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: scoring_fn_id'
+      deprecated: true
   /v1/scoring/score:
     post:
       responses:
         '200':
-          description: >-
-            A ScoreResponse object containing rows and aggregated results.
+          description: A ScoreResponse object containing rows and aggregated results.
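# Example: POST /v1/scoring/score takes rows plus a map of scoring-function
# ids. The body fields (`input_rows`, `scoring_functions`) and the function id
# are assumptions from the llama-stack scoring API, not spelled out in this
# patch:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8321/v1/scoring/score",
#       json={
#           "input_rows": [{"question": "1+1?", "answer": "2"}],
#           "scoring_functions": {"basic::equality": None},  # hypothetical fn id
#       },
#   )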
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ScoreResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Scoring
-      summary: Score a list of rows.
+      - Scoring
+      summary: Score
       description: Score a list of rows.
-      parameters: []
+      operationId: score_v1_scoring_score_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/ScoreRequest'
         required: true
-      deprecated: false
   /v1/scoring/score-batch:
     post:
       responses:
@@ -1801,27 +1917,28 @@ paths:
              schema:
                 $ref: '#/components/schemas/ScoreBatchResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Scoring
-      summary: Score a batch of rows.
+      - Scoring
+      summary: Score Batch
       description: Score a batch of rows.
-      parameters: []
+      operationId: score_batch_v1_scoring_score_batch_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/ScoreBatchRequest'
         required: true
-      deprecated: false
   /v1/shields:
     get:
       responses:
@@ -1832,21 +1949,22 @@ paths:
              schema:
                 $ref: '#/components/schemas/ListShieldsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Shields
-      summary: List all shields.
+      - Shields
+      summary: List Shields
       description: List all shields.
-      parameters: []
-      deprecated: false
+      operationId: list_shields_v1_shields_get
     post:
       responses:
         '200':
@@ -1856,27 +1974,29 @@ paths:
              schema:
                 $ref: '#/components/schemas/Shield'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Shields
-      summary: Register a shield.
+      - Shields
+      summary: Register Shield
       description: Register a shield.
-      parameters: []
+      operationId: register_shield_v1_shields_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/RegisterShieldRequest'
         required: true
-      deprecated: false
+      deprecated: true
   /v1/shields/{identifier}:
     get:
       responses:
@@ -1887,88 +2007,58 @@ paths:
              schema:
                 $ref: '#/components/schemas/Shield'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Shields
-      summary: Get a shield by its identifier.
+      - Shields
+      summary: Get Shield
       description: Get a shield by its identifier.
+      operationId: get_shield_v1_shields__identifier__get
       parameters:
-        - name: identifier
-          in: path
-          description: The identifier of the shield to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: identifier
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: identifier'
     delete:
       responses:
-        '200':
-          description: OK
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - Shields
-      summary: Unregister a shield.
+      - Shields
+      summary: Unregister Shield
       description: Unregister a shield.
+      operationId: unregister_shield_v1_shields__identifier__delete
       parameters:
-        - name: identifier
-          in: path
-          description: >-
-            The identifier of the shield to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
-  /v1/synthetic-data-generation/generate:
-    post:
-      responses:
-        '200':
-          description: >-
-            Response containing filtered synthetic data samples and optional statistics
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/SyntheticDataGenerationResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - SyntheticDataGeneration (Coming Soon)
-      summary: >-
-        Generate synthetic data based on input dialogs and apply filtering.
-      description: >-
-        Generate synthetic data based on input dialogs and apply filtering.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/SyntheticDataGenerateRequest'
+      - name: identifier
+        in: path
         required: true
-      deprecated: false
+        schema:
+          type: string
+        description: 'Path parameter: identifier'
+      deprecated: true
   /v1/tool-runtime/invoke:
     post:
      responses:
@@ -1979,27 +2069,29 @@ paths:
              schema:
                 $ref: '#/components/schemas/ToolInvocationResult'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - ToolRuntime
-      summary: Run a tool with the given arguments.
+      - Tool Runtime
+      summary: Invoke Tool
       description: Run a tool with the given arguments.
-      parameters: []
+      operationId: invoke_tool_v1_tool_runtime_invoke_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/InvokeToolRequest'
         required: true
-      deprecated: false
+      deprecated: true
   /v1/tool-runtime/list-tools:
     get:
       responses:
@@ -2011,97 +2103,47 @@ paths:
                 $ref: '#/components/schemas/ListToolDefsResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - ToolRuntime
-      summary: List all tools in the runtime.
+      - Tool Runtime
+      summary: List Runtime Tools
       description: List all tools in the runtime.
+      operationId: list_runtime_tools_v1_tool_runtime_list_tools_get
       parameters:
-        - name: tool_group_id
-          in: query
-          description: >-
-            The ID of the tool group to list tools for.
-          required: false
-          schema:
-            type: string
-        - name: mcp_endpoint
-          in: query
-          description: >-
-            The MCP endpoint to use for the tool group.
-          required: false
-          schema:
-            $ref: '#/components/schemas/URL'
-      deprecated: false
-  /v1/tool-runtime/rag-tool/insert:
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Index documents so they can be used by the RAG system.
-      description: >-
-        Index documents so they can be used by the RAG system.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InsertRequest'
-        required: true
-      deprecated: false
-  /v1/tool-runtime/rag-tool/query:
-    post:
-      responses:
-        '200':
-          description: >-
-            RAGQueryResult containing the retrieved content and metadata
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RAGQueryResult'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Query the RAG system for context; typically invoked by the agent.
-      description: >-
-        Query the RAG system for context; typically invoked by the agent.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/QueryRequest'
-        required: true
-      deprecated: false
+      - name: authorization
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Authorization
+      - name: tool_group_id
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Tool Group Id
+      - name: mcp_endpoint
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - $ref: '#/components/schemas/URL'
+          - type: 'null'
+          title: Mcp Endpoint
+      deprecated: true
   /v1/toolgroups:
     get:
      responses:
@@ -2112,47 +2154,51 @@ paths:
              schema:
                 $ref: '#/components/schemas/ListToolGroupsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - ToolGroups
-      summary: List tool groups with optional provider.
+      - Tool Groups
+      summary: List Tool Groups
       description: List tool groups with optional provider.
-      parameters: []
-      deprecated: false
+      operationId: list_tool_groups_v1_toolgroups_get
+      deprecated: true
     post:
       responses:
-        '200':
-          description: OK
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - ToolGroups
-      summary: Register a tool group.
+      - Tool Groups
+      summary: Register Tool Group
      description: Register a tool group.
-      parameters: []
+      operationId: register_tool_group_v1_toolgroups_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/RegisterToolGroupRequest'
         required: true
-      deprecated: false
+      deprecated: true
   /v1/toolgroups/{toolgroup_id}:
     get:
      responses:
@@ -2163,53 +2209,59 @@ paths:
              schema:
                 $ref: '#/components/schemas/ToolGroup'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - ToolGroups
-      summary: Get a tool group by its ID.
+      - Tool Groups
+      summary: Get Tool Group
      description: Get a tool group by its ID.
+      operationId: get_tool_group_v1_toolgroups__toolgroup_id__get
       parameters:
-        - name: toolgroup_id
-          in: path
-          description: The ID of the tool group to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: toolgroup_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: toolgroup_id'
+      deprecated: true
     delete:
       responses:
-        '200':
-          description: OK
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - ToolGroups
-      summary: Unregister a tool group.
+      - Tool Groups
+      summary: Unregister Toolgroup
      description: Unregister a tool group.
+      operationId: unregister_toolgroup_v1_toolgroups__toolgroup_id__delete
       parameters:
-        - name: toolgroup_id
-          in: path
-          description: The ID of the tool group to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: toolgroup_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: toolgroup_id'
+      deprecated: true
   /v1/tools:
     get:
      responses:
@@ -2221,27 +2273,31 @@ paths:
                 $ref: '#/components/schemas/ListToolDefsResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - ToolGroups
-      summary: List tools with optional tool group.
+      - Tool Groups
+      summary: List Tools
      description: List tools with optional tool group.
+      operationId: list_tools_v1_tools_get
       parameters:
-        - name: toolgroup_id
-          in: query
-          description: >-
-            The ID of the tool group to list tools for.
-          required: false
-          schema:
-            type: string
-      deprecated: false
+      - name: toolgroup_id
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Toolgroup Id
+      deprecated: true
   /v1/tools/{tool_name}:
     get:
      responses:
@@ -2252,54 +2308,58 @@ paths:
              schema:
                 $ref: '#/components/schemas/ToolDef'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - ToolGroups
-      summary: Get a tool by its name.
+      - Tool Groups
+      summary: Get Tool
      description: Get a tool by its name.
+      operationId: get_tool_v1_tools__tool_name__get
       parameters:
-        - name: tool_name
-          in: path
-          description: The name of the tool to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: tool_name
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: tool_name'
+      deprecated: true
   /v1/vector-io/insert:
     post:
      responses:
-        '200':
-          description: OK
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - VectorIO
-      summary: Insert chunks into a vector database.
+      - Vector Io
+      summary: Insert Chunks
      description: Insert chunks into a vector database.
-      parameters: []
+      operationId: insert_chunks_v1_vector_io_insert_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/InsertChunksRequest'
         required: true
-      deprecated: false
   /v1/vector-io/query:
     post:
      responses:
@@ -2310,800 +2370,829 @@ paths:
              schema:
                 $ref: '#/components/schemas/QueryChunksResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Query chunks from a vector database.
+      - Vector Io
+      summary: Query Chunks
      description: Query chunks from a vector database.
-      parameters: []
+      operationId: query_chunks_v1_vector_io_query_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/QueryChunksRequest'
         required: true
-      deprecated: false
   /v1/vector_stores:
     get:
      responses:
         '200':
-          description: >-
-            A VectorStoreListResponse containing the list of vector stores.
+          description: A VectorStoreListResponse containing the list of vector stores.
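# Example: GET /v1/vector_stores uses OpenAI-style cursor pagination; `after`
# and `before` take object ids, with limit=20 / order=desc defaults. The list
# shape used for the cursor below assumes the OpenAI `data` envelope:
#
#   import requests
#
#   base = "http://localhost:8321"
#   first = requests.get(f"{base}/v1/vector_stores", params={"limit": 20}).json()
#   cursor = first["data"][-1]["id"] if first.get("data") else None
#   more = requests.get(f"{base}/v1/vector_stores", params={"after": cursor}).json()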
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreListResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: Returns a list of vector stores.
+      - Vector Io
+      summary: Openai List Vector Stores
      description: Returns a list of vector stores.
+      operationId: openai_list_vector_stores_v1_vector_stores_get
       parameters:
-        - name: limit
-          in: query
-          description: >-
-            A limit on the number of objects to be returned. Limit can range between
-            1 and 100, and the default is 20.
-          required: false
-          schema:
-            type: integer
-        - name: order
-          in: query
-          description: >-
-            Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-            order and `desc` for descending order.
-          required: false
-          schema:
-            type: string
-        - name: after
-          in: query
-          description: >-
-            A cursor for use in pagination. `after` is an object ID that defines your
-            place in the list.
-          required: false
-          schema:
-            type: string
-        - name: before
-          in: query
-          description: >-
-            A cursor for use in pagination. `before` is an object ID that defines
-            your place in the list.
-          required: false
-          schema:
-            type: string
-      deprecated: false
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: before
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Before
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 20
+          title: Limit
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          default: desc
+          title: Order
     post:
      responses:
         '200':
-          description: >-
-            A VectorStoreObject representing the created vector store.
+          description: A VectorStoreObject representing the created vector store.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreObject'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: Creates a vector store.
-      description: >-
+      - Vector Io
+      summary: Openai Create Vector Store
+      description: |-
         Creates a vector store.

         Generate an OpenAI-compatible vector store with the given parameters.
-      parameters: []
+      operationId: openai_create_vector_store_v1_vector_stores_post
       requestBody:
+        required: true
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody'
-        required: true
-      deprecated: false
   /v1/vector_stores/{vector_store_id}:
     get:
      responses:
         '200':
-          description: >-
-            A VectorStoreObject representing the vector store.
+          description: A VectorStoreObject representing the vector store.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Retrieves a vector store.
+      - Vector Io
+      summary: Openai Retrieve Vector Store
      description: Retrieves a vector store.
+      operationId: openai_retrieve_vector_store_v1_vector_stores__vector_store_id__get
       parameters:
-        - name: vector_store_id
-          in: path
-          description: The ID of the vector store to retrieve.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
     post:
      responses:
         '200':
-          description: >-
-            A VectorStoreObject representing the updated vector store.
+          description: A VectorStoreObject representing the updated vector store.
           content:
             application/json:
              schema:
                 $ref: '#/components/schemas/VectorStoreObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Updates a vector store.
+      - Vector Io
+      summary: Openai Update Vector Store
      description: Updates a vector store.
+      operationId: openai_update_vector_store_v1_vector_stores__vector_store_id__post
       parameters:
-        - name: vector_store_id
-          in: path
-          description: The ID of the vector store to update.
-          required: true
-          schema:
-            type: string
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest'
         required: true
-      deprecated: false
     delete:
      responses:
         '200':
-          description: >-
-            A VectorStoreDeleteResponse indicating the deletion status.
+          description: A VectorStoreDeleteResponse indicating the deletion status.
           content:
             application/json:
              schema:
                 $ref: '#/components/schemas/VectorStoreDeleteResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Delete a vector store.
+      - Vector Io
+      summary: Openai Delete Vector Store
      description: Delete a vector store.
+      operationId: openai_delete_vector_store_v1_vector_stores__vector_store_id__delete
       parameters:
-        - name: vector_store_id
-          in: path
-          description: The ID of the vector store to delete.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
   /v1/vector_stores/{vector_store_id}/file_batches:
     post:
      responses:
         '200':
-          description: >-
-            A VectorStoreFileBatchObject representing the created file batch.
+          description: A VectorStoreFileBatchObject representing the created file batch.
           content:
             application/json:
              schema:
                 $ref: '#/components/schemas/VectorStoreFileBatchObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Create a vector store file batch.
-      description: >-
+      - Vector Io
+      summary: Openai Create Vector Store File Batch
+      description: |-
         Create a vector store file batch.

-        Generate an OpenAI-compatible vector store file batch for the given vector
-        store.
+        Generate an OpenAI-compatible vector store file batch for the given vector store.
+      operationId: openai_create_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches_post
       parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store to create the file batch for.
-          required: true
-          schema:
-            type: string
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody'
         required: true
-      deprecated: false
   /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}:
     get:
      responses:
         '200':
-          description: >-
-            A VectorStoreFileBatchObject representing the file batch.
+          description: A VectorStoreFileBatchObject representing the file batch.
           content:
             application/json:
              schema:
                 $ref: '#/components/schemas/VectorStoreFileBatchObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Retrieve a vector store file batch.
+      - Vector Io
+      summary: Openai Retrieve Vector Store File Batch
      description: Retrieve a vector store file batch.
+      operationId: openai_retrieve_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__get
       parameters:
-        - name: batch_id
-          in: path
-          description: The ID of the file batch to retrieve.
-          required: true
-          schema:
-            type: string
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file batch.
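# Example: creating and then polling a file batch. The `file_ids` body field
# follows the OpenAI vector-store file-batch convention and is an assumption
# here, as are both ids:
#
#   import requests
#
#   base = "http://localhost:8321"
#   vs = "vs_123"  # hypothetical vector store id
#   batch = requests.post(
#       f"{base}/v1/vector_stores/{vs}/file_batches",
#       json={"file_ids": ["file-abc123"]},
#   ).json()
#   status = requests.get(f"{base}/v1/vector_stores/{vs}/file_batches/{batch['id']}").json()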
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
+      - name: batch_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: batch_id'
   /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel:
     post:
      responses:
         '200':
-          description: >-
-            A VectorStoreFileBatchObject representing the cancelled file batch.
+          description: A VectorStoreFileBatchObject representing the cancelled file batch.
           content:
             application/json:
              schema:
                 $ref: '#/components/schemas/VectorStoreFileBatchObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Cancels a vector store file batch.
+      - Vector Io
+      summary: Openai Cancel Vector Store File Batch
      description: Cancels a vector store file batch.
+      operationId: openai_cancel_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__cancel_post
       parameters:
-        - name: batch_id
-          in: path
-          description: The ID of the file batch to cancel.
-          required: true
-          schema:
-            type: string
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file batch.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
+      - name: batch_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: batch_id'
   /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files:
     get:
      responses:
         '200':
-          description: >-
-            A VectorStoreFilesListInBatchResponse containing the list of files in
-            the batch.
+          description: A VectorStoreFilesListInBatchResponse containing the list of files in the batch.
           content:
             application/json:
              schema:
                 $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: >-
-        Returns a list of vector store files in a batch.
-      description: >-
-        Returns a list of vector store files in a batch.
+      - Vector Io
+      summary: Openai List Files In Vector Store File Batch
+      description: Returns a list of vector store files in a batch.
+      operationId: openai_list_files_in_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__files_get
       parameters:
-        - name: batch_id
-          in: path
-          description: >-
-            The ID of the file batch to list files from.
-          required: true
-          schema:
-            type: string
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file batch.
-          required: true
-          schema:
-            type: string
-        - name: after
-          in: query
-          description: >-
-            A cursor for use in pagination. `after` is an object ID that defines your
-            place in the list.
-          required: false
-          schema:
-            type: string
-        - name: before
-          in: query
-          description: >-
-            A cursor for use in pagination. `before` is an object ID that defines
-            your place in the list.
-          required: false
-          schema:
-            type: string
-        - name: filter
-          in: query
-          description: >-
-            Filter by file status. One of in_progress, completed, failed, cancelled.
-          required: false
-          schema:
-            type: string
-        - name: limit
-          in: query
-          description: >-
-            A limit on the number of objects to be returned. Limit can range between
-            1 and 100, and the default is 20.
-          required: false
-          schema:
-            type: integer
-        - name: order
-          in: query
-          description: >-
-            Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-            order and `desc` for descending order.
-          required: false
-          schema:
-            type: string
-      deprecated: false
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: before
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Before
+      - name: filter
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Filter
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 20
+          title: Limit
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          default: desc
+          title: Order
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
+      - name: batch_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: batch_id'
   /v1/vector_stores/{vector_store_id}/files:
     get:
      responses:
         '200':
-          description: >-
-            A VectorStoreListFilesResponse containing the list of files.
+          description: A VectorStoreListFilesResponse containing the list of files.
           content:
             application/json:
              schema:
                 $ref: '#/components/schemas/VectorStoreListFilesResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: List files in a vector store.
+      - Vector Io
+      summary: Openai List Files In Vector Store
      description: List files in a vector store.
+      operationId: openai_list_files_in_vector_store_v1_vector_stores__vector_store_id__files_get
       parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store to list files from.
-          required: true
-          schema:
-            type: string
-        - name: limit
-          in: query
-          description: >-
-            (Optional) A limit on the number of objects to be returned. Limit can
-            range between 1 and 100, and the default is 20.
-          required: false
-          schema:
-            type: integer
-        - name: order
-          in: query
-          description: >-
-            (Optional) Sort order by the `created_at` timestamp of the objects. `asc`
-            for ascending order and `desc` for descending order.
- required: false - schema: - type: string - - name: after - in: query - description: >- - (Optional) A cursor for use in pagination. `after` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - (Optional) A cursor for use in pagination. `before` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - (Optional) Filter by file status to only return files with the specified - status. - required: false - schema: - $ref: '#/components/schemas/VectorStoreFileStatus' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + title: Filter + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + nullable: true + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the attached file. + description: A VectorStoreFileObject representing the attached file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Attach a file to a vector store. + - Vector Io + summary: Openai Attach File To Vector Store description: Attach a file to a vector store. + operationId: openai_attach_file_to_vector_store_v1_vector_stores__vector_store_id__files_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to attach the file to. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}/files/{file_id}: get: responses: '200': - description: >- - A VectorStoreFileObject representing the file. + description: A VectorStoreFileObject representing the file. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File description: Retrieves a vector store file. + operationId: openai_retrieve_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the updated file. + description: A VectorStoreFileObject representing the updated file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store file. + - Vector Io + summary: Openai Update Vector Store File description: Updates a vector store file. + operationId: openai_update_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to update. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreFileDeleteResponse indicating the deletion status. + description: A VectorStoreFileDeleteResponse indicating the deletion status. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store file. + - Vector Io + summary: Openai Delete Vector Store File description: Delete a vector store file. + operationId: openai_delete_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__delete parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to delete. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to delete. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/files/{file_id}/content: get: responses: '200': - description: >- - A list of InterleavedContent representing the file contents. + description: File contents, optionally with embeddings and metadata based on query parameters. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Retrieves the contents of a vector store file. - description: >- - Retrieves the contents of a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File Contents + description: Retrieves the contents of a vector store file. + operationId: openai_retrieve_vector_store_file_contents_v1_vector_stores__vector_store_id__files__file_id__content_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. 
- required: true - schema: - type: string - deprecated: false + - name: include_embeddings + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Embeddings + - name: include_metadata + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Metadata + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/search: post: responses: '200': - description: >- - A VectorStoreSearchResponse containing the search results. + description: A VectorStoreSearchResponse containing the search results. content: application/json: schema: $ref: '#/components/schemas/VectorStoreSearchResponsePage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Search for chunks in a vector store. - description: >- + - Vector Io + summary: Openai Search Vector Store + description: |- Search for chunks in a vector store. - Searches a vector store for relevant chunks based on a query and optional - file attribute filters. + Searches a vector store for relevant chunks based on a query and optional file attribute filters. + operationId: openai_search_vector_store_v1_vector_stores__vector_store_id__search_post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to search. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' required: true - deprecated: false /v1/version: get: responses: '200': - description: >- - Version information containing the service version number. + description: Version information containing the service version number. content: application/json: schema: $ref: '#/components/schemas/VersionInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get version. - description: >- + - Inspect + summary: Version + description: |- Get version. Get the version of the service. 
- parameters: [] - deprecated: false + operationId: version_v1_version_get /v1beta/datasetio/append-rows/{dataset_id}: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - DatasetIO - summary: Append rows to a dataset. + - Datasetio + summary: Append Rows description: Append rows to a dataset. + operationId: append_rows_v1beta_datasetio_append_rows__dataset_id__post parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to append the rows to. - required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/AppendRowsRequest' required: true - deprecated: false /v1beta/datasetio/iterrows/{dataset_id}: get: responses: @@ -3115,55 +3204,53 @@ paths: $ref: '#/components/schemas/PaginatedResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - DatasetIO - summary: >- - Get a paginated list of rows from a dataset. - description: >- + - Datasetio + summary: Iterrows + description: |- Get a paginated list of rows from a dataset. Uses offset-based pagination where: - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - The response includes: - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. + operationId: iterrows_v1beta_datasetio_iterrows__dataset_id__get parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to get the rows from. - required: true - schema: - type: string - - name: start_index - in: query - description: >- - Index into dataset for the first row to get. Get all rows if None. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of rows to get. 
- required: false - schema: - type: integer - deprecated: false + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: start_index + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Start Index + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' /v1beta/datasets: get: responses: @@ -3174,21 +3261,22 @@ paths: schema: $ref: '#/components/schemas/ListDatasetsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: List all datasets. + - Datasets + summary: List Datasets description: List all datasets. - parameters: [] - deprecated: false + operationId: list_datasets_v1beta_datasets_get post: responses: '200': @@ -3198,27 +3286,29 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Register a new dataset. + - Datasets + summary: Register Dataset description: Register a new dataset. - parameters: [] + operationId: register_dataset_v1beta_datasets_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RegisterDatasetRequest' required: true - deprecated: false + deprecated: true /v1beta/datasets/{dataset_id}: get: responses: @@ -3229,550 +3319,58 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Get a dataset by its ID. + - Datasets + summary: Get Dataset description: Get a dataset by its ID. + operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: - - name: dataset_id - in: path - description: The ID of the dataset to get. 
- required: true - schema: - type: string - deprecated: false + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Datasets - summary: Unregister a dataset by its ID. + - Datasets + summary: Unregister Dataset description: Unregister a dataset by its ID. + operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: - - name: dataset_id - in: path - description: The ID of the dataset to unregister. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all agents. - description: List all agents. - parameters: - - name: start_index - in: query - description: The index to start the pagination from. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of agents to return. - required: false - schema: - type: integer - deprecated: false - post: - responses: - '200': - description: >- - An AgentCreateResponse with the agent ID. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentCreateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Create an agent with the given configuration. - description: >- - Create an agent with the given configuration. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentRequest' + - name: dataset_id + in: path required: true - deprecated: false - /v1alpha/agents/{agent_id}: - get: - responses: - '200': - description: An Agent of the agent. - content: - application/json: - schema: - $ref: '#/components/schemas/Agent' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Describe an agent by its ID. - description: Describe an agent by its ID. - parameters: - - name: agent_id - in: path - description: ID of the agent. 
- required: true - schema: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Delete an agent by its ID and its associated sessions and turns. - description: >- - Delete an agent by its ID and its associated sessions and turns. - parameters: - - name: agent_id - in: path - description: The ID of the agent to delete. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session: - post: - responses: - '200': - description: An AgentSessionCreateResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentSessionCreateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new session for an agent. - description: Create a new session for an agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to create the session for. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentSessionRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}: - get: - responses: - '200': - description: A Session. - content: - application/json: - schema: - $ref: '#/components/schemas/Session' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent session by its ID. - description: Retrieve an agent session by its ID. - parameters: - - name: session_id - in: path - description: The ID of the session to get. - required: true - schema: - type: string - - name: agent_id - in: path - description: >- - The ID of the agent to get the session for. - required: true - schema: - type: string - - name: turn_ids - in: query - description: >- - (Optional) List of turn IDs to filter the session by. - required: false - schema: - type: array - items: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Delete an agent session by its ID and its associated turns. - description: >- - Delete an agent session by its ID and its associated turns. - parameters: - - name: session_id - in: path - description: The ID of the session to delete. - required: true - schema: - type: string - - name: agent_id - in: path - description: >- - The ID of the agent to delete the session for. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn: - post: - responses: - '200': - description: >- - If stream=False, returns a Turn object. 
If stream=True, returns an SSE - event stream of AgentTurnResponseStreamChunk. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new turn for an agent. - description: Create a new turn for an agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to create the turn for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to create the turn for. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentTurnRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}: - get: - responses: - '200': - description: A Turn. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent turn by its ID. - description: Retrieve an agent turn by its ID. - parameters: - - name: agent_id - in: path - description: The ID of the agent to get the turn for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to get the turn for. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume: - post: - responses: - '200': - description: >- - A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk - objects. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Resume an agent turn with executed tool call responses. - description: >- - Resume an agent turn with executed tool call responses. - - When a Turn has the status `awaiting_input` due to pending input from client - side tool calls, this endpoint can be used to submit the outputs from the - tool calls once they are ready. - parameters: - - name: agent_id - in: path - description: The ID of the agent to resume. - required: true - schema: - type: string - - name: session_id - in: path - description: The ID of the session to resume. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to resume. 
- required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ResumeAgentTurnRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: - get: - responses: - '200': - description: An AgentStepResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentStepResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent step by its ID. - description: Retrieve an agent step by its ID. - parameters: - - name: agent_id - in: path - description: The ID of the agent to get the step for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to get the step for. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to get the step for. - required: true - schema: - type: string - - name: step_id - in: path - description: The ID of the step to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/sessions: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all session(s) of a given agent. - description: List all session(s) of a given agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to list sessions for. - required: true - schema: - type: string - - name: start_index - in: query - description: The index to start the pagination from. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of sessions to return. - required: false - schema: - type: integer - deprecated: false + schema: + type: string + description: 'Path parameter: dataset_id' + deprecated: true /v1alpha/eval/benchmarks: get: responses: @@ -3783,47 +3381,50 @@ paths: schema: $ref: '#/components/schemas/ListBenchmarksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: List all benchmarks. + - Benchmarks + summary: List Benchmarks description: List all benchmarks. 
- parameters: [] - deprecated: false + operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Benchmarks - summary: Register a benchmark. + - Benchmarks + summary: Register Benchmark description: Register a benchmark. - parameters: [] + operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RegisterBenchmarkRequest' required: true - deprecated: false + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}: get: responses: @@ -3834,131 +3435,136 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: Get a benchmark by its ID. + - Benchmarks + summary: Get Benchmark description: Get a benchmark by its ID. + operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to get. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Benchmarks - summary: Unregister a benchmark. + - Benchmarks + summary: Unregister Benchmark description: Unregister a benchmark. + operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to unregister. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: '200': - description: >- - EvaluateResponse object containing generations and scores. + description: EvaluateResponse object containing generations and scores. 
content: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Evaluate a list of rows on a benchmark. + - Eval + summary: Evaluate Rows description: Evaluate a list of rows on a benchmark. + operationId: evaluate_rows_v1alpha_eval_benchmarks__benchmark_id__evaluations_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/EvaluateRowsRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs: post: responses: '200': - description: >- - The job that was created to run the evaluation. + description: The job that was created to run the evaluation. content: application/json: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Run an evaluation on a benchmark. + - Eval + summary: Run Eval description: Run an evaluation on a benchmark. + operationId: run_eval_v1alpha_eval_benchmarks__benchmark_id__jobs_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/RunEvalRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -3969,67 +3575,69 @@ paths: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the status of a job. + - Eval + summary: Job Status description: Get the status of a job. + operationId: job_status_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. 
- required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Eval - summary: Cancel a job. + - Eval + summary: Job Cancel description: Cancel a job. + operationId: job_cancel_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__delete parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: get: responses: @@ -4040,68 +3648,67 @@ paths: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the result of a job. + - Eval + summary: Job Result description: Get the result of a job. + operationId: job_result_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__result_get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/inference/rerank: post: responses: '200': - description: >- - RerankResponse with indices sorted by relevance score (descending). + description: RerankResponse with indices sorted by relevance score (descending). 
content: application/json: schema: $ref: '#/components/schemas/RerankResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: >- - Rerank a list of documents based on their relevance to a query. - description: >- - Rerank a list of documents based on their relevance to a query. - parameters: [] + - Inference + summary: Rerank + description: Rerank a list of documents based on their relevance to a query. + operationId: rerank_v1alpha_inference_rerank_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RerankRequest' required: true - deprecated: false /v1alpha/post-training/job/artifacts: get: responses: @@ -4113,54 +3720,56 @@ paths: $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the artifacts of a training job. + - Post Training + summary: Get Training Job Artifacts description: Get the artifacts of a training job. + operationId: get_training_job_artifacts_v1alpha_post_training_job_artifacts_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the artifacts of. - required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/job/cancel: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - PostTraining (Coming Soon) - summary: Cancel a training job. + - Post Training + summary: Cancel Training Job description: Cancel a training job. 
- parameters: [] + operationId: cancel_training_job_v1alpha_post_training_job_cancel_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CancelTrainingJobRequest' required: true - deprecated: false /v1alpha/post-training/job/status: get: responses: @@ -4172,27 +3781,28 @@ paths: $ref: '#/components/schemas/PostTrainingJobStatusResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the status of a training job. + - Post Training + summary: Get Training Job Status description: Get the status of a training job. + operationId: get_training_job_status_v1alpha_post_training_job_status_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the status of. - required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/jobs: get: responses: @@ -4203,21 +3813,22 @@ paths: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Get all training jobs. + - Post Training + summary: Get Training Jobs description: Get all training jobs. - parameters: [] - deprecated: false + operationId: get_training_jobs_v1alpha_post_training_jobs_get /v1alpha/post-training/preference-optimize: post: responses: @@ -4228,27 +3839,28 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run preference optimization of a model. + - Post Training + summary: Preference Optimize description: Run preference optimization of a model. 
- parameters: [] + operationId: preference_optimize_v1alpha_post_training_preference_optimize_post requestBody: content: application/json: schema: $ref: '#/components/schemas/PreferenceOptimizeRequest' required: true - deprecated: false /v1alpha/post-training/supervised-fine-tune: post: responses: @@ -4259,1148 +3871,1277 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run supervised fine-tuning of a model. + - Post Training + summary: Supervised Fine Tune description: Run supervised fine-tuning of a model. - parameters: [] + operationId: supervised_fine_tune_v1alpha_post_training_supervised_fine_tune_post requestBody: content: application/json: schema: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true - deprecated: false -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. - Order: - type: string - enum: - - asc - - desc - title: Order - description: Sort order for paginated responses. 
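For orientation, the reworked Error schema above (status, title, detail required; instance nullable, roughly per RFC 7807) corresponds to payloads like the hand-written Python sketch below; the concrete values are illustrative only, not taken from the spec:

# Illustrative payload matching the new Error schema:
# status/title/detail are required, instance is nullable.
error = {
    "status": 429,
    "title": "Too Many Requests",
    "detail": "Rate limit exceeded; retry after a short backoff.",
    "instance": None,  # optionally a URL identifying this occurrence
}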
- ListOpenAIChatCompletionResponse: type: object + ListBatchesResponse: properties: - data: - type: array - items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages - has_more: - type: boolean - description: >- - Whether there are more completions available beyond this list - first_id: - type: string - description: ID of the first completion in this list - last_id: - type: string - description: ID of the last completion in this list object: type: string const: list + title: Object default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - OpenAIAssistantMessageParam: + data: + items: + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects + first_id: + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list + has_more: + type: boolean + title: Has More + description: Whether there are more batches available + default: false type: object + required: + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. 
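As a rough usage sketch for the ListBatchesResponse shape above, the snippet below pages through batches via the has_more/last_id cursor fields. It assumes the OpenAI-compatible GET /v1/batches list endpoint defined elsewhere in this spec, an `after` query cursor, and a local server URL; it deliberately uses plain requests rather than any particular client library:

import requests

BASE_URL = "http://localhost:8321"  # assumed local Llama Stack server

after = None
while True:
    params = {"after": after} if after else {}
    resp = requests.get(f"{BASE_URL}/v1/batches", params=params, timeout=30)
    resp.raise_for_status()
    page = resp.json()  # shaped like ListBatchesResponse
    for batch in page["data"]:
        print(batch["id"], batch["status"])
    if not page.get("has_more"):
        break
    after = page["last_id"]  # cursor for the next page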
+ CreateBatchRequest: + properties: + input_file_id: + type: string + title: Input File Id + endpoint: + type: string + title: Endpoint + completion_window: + type: string + const: 24h + title: Completion Window + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + idempotency_key: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + properties: + id: + type: string + title: Id + completion_window: + type: string + title: Completion Window + created_at: + type: integer + title: Created At + endpoint: + type: string + title: Endpoint + input_file_id: + type: string + title: Input File Id + object: + type: string + const: batch + title: Object + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status + cancelled_at: + anyOf: + - type: integer + - type: 'null' + cancelling_at: + anyOf: + - type: integer + - type: 'null' + completed_at: + anyOf: + - type: integer + - type: 'null' + error_file_id: + anyOf: + - type: string + - type: 'null' + errors: + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' + title: Errors + expired_at: + anyOf: + - type: integer + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + failed_at: + anyOf: + - type: integer + - type: 'null' + finalizing_at: + anyOf: + - type: integer + - type: 'null' + in_progress_at: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + model: + anyOf: + - type: string + - type: 'null' + output_file_id: + anyOf: + - type: string + - type: 'null' + request_counts: + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' + title: BatchRequestCounts + usage: + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' + title: BatchUsage + additionalProperties: true + type: object + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. + ListOpenAIChatCompletionResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. properties: role: - type: string const: assistant default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the model's response - name: + title: Role type: string - description: >- - (Optional) The name of the assistant message participant. 
+ content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: >- - List of tool calls. Each tool call is an OpenAIChatCompletionToolCall - object. - additionalProperties: false - required: - - role + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true title: OpenAIAssistantMessageParam - description: >- - A message containing the model's (assistant) response in an OpenAI-compatible - chat completion request. - "OpenAIChatCompletionContentPartImageParam": type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartParam: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - - $ref: '#/components/schemas/OpenAIFile' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - file: '#/components/schemas/OpenAIFile' - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionToolCall: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. 
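Reviewer note: under the regenerated schema, message content is either a plain string or a list of parts discriminated on `type`. A hypothetical content array mixing the two part kinds defined above (the URL and text are invented; OpenAIImageURL itself is defined further down in this diff):

"content": [
  { "type": "text", "text": "What is in this image?" },
  { "type": "image_url", "image_url": { "url": "https://example.com/cat.png", "detail": "auto" } }
]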
+ OpenAIChatCompletionToolCall: properties: index: - type: integer - description: >- - (Optional) Index of the tool call in the list + anyOf: + - type: integer + - type: 'null' id: - type: string - description: >- - (Optional) Unique identifier for the tool call + anyOf: + - type: string + - type: 'null' type: type: string const: function + title: Type default: function - description: >- - Must be "function" to identify this as a function call function: - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' - description: (Optional) Function call details - additionalProperties: false - required: - - type - title: OpenAIChatCompletionToolCall - description: >- - Tool call specification for OpenAI-compatible chat completion responses. - OpenAIChatCompletionToolCallFunction: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: properties: name: - type: string - description: (Optional) Name of the function to call + anyOf: + - type: string + - type: 'null' arguments: - type: string - description: >- - (Optional) Arguments to pass to the function as a JSON string - additionalProperties: false - title: OpenAIChatCompletionToolCallFunction - description: >- - Function call details for OpenAI-compatible tool calls. - OpenAIChatCompletionUsage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: properties: prompt_tokens: type: integer - description: Number of tokens in the prompt + title: Prompt Tokens completion_tokens: type: integer - description: Number of tokens in the completion + title: Completion Tokens total_tokens: type: integer - description: Total tokens used (prompt + completion) + title: Total Tokens prompt_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - title: >- - OpenAIChatCompletionUsagePromptTokensDetails - description: >- - Token details for prompt tokens in OpenAI chat completion usage. + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails completion_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - title: >- - OpenAIChatCompletionUsageCompletionTokensDetails - description: >- - Token details for output tokens in OpenAI chat completion usage. - additionalProperties: false - required: - - prompt_tokens - - completion_tokens - - total_tokens - title: OpenAIChatCompletionUsage - description: >- - Usage information for OpenAI chat completion. 
- OpenAIChoice: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: properties: message: oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... (5 variants) discriminator: propertyName: role mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' developer: '#/components/schemas/OpenAIDeveloperMessageParam' - description: The message from the model + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' finish_reason: type: string - description: The reason the model stopped generating + title: Finish Reason index: type: integer - description: The index of the choice + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - required: - - message - - finish_reason - - index - title: OpenAIChoice - description: >- - A choice from an OpenAI-compatible chat completion response. - OpenAIChoiceLogprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: properties: content: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' refusal: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - title: OpenAIChoiceLogprobs - description: >- - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. 
- OpenAIDeveloperMessageParam: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: properties: role: type: string const: developer + title: Role default: developer - description: >- - Must be "developer" to identify this as a developer message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the developer message + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the developer message participant. - additionalProperties: false - required: - - role - - content - title: OpenAIDeveloperMessageParam - description: >- - A message from the developer in an OpenAI-compatible chat completion request. - OpenAIFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: properties: type: type: string const: file + title: Type default: file file: $ref: '#/components/schemas/OpenAIFileFile' - additionalProperties: false + type: object required: - - type - - file + - file title: OpenAIFile OpenAIFileFile: - type: object properties: file_data: - type: string + anyOf: + - type: string + - type: 'null' file_id: - type: string + anyOf: + - type: string + - type: 'null' filename: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object title: OpenAIFileFile OpenAIImageURL: - type: object properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - OpenAIMessageParam: - oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' - developer: '#/components/schemas/OpenAIDeveloperMessageParam' - OpenAISystemMessageParam: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. 
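Reviewer note: every field of OpenAIFileFile is now explicitly nullable, so the schema permits any combination; in practice a caller would presumably supply either inline `file_data` or a `file_id` reference. A hypothetical file content part (ID and filename invented):

{
  "type": "file",
  "file": {
    "file_id": "file-abc123",
    "filename": "report.pdf"
  }
}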
+ OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the system message participant. - additionalProperties: false - required: - - role - - content - title: OpenAISystemMessageParam - description: >- - A system message providing instructions or context to the model. - OpenAITokenLogProb: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number + title: Logprob top_logprobs: - type: array items: $ref: '#/components/schemas/OpenAITopLogProb' - additionalProperties: false - required: - - token - - logprob - - top_logprobs - title: OpenAITokenLogProb - description: >- - The log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIToolMessageParam: + type: array + title: Top Logprobs type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. 
+ + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: properties: role: type: string const: tool + title: Role default: tool - description: >- - Must be "tool" to identify this as a tool response tool_call_id: type: string - description: >- - Unique identifier for the tool call this response is for + title: Tool Call Id content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The response content from the tool - additionalProperties: false - required: - - role - - tool_call_id - - content - title: OpenAIToolMessageParam - description: >- - A message representing the result of a tool invocation in an OpenAI-compatible - chat completion request. - OpenAITopLogProb: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number - additionalProperties: false - required: - - token - - logprob - title: OpenAITopLogProb - description: >- - The top log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIUserMessageParam: + title: Logprob type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. properties: role: - type: string const: user default: user - description: >- - Must be "user" to identify this as a user message - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' - description: >- - The content of the message, which can include text and other media - name: + title: Role type: string - description: >- - (Optional) The name of the user message participant. 
- additionalProperties: false + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true required: - - role - - content + - content title: OpenAIUserMessageParam - description: >- - A message from the user in an OpenAI-compatible chat completion request. - OpenAIJSONSchema: type: object + OpenAIJSONSchema: properties: name: type: string - description: Name of the schema + title: Name description: - type: string - description: (Optional) Description of the schema + anyOf: + - type: string + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict adherence to the schema + anyOf: + - type: boolean + - type: 'null' schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The JSON schema definition - additionalProperties: false - required: - - name - title: OpenAIJSONSchema - description: >- - JSON schema specification for OpenAI-compatible structured response format. - OpenAIResponseFormatJSONObject: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: properties: type: type: string const: json_object + title: Type default: json_object - description: >- - Must be "json_object" to indicate generic JSON object response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatJSONObject - description: >- - JSON object response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatJSONSchema: type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: properties: type: type: string const: json_schema + title: Type default: json_schema - description: >- - Must be "json_schema" to indicate structured JSON response format json_schema: $ref: '#/components/schemas/OpenAIJSONSchema' - description: >- - The JSON schema specification for the response - additionalProperties: false - required: - - type - - json_schema - title: OpenAIResponseFormatJSONSchema - description: >- - JSON schema response format for OpenAI-compatible chat completion requests. 
- OpenAIResponseFormatParam: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseFormatText' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIResponseFormatText' - json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' - json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' - OpenAIResponseFormatText: type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to indicate plain text response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatText - description: >- - Text response format for OpenAI-compatible chat completion requests. - OpenAIChatCompletionRequestWithExtraBody: type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation. - frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - function_call: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The function call to use. - functions: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) List of functions to use. - logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. - logprobs: - type: boolean - description: (Optional) The log probabilities to use. - max_completion_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - n: - type: integer - description: >- - (Optional) The number of completions to generate. - parallel_tool_calls: - type: boolean - description: >- - (Optional) Whether to parallelize tool calls. - presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. 
- response_format: - $ref: '#/components/schemas/OpenAIResponseFormatParam' - description: (Optional) The response format to use. - seed: - type: integer - description: (Optional) The seed to use. - stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. - stream: - type: boolean - description: >- - (Optional) Whether to stream the response. - stream_options: - type: object - additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. - temperature: - type: number - description: (Optional) The temperature to use. - tool_choice: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tool choice to use. - tools: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) type: array - items: + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tools to use. 
+ - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' top_logprobs: - type: integer - description: >- - (Optional) The top log probabilities to use. + anyOf: + - type: integer + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. - additionalProperties: false - required: - - model - - messages - title: OpenAIChatCompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible chat completion endpoint. - OpenAIChatCompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. 
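Reviewer note: taken together, a minimal request body that should satisfy the regenerated OpenAIChatCompletionRequestWithExtraBody (model name and message text are placeholders; note that `additionalProperties: true` is what lets provider-specific "extra body" keys pass through validation):

{
  "model": "llama3.2:3b",
  "messages": [
    { "role": "system", "content": "You are a concise assistant." },
    { "role": "user", "content": "Summarize the Batches API in one sentence." }
  ],
  "temperature": 0.2,
  "stream": false,
  "response_format": { "type": "text" }
}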
+ OpenAIChatCompletion: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - additionalProperties: false - required: - - id - - choices - - object - - created - - model - title: OpenAIChatCompletion - description: >- - Response from an OpenAI-compatible chat completion request. - OpenAIChatCompletionChunk: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. properties: id: + title: Id type: string - description: The ID of the chat completion choices: - type: array items: $ref: '#/components/schemas/OpenAIChunkChoice' - description: List of choices + title: Choices + type: array object: - type: string const: chat.completion.chunk default: chat.completion.chunk - description: >- - The object type, which will be "chat.completion.chunk" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model type: string - description: >- - The model that was used to generate the chat completion usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information (typically included in final chunk with stream_options) - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage required: - - id - - choices - - object - - created - - model + - id + - choices + - created + - model title: OpenAIChatCompletionChunk - description: >- - Chunk from a streaming response to an OpenAI-compatible chat completion request. - OpenAIChoiceDelta: type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. 
properties: content: - type: string - description: (Optional) The content of the delta + anyOf: + - type: string + - type: 'null' + nullable: true refusal: - type: string - description: (Optional) The refusal of the delta + anyOf: + - type: string + - type: 'null' + nullable: true role: - type: string - description: (Optional) The role of the delta + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: (Optional) The tool calls of the delta + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true reasoning_content: - type: string - description: >- - (Optional) The reasoning content from the model (non-standard, for o1/o3 - models) - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true title: OpenAIChoiceDelta - description: >- - A delta from an OpenAI-compatible chat completion streaming response. - OpenAIChunkChoice: type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. properties: delta: $ref: '#/components/schemas/OpenAIChoiceDelta' - description: The delta from the chunk finish_reason: + title: Finish Reason type: string - description: The reason the model stopped generating index: + title: Index type: integer - description: The index of the choice logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs required: - - delta - - finish_reason - - index + - delta + - finish_reason + - index title: OpenAIChunkChoice - description: >- - A chunk choice from an OpenAI-compatible chat completion streaming response. 
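Reviewer note: as a sanity check on the streaming types, a single hypothetical chunk that validates against OpenAIChatCompletionChunk (all values invented). Note that the schema as written marks `finish_reason` required on every OpenAIChunkChoice, so even intermediate chunks must carry some string there:

{
  "id": "chatcmpl-123",
  "object": "chat.completion.chunk",
  "created": 1714508499,
  "model": "llama3.2:3b",
  "choices": [
    {
      "index": 0,
      "delta": { "role": "assistant", "content": "Hello" },
      "finish_reason": "stop"
    }
  ]
}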
- OpenAICompletionWithInputMessages: type: object + OpenAICompletionWithInputMessages: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage input_messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object required: - - id - - choices - - object - - created - - model - - input_messages + - id + - choices + - created + - model + - input_messages title: OpenAICompletionWithInputMessages OpenAICompletionRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model prompt: - oneOf: - - type: string - - type: array - items: - type: string - - type: array + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: items: type: integer - - type: array - items: - type: array - items: - type: integer - description: The prompt to generate a completion for. + type: array + type: array + title: list[array] + title: string | ... (4 variants) best_of: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' echo: - type: boolean - description: (Optional) Whether to echo the prompt. + anyOf: + - type: boolean + - type: 'null' frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
+ anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' logprobs: - type: boolean - description: (Optional) The log probabilities to use. + anyOf: + - type: boolean + - type: 'null' max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. + anyOf: + - type: integer + - type: 'null' n: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' seed: - type: integer - description: (Optional) The seed to use. + anyOf: + - type: integer + - type: 'null' stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] stream: - type: boolean - description: >- - (Optional) Whether to stream the response. + anyOf: + - type: boolean + - type: 'null' stream_options: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. + anyOf: + - additionalProperties: true + type: object + - type: 'null' temperature: - type: number - description: (Optional) The temperature to use. + anyOf: + - type: number + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. + anyOf: + - type: string + - type: 'null' suffix: - type: string - description: >- - (Optional) The suffix that should be appended to the completion. - additionalProperties: false - required: - - model - - prompt - title: OpenAICompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible completion endpoint. - OpenAICompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: properties: id: type: string + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices created: type: integer + title: Created model: type: string + title: Model object: type: string const: text_completion + title: Object default: text_completion - additionalProperties: false - required: - - id - - choices - - created - - model - - object - title: OpenAICompletion - description: >- - Response from an OpenAI-compatible completion request. - OpenAICompletionChoice: type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. 
+ + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: properties: finish_reason: type: string + title: Finish Reason text: type: string + title: Text index: type: integer + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object required: - - finish_reason - - text - - index + - finish_reason + - text + - index title: OpenAICompletionChoice - description: >- + description: |- A choice from an OpenAI-compatible completion response. + + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice ConversationItem: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' @@ -5408,8106 +5149,7885 @@ components: mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: 
OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) OpenAIResponseAnnotationCitation: - type: object properties: type: type: string const: url_citation + title: Type default: url_citation - description: >- - Annotation type identifier, always "url_citation" end_index: type: integer - description: >- - End position of the citation span in the content + title: End Index start_index: type: integer - description: >- - Start position of the citation span in the content + title: Start Index title: type: string - description: Title of the referenced web resource + title: Title url: type: string - description: URL of the referenced web resource - additionalProperties: false - required: - - type - - end_index - - start_index - - title - - url - title: OpenAIResponseAnnotationCitation - description: >- - URL citation annotation for referencing external web resources. - "OpenAIResponseAnnotationContainerFileCitation": + title: Url type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. + OpenAIResponseAnnotationContainerFileCitation: properties: type: type: string const: container_file_citation + title: Type default: container_file_citation container_id: type: string + title: Container Id end_index: type: integer + title: End Index file_id: type: string + title: File Id filename: type: string + title: Filename start_index: type: integer - additionalProperties: false - required: - - type - - container_id - - end_index - - file_id - - filename - - start_index - title: >- - OpenAIResponseAnnotationContainerFileCitation - OpenAIResponseAnnotationFileCitation: + title: Start Index type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: properties: type: type: string const: file_citation + title: Type default: file_citation - description: >- - Annotation type identifier, always "file_citation" file_id: type: string - description: Unique identifier of the referenced file + title: File Id filename: type: string - description: Name of the referenced file + title: Filename index: type: integer - description: >- - Position index of the citation within the content - additionalProperties: false - required: - - type - - file_id - - filename - - index - title: OpenAIResponseAnnotationFileCitation - description: >- - File citation annotation for referencing specific files in response content. - OpenAIResponseAnnotationFilePath: + title: Index type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. 
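Reviewer note: hypothetical instances of the two citation annotation shapes above, as they would appear as items of the `annotations` array on an output_text content part (see OpenAIResponseOutputMessageContentOutputText below); indices, IDs, titles, and URLs are invented:

{ "type": "url_citation", "start_index": 42, "end_index": 57, "title": "Llama Stack docs", "url": "https://example.com/docs" }

{ "type": "file_citation", "file_id": "file-abc123", "filename": "notes.txt", "index": 7 }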
+ OpenAIResponseAnnotationFilePath: properties: type: type: string const: file_path + title: Type default: file_path file_id: type: string + title: File Id index: type: integer - additionalProperties: false + title: Index + type: object required: - - type - - file_id - - index + - file_id + - index title: OpenAIResponseAnnotationFilePath OpenAIResponseAnnotations: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) OpenAIResponseContentPartRefusal: - type: object properties: type: type: string const: refusal + title: Type default: refusal - description: >- - Content part type identifier, always "refusal" refusal: type: string - description: Refusal text supplied by the model - additionalProperties: false - required: - - type - - refusal - title: OpenAIResponseContentPartRefusal - description: >- - Refusal content within a streamed response part. - "OpenAIResponseInputFunctionToolCallOutput": + title: Refusal type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: properties: call_id: type: string + title: Call Id output: type: string + title: Output type: type: string const: function_call_output + title: Type default: function_call_output id: - type: string + anyOf: + - type: string + - type: 'null' status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. 
- OpenAIResponseInputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - discriminator: - propertyName: type - mapping: - input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' - input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' - OpenAIResponseInputMessageContentImage: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. + OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: + properties: + type: + type: string + const: input_file + title: Type + default: input_file + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + file_url: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: properties: detail: - oneOf: - - type: string - const: low - - type: string - const: high - - type: string - const: auto + title: Detail default: auto - description: >- - Level of detail for image processing, can be "low", "high", or "auto" + type: string + enum: + - low + - high + - auto type: type: string const: input_image + title: Type default: input_image - description: >- - Content type identifier, always "input_image" + file_id: + anyOf: + - type: string + - type: 'null' image_url: - type: string - description: (Optional) URL of the image content - additionalProperties: false - required: - - detail - - type - title: OpenAIResponseInputMessageContentImage - description: >- - Image content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentText: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: properties: text: type: string - description: The text content of the input message + title: Text type: type: string const: input_text + title: Type default: input_text - description: >- - Content type identifier, always "input_text" - additionalProperties: false - required: - - text - - type - title: OpenAIResponseInputMessageContentText - description: >- - Text content for input messages in OpenAI response format. 
- OpenAIResponseMCPApprovalRequest: type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: properties: arguments: type: string + title: Arguments id: type: string + title: Id name: type: string + title: Name server_label: type: string + title: Server Label type: type: string const: mcp_approval_request + title: Type default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: properties: approval_request_id: type: string + title: Approval Request Id approve: type: boolean + title: Approve type: type: string const: mcp_approval_response + title: Type default: mcp_approval_response id: - type: string + anyOf: + - type: string + - type: 'null' reason: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - approval_request_id - - approve - - type + - approval_request_id + - approve title: OpenAIResponseMCPApprovalResponse description: A response to an MCP approval request. OpenAIResponseMessage: - type: object + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
properties: content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] role: - oneOf: - - type: string - const: system - - type: string - const: developer - - type: string - const: user - - type: string - const: assistant - type: + title: Role type: string + enum: + - system + - developer + - user + - assistant + default: system + type: const: message default: message + title: Type + type: string id: - type: string + anyOf: + - type: string + - type: 'null' + nullable: true status: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - content - - role - - type + - content + - role title: OpenAIResponseMessage - description: >- - Corresponds to the various Message types in the Responses API. They are all - under one type because the Responses API gives them all the same "type" value, - and there is no way to tell them apart in certain scenarios. 
+ type: object OpenAIResponseOutputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' - "OpenAIResponseOutputMessageContentOutputText": - type: object + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: properties: text: type: string + title: Text type: type: string const: output_text + title: Type default: output_text annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - additionalProperties: false - required: - - text - - type - - annotations - title: >- - OpenAIResponseOutputMessageContentOutputText - "OpenAIResponseOutputMessageFileSearchToolCall": + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... 
(4 variants) + type: array + title: Annotations type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id queries: - type: array items: type: string - description: List of search queries executed + type: array + title: Queries status: type: string - description: >- - Current status of the file search operation + title: Status type: type: string const: file_search_call + title: Type default: file_search_call - description: >- - Tool call type identifier, always "file_search_call" results: - type: array - items: - type: object - properties: - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes associated with the file - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: >- - Relevance score for this search result (between 0 and 1) - text: - type: string - description: Text content of the search result - additionalProperties: false - required: - - attributes - - file_id - - filename - - score - - text - title: >- - OpenAIResponseOutputMessageFileSearchToolCallResults - description: >- - Search results returned by the file search operation. - description: >- - (Optional) Search results returned by the file search operation - additionalProperties: false - required: - - id - - queries - - status - - type - title: >- - OpenAIResponseOutputMessageFileSearchToolCall - description: >- - File search tool call output message for OpenAI responses. - "OpenAIResponseOutputMessageFunctionToolCall": + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: properties: call_id: type: string - description: Unique identifier for the function call + title: Call Id name: type: string - description: Name of the function being called + title: Name arguments: type: string - description: >- - JSON string containing the function arguments + title: Arguments type: type: string const: function_call + title: Type default: function_call - description: >- - Tool call type identifier, always "function_call" id: - type: string - description: >- - (Optional) Additional identifier for the tool call + anyOf: + - type: string + - type: 'null' status: - type: string - description: >- - (Optional) Current status of the function call execution - additionalProperties: false - required: - - call_id - - name - - arguments - - type - title: >- - OpenAIResponseOutputMessageFunctionToolCall - description: >- - Function tool call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPCall: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. 
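
For reference, a minimal item that should validate against the new OpenAIResponseOutputMessageFunctionToolCall schema might look like the following YAML sketch; the identifiers and argument values are illustrative, not taken from the spec:

    # Hypothetical function_call item; only call_id, name and arguments
    # are required under the new schema, and type defaults to function_call.
    call_id: call_9f2c
    name: get_weather
    arguments: '{"city": "Paris"}'
    type: function_call
    status: completed   # optional, now nullable
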
+ OpenAIResponseOutputMessageMCPCall: properties: id: type: string - description: Unique identifier for this MCP call + title: Id type: type: string const: mcp_call + title: Type default: mcp_call - description: >- - Tool call type identifier, always "mcp_call" arguments: type: string - description: >- - JSON string containing the MCP call arguments + title: Arguments name: type: string - description: Name of the MCP method being called + title: Name server_label: type: string - description: >- - Label identifying the MCP server handling the call + title: Server Label error: - type: string - description: >- - (Optional) Error message if the MCP call failed + anyOf: + - type: string + - type: 'null' output: - type: string - description: >- - (Optional) Output result from the successful MCP call - additionalProperties: false - required: - - id - - type - - arguments - - name - - server_label - title: OpenAIResponseOutputMessageMCPCall - description: >- - Model Context Protocol (MCP) call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPListTools: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: properties: id: type: string - description: >- - Unique identifier for this MCP list tools operation + title: Id type: type: string const: mcp_list_tools + title: Type default: mcp_list_tools - description: >- - Tool call type identifier, always "mcp_list_tools" server_label: type: string - description: >- - Label identifying the MCP server providing the tools + title: Server Label tools: - type: array items: - type: object - properties: - input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - JSON schema defining the tool's input parameters - name: - type: string - description: Name of the tool - description: - type: string - description: >- - (Optional) Description of what the tool does - additionalProperties: false - required: - - input_schema - - name - title: MCPListToolsTool - description: >- - Tool definition returned by MCP list tools operation. - description: >- - List of available tools provided by the MCP server - additionalProperties: false - required: - - id - - type - - server_label - - tools - title: OpenAIResponseOutputMessageMCPListTools - description: >- - MCP list tools output message containing available tools from an MCP server. - "OpenAIResponseOutputMessageWebSearchToolCall": + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. 
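
Assuming the MCPListToolsTool referenced above keeps the name/input_schema shape of the inlined definition it replaces, an mcp_list_tools item under the new schema could look roughly like this sketch with made-up values:

    # Hypothetical mcp_list_tools item; id, server_label and tools are required.
    id: mcpl_01
    type: mcp_list_tools
    server_label: docs-server
    tools:
      - name: search_docs
        input_schema:
          type: object
          properties:
            query:
              type: string
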
+ OpenAIResponseOutputMessageWebSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id status: type: string - description: >- - Current status of the web search operation + title: Status type: type: string const: web_search_call + title: Type default: web_search_call - description: >- - Tool call type identifier, always "web_search_call" - additionalProperties: false - required: - - id - - status - - type - title: >- - OpenAIResponseOutputMessageWebSearchToolCall - description: >- - Web search tool call output message for OpenAI responses. - CreateConversationRequest: type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + CreateConversationRequest: properties: items: - type: array - items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Initial items to include in the conversation context. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + - type: 'null' metadata: - type: object - additionalProperties: - type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object title: CreateConversationRequest Conversation: - type: object properties: id: type: string + title: Id + description: The unique ID of the conversation. object: type: string const: conversation + title: Object + description: The object type, which is always conversation. 
default: conversation created_at: type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. metadata: - type: object - additionalProperties: - type: string - items: - type: array - items: + anyOf: + - additionalProperties: + type: string type: object - title: dict - description: >- - dict() -> new empty dictionary dict(mapping) -> new dictionary initialized - from a mapping object's (key, value) pairs dict(iterable) -> new - dictionary initialized as if via: d = {} for k, v in iterable: d[k] - = v dict(**kwargs) -> new dictionary initialized with the name=value - pairs in the keyword argument list. For example: dict(one=1, two=2) - additionalProperties: false + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object required: - - id - - object - - created_at + - id + - created_at title: Conversation description: OpenAI-compatible conversation object. UpdateConversationRequest: - type: object properties: metadata: - type: object additionalProperties: type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + type: object + title: Metadata + type: object required: - - metadata + - metadata title: UpdateConversationRequest ConversationDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted conversation identifier object: type: string + title: Object + description: Object type default: conversation.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationDeletedResource description: Response for deleted conversation. 
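
With object and deleted dropped from required in favor of their defaults, the smallest conforming delete response is just an id; a fully spelled-out payload (illustrative id) would be:

    # Hypothetical delete response; object and deleted fall back to defaults.
    id: conv_123
    object: conversation.deleted
    deleted: true
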
ConversationItemList: - type: object properties: object: type: string + title: Object + description: Object type default: list data: - type: array items: - $ref: '#/components/schemas/ConversationItem' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (9 variants) + type: array + title: Data + description: List of conversation items first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list has_more: type: boolean + title: Has More + description: Whether there are more items available default: false - additionalProperties: false - required: - - object - - data - - has_more - title: ConversationItemList - description: >- - List of conversation items with pagination. - AddItemsRequest: type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + AddItemsRequest: properties: items: - type: array items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Items to include in the conversation context. 
- additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + title: Items + type: object required: - - items + - items title: AddItemsRequest ConversationItemDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted item identifier object: type: string + title: Object + description: Object type default: conversation.item.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationItemDeletedResource description: Response for deleted conversation item. OpenAIEmbeddingsRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. + title: Model input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input text to embed, encoded as a string or array of strings. To embed - multiple inputs in a single request, pass an array of strings. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] encoding_format: - type: string + anyOf: + - type: string + - type: 'null' default: float - description: >- - (Optional) The format to return the embeddings in. Can be either "float" - or "base64". Defaults to "float". dimensions: - type: integer - description: >- - (Optional) The number of dimensions the resulting output embeddings should - have. 
Only supported in text-embedding-3 and later models. + anyOf: + - type: integer + - type: 'null' user: - type: string - description: >- - (Optional) A unique identifier representing your end-user, which can help - OpenAI to monitor and detect abuse. - additionalProperties: false - required: - - model - - input - title: OpenAIEmbeddingsRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible embeddings endpoint. - OpenAIEmbeddingData: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: properties: object: type: string const: embedding + title: Object default: embedding - description: >- - The object type, which will be "embedding" embedding: - oneOf: - - type: array - items: - type: number - - type: string - description: >- - The embedding vector as a list of floats (when encoding_format="float") - or as a base64-encoded string (when encoding_format="base64") + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string index: type: integer - description: >- - The index of the embedding in the input list - additionalProperties: false - required: - - object - - embedding - - index - title: OpenAIEmbeddingData - description: >- - A single embedding data object from an OpenAI-compatible embeddings response. - OpenAIEmbeddingUsage: + title: Index type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: properties: prompt_tokens: type: integer - description: The number of tokens in the input + title: Prompt Tokens total_tokens: type: integer - description: The total number of tokens used - additionalProperties: false - required: - - prompt_tokens - - total_tokens - title: OpenAIEmbeddingUsage - description: >- - Usage information for an OpenAI-compatible embeddings response. - OpenAIEmbeddingsResponse: + title: Total Tokens type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: properties: object: type: string const: list + title: Object default: list - description: The object type, which will be "list" data: - type: array items: $ref: '#/components/schemas/OpenAIEmbeddingData' - description: List of embedding data objects + type: array + title: Data model: type: string - description: >- - The model that was used to generate the embeddings + title: Model usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' - description: Usage information - additionalProperties: false + type: object required: - - object - - data - - model - - usage + - data + - model + - usage title: OpenAIEmbeddingsResponse - description: >- - Response from an OpenAI-compatible embeddings request. + description: Response from an OpenAI-compatible embeddings request. OpenAIFilePurpose: type: string enum: - - assistants - - batch + - assistants + - batch title: OpenAIFilePurpose - description: >- - Valid purpose values for OpenAI Files API. + description: Valid purpose values for OpenAI Files API. 
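
Tying the embeddings schemas above together, a response conforming to OpenAIEmbeddingsResponse might look like the sketch below; the model id and vector values are placeholders:

    # Hypothetical embeddings response; object defaults to "list".
    object: list
    model: example-embedding-model
    data:
      - object: embedding
        index: 0
        embedding: [0.12, -0.03, 0.57]
    usage:
      prompt_tokens: 5
      total_tokens: 5
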
ListOpenAIFileResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/OpenAIFileObject' - description: List of file objects + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more files available beyond this page + title: Has More first_id: type: string - description: >- - ID of the first file in the list for pagination + title: First Id last_id: type: string - description: >- - ID of the last file in the list for pagination + title: Last Id object: type: string const: list + title: Object default: list - description: The object type, which is always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIFileResponse - description: >- - Response for listing files in OpenAI Files API. - OpenAIFileObject: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: properties: object: type: string const: file + title: Object default: file - description: The object type, which is always "file" id: type: string - description: >- - The file identifier, which can be referenced in the API endpoints + title: Id bytes: type: integer - description: The size of the file, in bytes + title: Bytes created_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file was created + title: Created At expires_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file expires + title: Expires At filename: type: string - description: The name of the file + title: Filename purpose: - type: string - enum: - - assistants - - batch - description: The intended purpose of the file - additionalProperties: false - required: - - object - - id - - bytes - - created_at - - expires_at - - filename - - purpose - title: OpenAIFileObject - description: >- - OpenAI File object as defined in the OpenAI Files API. - ExpiresAfter: + $ref: '#/components/schemas/OpenAIFilePurpose' type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: properties: anchor: type: string const: created_at + title: Anchor seconds: type: integer - additionalProperties: false + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object required: - - anchor - - seconds + - anchor + - seconds title: ExpiresAfter - description: >- + description: |- Control expiration of uploaded files. Params: - anchor, must be "created_at" - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) OpenAIFileDeleteResponse: - type: object properties: id: type: string - description: The file identifier that was deleted + title: Id object: type: string const: file + title: Object default: file - description: The object type, which is always "file" deleted: type: boolean - description: >- - Whether the file was successfully deleted - additionalProperties: false + title: Deleted + type: object required: - - id - - object - - deleted + - id + - deleted title: OpenAIFileDeleteResponse - description: >- - Response for deleting a file in OpenAI Files API. + description: Response for deleting a file in OpenAI Files API. 
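
Note that ExpiresAfter now enforces the documented 1-hour-to-30-days window through minimum/maximum rather than prose alone; a valid value (illustrative) is:

    # Hypothetical expires_after; seconds must lie within 3600..2592000.
    anchor: created_at
    seconds: 86400   # one day
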
Response: - type: object title: Response - HealthInfo: type: object + HealthInfo: properties: status: - type: string - enum: - - OK - - Error - - Not Implemented - description: Current health status of the service - additionalProperties: false - required: - - status - title: HealthInfo - description: >- - Health status information for the service. - RouteInfo: + $ref: '#/components/schemas/HealthStatus' type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: properties: route: type: string - description: The API endpoint path + title: Route method: type: string - description: HTTP method for the route + title: Method provider_types: - type: array items: type: string - description: >- - List of provider types that implement this route - additionalProperties: false - required: - - route - - method - - provider_types - title: RouteInfo - description: >- - Information about an API route including its path, method, and implementing - providers. - ListRoutesResponse: + type: array + title: Provider Types type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: properties: data: - type: array items: $ref: '#/components/schemas/RouteInfo' - description: >- - List of available route information objects - additionalProperties: false - required: - - data - title: ListRoutesResponse - description: >- - Response containing a list of all available API routes. - Model: + type: array + title: Data type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: + properties: + id: + type: string + title: Id + object: + type: string + const: model + title: Object + default: model + created: + type: integer + title: Created + owned_by: + type: string + title: Owned By + custom_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIModel' + type: array + title: Data + type: object + required: + - data + title: OpenAIListModelsResponse + Model: properties: identifier: type: string - description: >- - Unique identifier for this resource in llama stack + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string - description: >- - ID of the provider that owns this resource + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: model + title: Type default: model - description: >- - The resource type, always 'model' for model resources metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Any additional metadata for this model model_type: $ref: '#/components/schemas/ModelType' default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false + type: object required: - - identifier - - provider_id - - type - - metadata - - model_type + - identifier + - provider_id title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. + description: A model resource representing an AI model registered in Llama Stack. ModelType: type: string enum: - - llm - - embedding + - llm + - embedding + - rerank title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - ListModelsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Model' - additionalProperties: false - required: - - data - title: ListModelsResponse - RegisterModelRequest: - type: object - properties: - model_id: - type: string - description: The identifier of the model to register. - provider_model_id: - type: string - description: >- - The identifier of the model in the provider. - provider_id: - type: string - description: The identifier of the provider. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model. - model_type: - $ref: '#/components/schemas/ModelType' - description: The type of model to register. - additionalProperties: false - required: - - model_id - title: RegisterModelRequest + description: Enumeration of supported model types in Llama Stack. RunModerationRequest: - type: object properties: input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input (or inputs) to classify. Can be a single string, an array of strings, - or an array of multi-modal input objects similar to other models. 
+ anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] model: - type: string - description: >- - The content moderation model you would like to use. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input - - model + - input title: RunModerationRequest ModerationObject: - type: object properties: id: type: string - description: >- - The unique identifier for the moderation request. + title: Id model: type: string - description: >- - The model used to generate the moderation results. + title: Model results: - type: array items: $ref: '#/components/schemas/ModerationObjectResults' - description: A list of moderation objects - additionalProperties: false + type: array + title: Results + type: object required: - - id - - model - - results + - id + - model + - results title: ModerationObject description: A moderation object. ModerationObjectResults: - type: object properties: flagged: type: boolean - description: >- - Whether any of the below categories are flagged. + title: Flagged categories: - type: object - additionalProperties: - type: boolean - description: >- - A list of the categories, and whether they are flagged or not. + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' category_applied_input_types: - type: object - additionalProperties: - type: array - items: - type: string - description: >- - A list of the categories along with the input type(s) that the score applies - to. + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' category_scores: - type: object - additionalProperties: - type: number - description: >- - A list of the categories along with their scores as predicted by model. + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' user_message: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false + title: Metadata + type: object required: - - flagged - - metadata + - flagged title: ModerationObjectResults description: A moderation object. Prompt: - type: object properties: prompt: - type: string - description: >- - The system prompt text with variable placeholders. Variables are only - supported when using the Responses API. 
+ anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders version: type: integer - description: >- - Version (integer starting at 1, incremented on save) + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) prompt_id: type: string - description: >- - Unique identifier formatted as 'pmpt_<48-digit-hash>' + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' variables: - type: array items: type: string - description: >- - List of prompt variable names that can be used in the prompt template + type: array + title: Variables + description: List of variable names that can be used in the prompt template is_default: type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version default: false - description: >- - Boolean indicating whether this version is the default version for this - prompt - additionalProperties: false - required: - - version - - prompt_id - - variables - - is_default - title: Prompt - description: >- - A prompt resource representing a stored OpenAI Compatible prompt template - in Llama Stack. - ListPromptsResponse: type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. + ListPromptsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Prompt' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListPromptsResponse description: Response model to list prompts. CreatePromptRequest: - type: object properties: prompt: type: string - description: >- - The prompt text content with variable placeholders. + title: Prompt variables: - type: array - items: - type: string - description: >- - List of variable names that can be used in the prompt template. - additionalProperties: false + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object required: - - prompt + - prompt title: CreatePromptRequest UpdatePromptRequest: - type: object properties: prompt: type: string - description: The updated prompt text content. + title: Prompt version: type: integer - description: >- - The current version of the prompt being updated. + title: Version variables: - type: array - items: - type: string - description: >- - Updated list of variable names that can be used in the prompt template. + anyOf: + - items: + type: string + type: array + - type: 'null' set_as_default: type: boolean - description: >- - Set the new version as the default (default=True). - additionalProperties: false + title: Set As Default + default: true + type: object required: - - prompt - - version - - set_as_default + - prompt + - version title: UpdatePromptRequest SetDefaultVersionRequest: - type: object properties: version: type: integer - description: The version to set as default. 
- additionalProperties: false + title: Version + type: object required: - - version + - version title: SetDefaultVersionRequest ProviderInfo: - type: object properties: api: type: string - description: The API name this provider implements + title: Api provider_id: type: string - description: Unique identifier for the provider + title: Provider Id provider_type: type: string - description: The type of provider implementation + title: Provider Type config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Configuration parameters for the provider + title: Config health: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Current health status of the provider - additionalProperties: false - required: - - api - - provider_id - - provider_type - - config - - health - title: ProviderInfo - description: >- - Information about a registered provider including its configuration and health - status. - ListProvidersResponse: + title: Health type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: properties: data: - type: array items: $ref: '#/components/schemas/ProviderInfo' - description: List of provider information objects - additionalProperties: false - required: - - data - title: ListProvidersResponse - description: >- - Response containing a list of all available providers. - ListOpenAIResponseObject: + type: array + title: Data type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. + ListOpenAIResponseObject: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more results available beyond this page + title: Has More first_id: type: string - description: >- - Identifier of the first item in this page + title: First Id last_id: type: string - description: Identifier of the last item in this page + title: Last Id object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. - OpenAIResponseError: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: properties: code: type: string - description: >- - Error code identifying the type of failure + title: Code message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: OpenAIResponseError - description: >- - Error details for failed OpenAI response requests. 
- OpenAIResponseInput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMessage' - OpenAIResponseInputToolFileSearch: + title: Message type: object + required: + - code + - message + title: OpenAIResponseError + description: Error details for failed OpenAI response requests. + OpenAIResponseInput: + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage + OpenAIResponseInputToolFileSearch: properties: type: type: string const: file_search + title: Type default: file_search - description: >- - Tool type identifier, always "file_search" vector_store_ids: - type: array items: type: string - description: >- - List of vector store identifiers to search within + type: array + title: Vector Store Ids filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional filters to apply to the search + anyOf: + - additionalProperties: true + type: object + - type: 'null' max_num_results: - type: integer + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' default: 10 - description: >- - (Optional) Maximum number of search results to return (1-50) ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - (Optional) Options for ranking and scoring search results - additionalProperties: false - required: - - type - - vector_store_ids - title: OpenAIResponseInputToolFileSearch - description: >- - File search tool configuration for OpenAI response inputs. - OpenAIResponseInputToolFunction: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: properties: type: type: string const: function + title: Type default: function - description: Tool type identifier, always "function" name: type: string - description: Name of the function that can be called + title: Name description: - type: string - description: >- - (Optional) Description of what the function does + anyOf: + - type: string + - type: 'null' parameters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON schema defining the function's parameters + anyOf: + - additionalProperties: true + type: object + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict parameter validation - additionalProperties: false - required: - - type - - name - title: OpenAIResponseInputToolFunction - description: >- - Function tool configuration for OpenAI response inputs. - OpenAIResponseInputToolWebSearch: + anyOf: + - type: boolean + - type: 'null' type: object + required: + - name + - parameters + title: OpenAIResponseInputToolFunction + description: Function tool configuration for OpenAI response inputs. 
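
One behavioral change worth flagging: parameters moved into required for OpenAIResponseInputToolFunction while staying nullable via anyOf, so clients must now send the key explicitly, even as null. A conforming tool definition might look like this sketch with invented names:

    # Hypothetical function tool; parameters is required but may be null.
    type: function
    name: lookup_order
    description: Look up an order by id
    parameters:
      type: object
      properties:
        order_id:
          type: string
      required:
        - order_id
    strict: true
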
+ OpenAIResponseInputToolWebSearch: properties: type: - oneOf: - - type: string - const: web_search - - type: string - const: web_search_preview - - type: string - const: web_search_preview_2025_03_11 + title: Type default: web_search - description: Web search tool type variant to use - search_context_size: type: string + enum: + - web_search + - web_search_preview + - web_search_preview_2025_03_11 + - web_search_2025_08_26 + search_context_size: + anyOf: + - type: string + pattern: ^low|medium|high$ + - type: 'null' default: medium - description: >- - (Optional) Size of search context, must be "low", "medium", or "high" - additionalProperties: false - required: - - type - title: OpenAIResponseInputToolWebSearch - description: >- - Web search tool configuration for OpenAI response inputs. - OpenAIResponseObjectWithInput: type: object + title: OpenAIResponseInputToolWebSearch + description: Web search tool configuration for OpenAI response inputs. + OpenAIResponseObjectWithInput: properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + type: array + title: Output parallel_tool_calls: - type: boolean - default: false - description: >- - Whether tool calls can be executed in parallel + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: - type: array items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: >- - List of input items that led to this response - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Input + type: object required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - - input + - created_at + - id + - model + - output + - status + - input title: OpenAIResponseObjectWithInput - description: >- - OpenAI response object extended with input context information. + description: OpenAI response object extended with input context information. 
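
The required list for OpenAIResponseObjectWithInput shrank (object, text and parallel_tool_calls now rely on defaults), and parallel_tool_calls flips its default from false to true. A minimal conforming object, with placeholder ids and timestamps, could look like:

    # Hypothetical minimal response-with-input; omitted fields use defaults.
    id: resp_001
    created_at: 1730000000
    model: example-model
    status: completed
    output:
      - type: message
        role: assistant
        content: Hello!
    input:
      - type: message
        role: user
        content: Say hello.
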
     OpenAIResponseOutput:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseMessage'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
       discriminator:
-        propertyName: type
         mapping:
-          message: '#/components/schemas/OpenAIResponseMessage'
-          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
           file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
           function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
           mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-    OpenAIResponseText:
+          message: '#/components/schemas/OpenAIResponseMessage'
+          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        propertyName: type
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseMessage'
+          title: OpenAIResponseMessage
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+          title: OpenAIResponseOutputMessageWebSearchToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+          title: OpenAIResponseOutputMessageFileSearchToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          title: OpenAIResponseOutputMessageFunctionToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+          title: OpenAIResponseOutputMessageMCPCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+          title: OpenAIResponseOutputMessageMCPListTools
+        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+          title: OpenAIResponseMCPApprovalRequest
+      title: OpenAIResponseMessage | ... (7 variants)
+    OpenAIResponsePrompt:
+      properties:
+        id:
+          type: string
+          title: Id
+        variables:
+          anyOf:
+            - additionalProperties:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                    title: OpenAIResponseInputMessageContentText
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                    title: OpenAIResponseInputMessageContentImage
+                  - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                    title: OpenAIResponseInputMessageContentFile
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                    input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                    input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile
+              type: object
+            - type: 'null'
+        version:
+          anyOf:
+            - type: string
+            - type: 'null'
       type: object
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: OpenAI compatible Prompt object that is used in OpenAI responses.
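Note: for reference, a payload matching the new `OpenAIResponsePrompt` schema might look like the following; the id, version, and variable contents are invented for illustration, not taken from the repo.

```python
# Illustrative only: 'pmpt_123' and the variable values are made up.
prompt = {
    "id": "pmpt_123",   # required
    "version": "2",     # optional: string or null
    "variables": {      # optional map of name -> input content part
        "city": {"type": "input_text", "text": "Tokyo"},
    },
}
```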
+    OpenAIResponseText:
       properties:
         format:
-          type: object
-          properties:
-            type:
-              oneOf:
-                - type: string
-                  const: text
-                - type: string
-                  const: json_schema
-                - type: string
-                  const: json_object
-              description: >-
-                Must be "text", "json_schema", or "json_object" to identify the format
-                type
-            name:
-              type: string
-              description: >-
-                The name of the response format. Only used for json_schema.
-            schema:
-              type: object
-              additionalProperties:
-                oneOf:
-                  - type: 'null'
-                  - type: boolean
-                  - type: number
-                  - type: string
-                  - type: array
-                  - type: object
-              description: >-
-                The JSON schema the response should conform to. In a Python SDK, this
-                is often a `pydantic` model. Only used for json_schema.
-            description:
-              type: string
-              description: >-
-                (Optional) A description of the response format. Only used for json_schema.
-            strict:
-              type: boolean
-              description: >-
-                (Optional) Whether to strictly enforce the JSON schema. If true, the
-                response must match the schema exactly. Only used for json_schema.
-          additionalProperties: false
-          required:
-            - type
-          description: >-
-            (Optional) Text format configuration specifying output format requirements
-      additionalProperties: false
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseTextFormat'
+              title: OpenAIResponseTextFormat
+            - type: 'null'
+          title: OpenAIResponseTextFormat
+      type: object
       title: OpenAIResponseText
-      description: >-
-        Text response configuration for OpenAI responses.
+      description: Text response configuration for OpenAI responses.
     OpenAIResponseTool:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
-        - $ref: '#/components/schemas/OpenAIResponseToolMCP'
       discriminator:
-        propertyName: type
         mapping:
-          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
           file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
           function: '#/components/schemas/OpenAIResponseInputToolFunction'
           mcp: '#/components/schemas/OpenAIResponseToolMCP'
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        propertyName: type
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          title: OpenAIResponseInputToolWebSearch
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+          title: OpenAIResponseInputToolFileSearch
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+          title: OpenAIResponseInputToolFunction
+        - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+          title: OpenAIResponseToolMCP
+      title: OpenAIResponseInputToolWebSearch | ... (4 variants)
     OpenAIResponseToolMCP:
-      type: object
       properties:
         type:
           type: string
           const: mcp
+          title: Type
           default: mcp
-          description: Tool type identifier, always "mcp"
         server_label:
           type: string
-          description: Label to identify this MCP server
+          title: Server Label
         allowed_tools:
-          oneOf:
-            - type: array
-              items:
-                type: string
-            - type: object
-              properties:
-                tool_names:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of specific tool names that are allowed
-              additionalProperties: false
-              title: AllowedToolsFilter
-              description: >-
-                Filter configuration for restricting which MCP tools can be used.
-          description: >-
-            (Optional) Restriction on which tools can be used from this server
-      additionalProperties: false
-      required:
-      - type
-      - server_label
-      title: OpenAIResponseToolMCP
-      description: >-
-        Model Context Protocol (MCP) tool configuration for OpenAI response object.
-    OpenAIResponseUsage:
+          anyOf:
+            - items:
+                type: string
+              type: array
+              title: list[string]
+            - $ref: '#/components/schemas/AllowedToolsFilter'
+              title: AllowedToolsFilter
+            - type: 'null'
+          title: list[string] | AllowedToolsFilter
       type: object
+      required:
+        - server_label
+      title: OpenAIResponseToolMCP
+      description: Model Context Protocol (MCP) tool configuration for OpenAI response object.
+    OpenAIResponseUsage:
       properties:
         input_tokens:
           type: integer
-          description: Number of tokens in the input
+          title: Input Tokens
         output_tokens:
           type: integer
-          description: Number of tokens in the output
+          title: Output Tokens
         total_tokens:
           type: integer
-          description: Total tokens used (input + output)
+          title: Total Tokens
         input_tokens_details:
-          type: object
-          properties:
-            cached_tokens:
-              type: integer
-              description: Number of tokens retrieved from cache
-          additionalProperties: false
-          description: Detailed breakdown of input token usage
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails'
+              title: OpenAIResponseUsageInputTokensDetails
+            - type: 'null'
+          title: OpenAIResponseUsageInputTokensDetails
         output_tokens_details:
-          type: object
-          properties:
-            reasoning_tokens:
-              type: integer
-              description: >-
-                Number of tokens used for reasoning (o1/o3 models)
-          additionalProperties: false
-          description: Detailed breakdown of output token usage
-      additionalProperties: false
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails'
+              title: OpenAIResponseUsageOutputTokensDetails
+            - type: 'null'
+          title: OpenAIResponseUsageOutputTokensDetails
+      type: object
       required:
-      - input_tokens
-      - output_tokens
-      - total_tokens
+        - input_tokens
+        - output_tokens
+        - total_tokens
       title: OpenAIResponseUsage
       description: Usage information for OpenAI response.
     ResponseGuardrailSpec:
-      type: object
+      description: Specification for a guardrail to apply during response generation.
       properties:
         type:
+          title: Type
          type: string
-          description: The type/identifier of the guardrail.
-      additionalProperties: false
       required:
-      - type
+        - type
       title: ResponseGuardrailSpec
-      description: >-
-        Specification for a guardrail to apply during response generation.
+      type: object
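Note: the invariant stated in the removed descriptions of `OpenAIResponseUsage` above still holds under the new schema: `total_tokens` is input plus output, with the optional cache and reasoning breakdowns now factored into `OpenAIResponseUsageInputTokensDetails` / `OpenAIResponseUsageOutputTokensDetails`. An illustrative payload (values invented):

```python
usage = {
    "input_tokens": 120,
    "output_tokens": 80,
    "total_tokens": 200,                                # input + output
    "input_tokens_details": {"cached_tokens": 100},     # nullable
    "output_tokens_details": {"reasoning_tokens": 64},  # nullable
}
assert usage["total_tokens"] == usage["input_tokens"] + usage["output_tokens"]
```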
     OpenAIResponseInputTool:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
       discriminator:
-        propertyName: type
         mapping:
-          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
           file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
           function: '#/components/schemas/OpenAIResponseInputToolFunction'
           mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        propertyName: type
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          title: OpenAIResponseInputToolWebSearch
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+          title: OpenAIResponseInputToolFileSearch
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+          title: OpenAIResponseInputToolFunction
+        - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+          title: OpenAIResponseInputToolMCP
+      title: OpenAIResponseInputToolWebSearch | ... (4 variants)
     OpenAIResponseInputToolMCP:
-      type: object
       properties:
         type:
           type: string
           const: mcp
+          title: Type
           default: mcp
-          description: Tool type identifier, always "mcp"
         server_label:
           type: string
-          description: Label to identify this MCP server
+          title: Server Label
         server_url:
           type: string
-          description: URL endpoint of the MCP server
+          title: Server Url
         headers:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) HTTP headers to include when connecting to the server
+          anyOf:
+            - additionalProperties: true
+              type: object
+            - type: 'null'
+        authorization:
+          anyOf:
+            - type: string
+            - type: 'null'
         require_approval:
-          oneOf:
-            - type: string
-              const: always
-            - type: string
-              const: never
-            - type: object
-              properties:
-                always:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of tool names that always require approval
-                never:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of tool names that never require approval
-              additionalProperties: false
-              title: ApprovalFilter
-              description: >-
-                Filter configuration for MCP tool approval requirements.
+          anyOf:
+            - type: string
+              const: always
+            - type: string
+              const: never
+            - $ref: '#/components/schemas/ApprovalFilter'
+              title: ApprovalFilter
+          title: string | ApprovalFilter
           default: never
-          description: >-
-            Approval requirement for tool calls ("always", "never", or filter)
         allowed_tools:
-          oneOf:
-            - type: array
-              items:
-                type: string
-            - type: object
-              properties:
-                tool_names:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of specific tool names that are allowed
-              additionalProperties: false
-              title: AllowedToolsFilter
-              description: >-
-                Filter configuration for restricting which MCP tools can be used.
-          description: >-
-            (Optional) Restriction on which tools can be used from this server
-      additionalProperties: false
-      required:
-      - type
-      - server_label
-      - server_url
-      - require_approval
-      title: OpenAIResponseInputToolMCP
-      description: >-
-        Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
-    CreateOpenaiResponseRequest:
+          anyOf:
+            - items:
+                type: string
+              type: array
+              title: list[string]
+            - $ref: '#/components/schemas/AllowedToolsFilter'
+              title: AllowedToolsFilter
+            - type: 'null'
+          title: list[string] | AllowedToolsFilter
       type: object
+      required:
+        - server_label
+        - server_url
+      title: OpenAIResponseInputToolMCP
+      description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
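Note: under the new `OpenAIResponseInputToolMCP` schema, only `server_label` and `server_url` are required; `require_approval` now defaults to `never`, and `allowed_tools` accepts a plain list or an `AllowedToolsFilter`. A sketch of a conforming tool entry (server URL, token, and tool names are placeholders):

```python
# Placeholder values throughout; shape follows the schema in this hunk.
mcp_tool = {
    "type": "mcp",
    "server_label": "docs",
    "server_url": "https://mcp.example.com/sse",
    "authorization": "Bearer <token>",      # nullable
    "headers": {"X-Team": "platform"},      # nullable free-form object
    "require_approval": "never",            # "always" | "never" | ApprovalFilter
    "allowed_tools": ["search", "fetch"],   # or AllowedToolsFilter, or null
}
```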
+    CreateOpenaiResponseRequest:
       properties:
         input:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/OpenAIResponseInput'
-          description: Input message(s) to create the response.
+          anyOf:
+            - type: string
+            - items:
+                anyOf:
+                  - oneOf:
+                      - $ref: '#/components/schemas/OpenAIResponseMessage-Input'
+                        title: OpenAIResponseMessage-Input
+                      - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                        title: OpenAIResponseOutputMessageWebSearchToolCall
+                      - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                        title: OpenAIResponseOutputMessageFileSearchToolCall
+                      - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                        title: OpenAIResponseOutputMessageFunctionToolCall
+                      - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                        title: OpenAIResponseOutputMessageMCPCall
+                      - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                        title: OpenAIResponseOutputMessageMCPListTools
+                      - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                        title: OpenAIResponseMCPApprovalRequest
+                    discriminator:
+                      propertyName: type
+                      mapping:
+                        file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                        function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                        mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                        mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                        mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                        message: '#/components/schemas/OpenAIResponseMessage-Input'
+                        web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                    title: OpenAIResponseMessage-Input | ... (7 variants)
+                  - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+                    title: OpenAIResponseInputFunctionToolCallOutput
+                  - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+                    title: OpenAIResponseMCPApprovalResponse
+                  - $ref: '#/components/schemas/OpenAIResponseMessage-Input'
+                    title: OpenAIResponseMessage-Input
+                title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Input
+              type: array
+              title: list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...]
+          title: string | list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...]
         model:
           type: string
-          description: The underlying LLM used for completions.
+          title: Model
+        prompt:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponsePrompt'
+              title: OpenAIResponsePrompt
+            - type: 'null'
+          title: OpenAIResponsePrompt
         instructions:
-          type: string
+          anyOf:
+            - type: string
+            - type: 'null'
+        parallel_tool_calls:
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: true
         previous_response_id:
-          type: string
-          description: >-
-            (Optional) if specified, the new response will be a continuation of the
-            previous response. This can be used to easily fork-off new responses from
-            existing responses.
+          anyOf:
+            - type: string
+            - type: 'null'
         conversation:
-          type: string
-          description: >-
-            (Optional) The ID of a conversation to add the response to. Must begin
-            with 'conv_'. Input and output messages will be automatically added to
-            the conversation.
+          anyOf:
+            - type: string
+            - type: 'null'
         store:
-          type: boolean
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: true
         stream:
-          type: boolean
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: false
         temperature:
-          type: number
+          anyOf:
+            - type: number
+            - type: 'null'
         text:
-          $ref: '#/components/schemas/OpenAIResponseText'
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseText'
+              title: OpenAIResponseText
+            - type: 'null'
+          title: OpenAIResponseText
         tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIResponseInputTool'
+          anyOf:
+            - items:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    title: OpenAIResponseInputToolWebSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    title: OpenAIResponseInputToolFileSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    title: OpenAIResponseInputToolFunction
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+                    title: OpenAIResponseInputToolMCP
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+                    web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+              type: array
+            - type: 'null'
         include:
-          type: array
-          items:
-            type: string
-          description: >-
-            (Optional) Additional fields to include in the response.
+          anyOf:
+            - items:
+                type: string
+              type: array
+            - type: 'null'
         max_infer_iters:
-          type: integer
-      additionalProperties: false
+          anyOf:
+            - type: integer
+            - type: 'null'
+          default: 10
+        max_tool_calls:
+          anyOf:
+            - type: integer
+            - type: 'null'
+        metadata:
+          anyOf:
+            - additionalProperties:
+                type: string
+              type: object
+            - type: 'null'
+      type: object
       required:
-      - input
-      - model
+        - input
+        - model
       title: CreateOpenaiResponseRequest
     OpenAIResponseObject:
-      type: object
       properties:
         created_at:
           type: integer
-          description: >-
-            Unix timestamp when the response was created
+          title: Created At
         error:
-          $ref: '#/components/schemas/OpenAIResponseError'
-          description: >-
-            (Optional) Error details if the response generation failed
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseError'
+              title: OpenAIResponseError
+            - type: 'null'
+          title: OpenAIResponseError
         id:
           type: string
-          description: Unique identifier for this response
+          title: Id
         model:
           type: string
-          description: Model identifier used for generation
+          title: Model
         object:
           type: string
           const: response
+          title: Object
           default: response
-          description: >-
-            Object type identifier, always "response"
         output:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseOutput'
-          description: >-
-            List of generated output items (messages, tool calls, etc.)
+            oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseOutputMessageWebSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                title: OpenAIResponseOutputMessageFileSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                title: OpenAIResponseOutputMessageFunctionToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                title: OpenAIResponseOutputMessageMCPCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                title: OpenAIResponseOutputMessageMCPListTools
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                title: OpenAIResponseMCPApprovalRequest
+            discriminator:
+              propertyName: type
+              mapping:
+                file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                message: '#/components/schemas/OpenAIResponseMessage-Output'
+                web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseMessage-Output | ... (7 variants)
+          type: array
+          title: Output
         parallel_tool_calls:
-          type: boolean
-          default: false
-          description: >-
-            Whether tool calls can be executed in parallel
+          anyOf:
+            - type: boolean
+            - type: 'null'
+          default: true
         previous_response_id:
-          type: string
-          description: >-
-            (Optional) ID of the previous response in a conversation
+          anyOf:
+            - type: string
+            - type: 'null'
+        prompt:
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponsePrompt'
+              title: OpenAIResponsePrompt
+            - type: 'null'
+          title: OpenAIResponsePrompt
         status:
           type: string
-          description: >-
-            Current status of the response generation
+          title: Status
         temperature:
-          type: number
-          description: >-
-            (Optional) Sampling temperature used for generation
+          anyOf:
+            - type: number
+            - type: 'null'
         text:
           $ref: '#/components/schemas/OpenAIResponseText'
-          description: >-
-            Text formatting configuration for the response
+          default:
+            format:
+              type: text
         top_p:
-          type: number
-          description: >-
-            (Optional) Nucleus sampling parameter used for generation
+          anyOf:
+            - type: number
+            - type: 'null'
         tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIResponseTool'
-          description: >-
-            (Optional) An array of tools the model may call while generating a response.
+          anyOf:
+            - items:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    title: OpenAIResponseInputToolWebSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    title: OpenAIResponseInputToolFileSearch
+                  - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    title: OpenAIResponseInputToolFunction
+                  - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+                    title: OpenAIResponseToolMCP
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                    function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                    mcp: '#/components/schemas/OpenAIResponseToolMCP'
+                    web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                    web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+              type: array
+            - type: 'null'
         truncation:
-          type: string
-          description: >-
-            (Optional) Truncation strategy applied to the response
+          anyOf:
+            - type: string
+            - type: 'null'
         usage:
-          $ref: '#/components/schemas/OpenAIResponseUsage'
-          description: >-
-            (Optional) Token usage information for the response
+          anyOf:
+            - $ref: '#/components/schemas/OpenAIResponseUsage'
+              title: OpenAIResponseUsage
+            - type: 'null'
+          title: OpenAIResponseUsage
         instructions:
-          type: string
-          description: >-
-            (Optional) System message inserted into the model's context
-      additionalProperties: false
-      required:
-      - created_at
-      - id
-      - model
-      - object
-      - output
-      - parallel_tool_calls
-      - status
-      - text
-      title: OpenAIResponseObject
-      description: >-
-        Complete OpenAI response object containing generation results and metadata.
-    OpenAIResponseContentPartOutputText:
+          anyOf:
+            - type: string
+            - type: 'null'
+        max_tool_calls:
+          anyOf:
+            - type: integer
+            - type: 'null'
+        metadata:
+          anyOf:
+            - additionalProperties:
+                type: string
+              type: object
+            - type: 'null'
       type: object
+      required:
+        - created_at
+        - id
+        - model
+        - output
+        - status
+      title: OpenAIResponseObject
+      description: Complete OpenAI response object containing generation results and metadata.
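Note: per `CreateOpenaiResponseRequest` above, only `input` and `model` are required; `store` defaults to true, `stream` to false, and `max_infer_iters` to 10. A minimal conforming request body (model id invented; this shows the payload shape only, not any particular client API):

```python
# Minimal request body; everything beyond 'input' and 'model' is optional.
request = {
    "input": "What is the capital of France?",  # string or list of input items
    "model": "llama-3.1-8b-instruct",           # placeholder model id
    "stream": False,                            # default per the schema
}
```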
+    OpenAIResponseContentPartOutputText:
+      description: Text content within a streamed response part.
       properties:
         type:
-          type: string
           const: output_text
           default: output_text
-          description: >-
-            Content part type identifier, always "output_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Text emitted for this content part
         annotations:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseAnnotations'
-          description: >-
-            Structured annotations associated with the text
+            discriminator:
+              mapping:
+                container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+                file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+                file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+                url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+              propertyName: type
+            oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+                title: OpenAIResponseAnnotationFileCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+                title: OpenAIResponseAnnotationCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+                title: OpenAIResponseAnnotationContainerFileCitation
+              - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+                title: OpenAIResponseAnnotationFilePath
+            title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
+          title: Annotations
+          type: array
         logprobs:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: (Optional) Token log probability details
-      additionalProperties: false
+          anyOf:
+            - items:
+                additionalProperties: true
+                type: object
+              type: array
+            - type: 'null'
+          nullable: true
       required:
-      - type
-      - text
-      - annotations
+        - text
       title: OpenAIResponseContentPartOutputText
-      description: >-
-        Text content within a streamed response part.
-    "OpenAIResponseContentPartReasoningSummary":
       type: object
+    OpenAIResponseContentPartReasoningSummary:
+      description: Reasoning summary part in a streamed response.
       properties:
         type:
-          type: string
           const: summary_text
           default: summary_text
-          description: >-
-            Content part type identifier, always "summary_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Summary text
-      additionalProperties: false
       required:
-      - type
-      - text
-      title: >-
-        OpenAIResponseContentPartReasoningSummary
-      description: >-
-        Reasoning summary part in a streamed response.
-    OpenAIResponseContentPartReasoningText:
+        - text
+      title: OpenAIResponseContentPartReasoningSummary
       type: object
+    OpenAIResponseContentPartReasoningText:
+      description: Reasoning text emitted as part of a streamed response.
       properties:
         type:
-          type: string
           const: reasoning_text
           default: reasoning_text
-          description: >-
-            Content part type identifier, always "reasoning_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Reasoning text supplied by the model
-      additionalProperties: false
       required:
-      - type
-      - text
+        - text
       title: OpenAIResponseContentPartReasoningText
-      description: >-
-        Reasoning text emitted as part of a streamed response.
+      type: object
     OpenAIResponseObjectStream:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
       discriminator:
-        propertyName: type
         mapping:
-          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
-          response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
-          response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
-          response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
-          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
-          response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
-          response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
-          response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
-          response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
-          response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
-          response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
-          response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
-          response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
-          response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
-          response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
-          response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
-          response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
-          response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
-          response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
           response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
           response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
-          response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
-          response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
+          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+          response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+          response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+          response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+          response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+          response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+          response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+          response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+          response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+          response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+          response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+          response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+          response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+          response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+          response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+          response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+          response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+          response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+          response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+          response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
           response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
           response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
           response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
           response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
+          response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
+          response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
           response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
           response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
-          response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
-          response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
-          response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
-          response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
-          response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
-          response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
-          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
-    "OpenAIResponseObjectStreamResponseCompleted":
-      type: object
+          response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+          response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+          response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+        propertyName: type
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+          title: OpenAIResponseObjectStreamResponseCreated
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+          title: OpenAIResponseObjectStreamResponseInProgress
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+          title: OpenAIResponseObjectStreamResponseOutputItemAdded
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+          title: OpenAIResponseObjectStreamResponseOutputItemDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+          title: OpenAIResponseObjectStreamResponseOutputTextDelta
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
+          title: OpenAIResponseObjectStreamResponseOutputTextDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+          title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+          title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+          title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+          title: OpenAIResponseObjectStreamResponseWebSearchCallSearching
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+          title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+          title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+          title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+          title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+          title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+          title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+          title: OpenAIResponseObjectStreamResponseMcpCallInProgress
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+          title: OpenAIResponseObjectStreamResponseMcpCallFailed
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          title: OpenAIResponseObjectStreamResponseMcpCallCompleted
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
+          title: OpenAIResponseObjectStreamResponseContentPartAdded
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
+          title: OpenAIResponseObjectStreamResponseContentPartDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
+          title: OpenAIResponseObjectStreamResponseReasoningTextDelta
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
+          title: OpenAIResponseObjectStreamResponseReasoningTextDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
+          title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
+          title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
+          title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
+          title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
+          title: OpenAIResponseObjectStreamResponseRefusalDelta
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
+          title: OpenAIResponseObjectStreamResponseRefusalDone
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+          title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+          title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+          title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+          title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+          title: OpenAIResponseObjectStreamResponseIncomplete
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+          title: OpenAIResponseObjectStreamResponseFailed
+        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+          title: OpenAIResponseObjectStreamResponseCompleted
+      title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
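Note: all 36 stream variants are dispatched by the `type` discriminator, so a consumer can branch on that one string. A sketch of handling parsed SSE payloads as plain dicts (not a specific client API; the `delta` field on `response.output_text.delta` follows the OpenAI streaming shape and is assumed here, since that schema is outside this hunk):

```python
# Sketch: 'event' is an already-parsed SSE payload dict.
def handle(event: dict) -> None:
    etype = event["type"]  # discriminator property from the schema above
    if etype == "response.output_text.delta":
        print(event["delta"], end="", flush=True)
    elif etype == "response.completed":
        final = event["response"]  # a full OpenAIResponseObject
        print(f"\n[done: {final['id']}]")
    elif etype in ("response.failed", "response.incomplete"):
        raise RuntimeError(f"stream ended with {etype}")
```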
+    OpenAIResponseObjectStreamResponseCompleted:
+      description: Streaming event indicating a response has been completed.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Completed response object
         type:
-          type: string
           const: response.completed
           default: response.completed
-          description: >-
-            Event type identifier, always "response.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - response
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCompleted
-      description: >-
-        Streaming event indicating a response has been completed.
-    "OpenAIResponseObjectStreamResponseContentPartAdded":
+        - response
+      title: OpenAIResponseObjectStreamResponseCompleted
       type: object
+    OpenAIResponseObjectStreamResponseContentPartAdded:
+      description: Streaming event for when a new content part is added to a response item.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the part within the content array
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this content
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item containing this content part
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response
         part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
-            propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
               reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The content part that was added
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+              title: OpenAIResponseContentPartOutputText
+            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+              title: OpenAIResponseContentPartRefusal
+            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+              title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.content_part.added
           default: response.content_part.added
-          description: >-
-            Event type identifier, always "response.content_part.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - content_index
-      - response_id
-      - item_id
-      - output_index
-      - part
-      - sequence_number
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartAdded
-      description: >-
-        Streaming event for when a new content part is added to a response item.
-    "OpenAIResponseObjectStreamResponseContentPartDone":
+        - content_index
+        - response_id
+        - item_id
+        - output_index
+        - part
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartAdded
       type: object
+    OpenAIResponseObjectStreamResponseContentPartDone:
+      description: Streaming event for when a content part is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the part within the content array
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this content
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item containing this content part
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response
         part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
-            propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
               reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The completed content part
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+              title: OpenAIResponseContentPartOutputText
+            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+              title: OpenAIResponseContentPartRefusal
+            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+              title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.content_part.done
           default: response.content_part.done
-          description: >-
-            Event type identifier, always "response.content_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - content_index
-      - response_id
-      - item_id
-      - output_index
-      - part
-      - sequence_number
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartDone
-      description: >-
-        Streaming event for when a content part is completed.
-    "OpenAIResponseObjectStreamResponseCreated":
+        - content_index
+        - response_id
+        - item_id
+        - output_index
+        - part
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartDone
       type: object
+    OpenAIResponseObjectStreamResponseCreated:
+      description: Streaming event indicating a new response has been created.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: The response object that was created
         type:
-          type: string
           const: response.created
           default: response.created
-          description: >-
-            Event type identifier, always "response.created"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - response
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCreated
-      description: >-
-        Streaming event indicating a new response has been created.
-    OpenAIResponseObjectStreamResponseFailed:
+        - response
+      title: OpenAIResponseObjectStreamResponseCreated
       type: object
+    OpenAIResponseObjectStreamResponseFailed:
+      description: Streaming event emitted when a response fails.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Response object describing the failure
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.failed
           default: response.failed
-          description: >-
-            Event type identifier, always "response.failed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - response
-      - sequence_number
-      - type
+        - response
+        - sequence_number
       title: OpenAIResponseObjectStreamResponseFailed
-      description: >-
-        Streaming event emitted when a response fails.
-    "OpenAIResponseObjectStreamResponseFileSearchCallCompleted":
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted:
+      description: Streaming event for completed file search calls.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.completed
           default: response.file_search_call.completed
-          description: >-
-            Event type identifier, always "response.file_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - item_id
-      - output_index
-      - sequence_number
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallCompleted
-      description: >-
-        Streaming event for completed file search calls.
-    "OpenAIResponseObjectStreamResponseFileSearchCallInProgress":
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress:
+      description: Streaming event for file search calls in progress.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.in_progress
           default: response.file_search_call.in_progress
-          description: >-
-            Event type identifier, always "response.file_search_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - item_id
-      - output_index
-      - sequence_number
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallInProgress
-      description: >-
-        Streaming event for file search calls in progress.
-    "OpenAIResponseObjectStreamResponseFileSearchCallSearching":
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching:
+      description: Streaming event for file search currently searching.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.searching
           default: response.file_search_call.searching
-          description: >-
-            Event type identifier, always "response.file_search_call.searching"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - item_id
-      - output_index
-      - sequence_number
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallSearching
-      description: >-
-        Streaming event for file search currently searching.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta":
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta:
+      description: Streaming event for incremental function call argument updates.
       properties:
         delta:
+          title: Delta
           type: string
-          description: >-
-            Incremental function call arguments being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the function call being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.delta
           default: response.function_call_arguments.delta
-          description: >-
-            Event type identifier, always "response.function_call_arguments.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - delta
-      - item_id
-      - output_index
-      - sequence_number
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
-      description: >-
-        Streaming event for incremental function call argument updates.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone":
+        - delta
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone:
+      description: Streaming event for when function call arguments are completed.
       properties:
         arguments:
+          title: Arguments
           type: string
-          description: >-
-            Final complete arguments JSON string for the function call
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed function call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.done
           default: response.function_call_arguments.done
-          description: >-
-            Event type identifier, always "response.function_call_arguments.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-      - arguments
-      - item_id
-      - output_index
-      - sequence_number
-      - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
-      description: >-
-        Streaming event for when function call arguments are completed.
- "OpenAIResponseObjectStreamResponseInProgress": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Current response state while in progress sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.in_progress default: response.in_progress - description: >- - Event type identifier, always "response.in_progress" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseInProgress - description: >- - Streaming event indicating the response remains in progress. - "OpenAIResponseObjectStreamResponseIncomplete": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: >- - Response object describing the incomplete state sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.incomplete default: response.incomplete - description: >- - Event type identifier, always "response.incomplete" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseIncomplete - description: >- - Streaming event emitted when a response ends in an incomplete state. 
- "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: properties: delta: + title: Delta type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.delta default: response.mcp_call.arguments.delta - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: properties: arguments: + title: Arguments type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.done default: response.mcp_call.arguments.done - additionalProperties: false + title: Type + type: string required: - - arguments - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone - "OpenAIResponseObjectStreamResponseMcpCallCompleted": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.completed default: response.mcp_call.completed - description: >- - Event type identifier, always "response.mcp_call.completed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallCompleted - description: Streaming event for completed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.failed default: response.mcp_call.failed - description: >- - Event type identifier, always "response.mcp_call.failed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallFailed - description: Streaming event for failed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
properties: item_id: + title: Item Id type: string - description: Unique identifier of the MCP call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.in_progress default: response.mcp_call.in_progress - description: >- - Event type identifier, always "response.mcp_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallInProgress - description: >- - Streaming event for MCP calls in progress. - "OpenAIResponseObjectStreamResponseMcpListToolsCompleted": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallInProgress type: object + OpenAIResponseObjectStreamResponseMcpListToolsCompleted: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.completed default: response.mcp_list_tools.completed - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsCompleted - "OpenAIResponseObjectStreamResponseMcpListToolsFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted type: object + OpenAIResponseObjectStreamResponseMcpListToolsFailed: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.failed default: response.mcp_list_tools.failed - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsFailed - "OpenAIResponseObjectStreamResponseMcpListToolsInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed type: object + OpenAIResponseObjectStreamResponseMcpListToolsInProgress: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.in_progress default: response.mcp_list_tools.in_progress - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsInProgress - "OpenAIResponseObjectStreamResponseOutputItemAdded": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress type: object + OpenAIResponseObjectStreamResponseOutputItemAdded: + description: Streaming event for when a new output item is added to the response. 
properties: response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this output item: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - description: >- - The output item that was added (message, tool call, etc.) + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) output_index: + title: Output Index type: integer - description: >- - Index position of this item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_item.added default: response.output_item.added - description: >- - Event type identifier, always "response.output_item.added" - additionalProperties: false + title: Type + type: string required: - - response_id - - item - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputItemAdded - description: >- - Streaming event for when a new output item is added to the response. - "OpenAIResponseObjectStreamResponseOutputItemDone": + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemAdded type: object + OpenAIResponseObjectStreamResponseOutputItemDone: + description: Streaming event for when an output item is completed. 
properties: response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this output item: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - description: >- - The completed output item (message, tool call, etc.) + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) output_index: + title: Output Index type: integer - description: >- - Index position of this item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_item.done default: response.output_item.done - description: >- - Event type identifier, always "response.output_item.done" - additionalProperties: false + title: Type + type: string required: - - response_id - - item - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputItemDone - description: >- - Streaming event for when an output item is completed. - "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded": + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemDone type: object + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded: + description: Streaming event for when an annotation is added to output text. 
properties: item_id: + title: Item Id type: string - description: >- - Unique identifier of the item to which the annotation is being added output_index: + title: Output Index type: integer - description: >- - Index position of the output item in the response's output array content_index: + title: Content Index type: integer - description: >- - Index position of the content part within the output item annotation_index: + title: Annotation Index type: integer - description: >- - Index of the annotation within the content part annotation: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' - description: The annotation object being added + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.annotation.added default: response.output_text.annotation.added - description: >- - Event type identifier, always "response.output_text.annotation.added" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - content_index - - annotation_index - - annotation - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded - description: >- - Streaming event for when an annotation is added to output text. - "OpenAIResponseObjectStreamResponseOutputTextDelta": + - item_id + - output_index + - content_index + - annotation_index + - annotation + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded type: object + OpenAIResponseObjectStreamResponseOutputTextDelta: + description: Streaming event for incremental text content updates. 
properties: content_index: + title: Content Index type: integer - description: Index position within the text content delta: + title: Delta type: string - description: Incremental text content being added item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item being updated output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.delta default: response.output_text.delta - description: >- - Event type identifier, always "response.output_text.delta" - additionalProperties: false + title: Type + type: string required: - - content_index - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextDelta - description: >- - Streaming event for incremental text content updates. - "OpenAIResponseObjectStreamResponseOutputTextDone": + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDelta type: object + OpenAIResponseObjectStreamResponseOutputTextDone: + description: Streaming event for when text output is completed. properties: content_index: + title: Content Index type: integer - description: Index position within the text content text: + title: Text type: string - description: >- - Final complete text content of the output item item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.done default: response.output_text.done - description: >- - Event type identifier, always "response.output_text.done" - additionalProperties: false + title: Type + type: string required: - - content_index - - text - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextDone - description: >- - Streaming event for when text output is completed. - "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded": + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDone type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded: + description: Streaming event for when a new reasoning summary part is added. 
properties: item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item part: $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' - description: The summary part that was added sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_part.added default: response.reasoning_summary_part.added - description: >- - Event type identifier, always "response.reasoning_summary_part.added" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - part - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded - description: >- - Streaming event for when a new reasoning summary part is added. - "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone": + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone: + description: Streaming event for when a reasoning summary part is completed. properties: item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item part: $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' - description: The completed summary part sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_part.done default: response.reasoning_summary_part.done - description: >- - Event type identifier, always "response.reasoning_summary_part.done" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - part - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryPartDone - description: >- - Streaming event for when a reasoning summary part is completed. - "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta": + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta: + description: Streaming event for incremental reasoning summary text updates. 
properties: delta: + title: Delta type: string - description: Incremental summary text being added item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_text.delta default: response.reasoning_summary_text.delta - description: >- - Event type identifier, always "response.reasoning_summary_text.delta" - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta - description: >- - Streaming event for incremental reasoning summary text updates. - "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone": + - delta + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone: + description: Streaming event for when reasoning summary text is completed. properties: text: + title: Text type: string - description: Final complete summary text item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_text.done default: response.reasoning_summary_text.done - description: >- - Event type identifier, always "response.reasoning_summary_text.done" - additionalProperties: false + title: Type + type: string required: - - text - - item_id - - output_index - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryTextDone - description: >- - Streaming event for when reasoning summary text is completed. - "OpenAIResponseObjectStreamResponseReasoningTextDelta": + - text + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone type: object + OpenAIResponseObjectStreamResponseReasoningTextDelta: + description: Streaming event for incremental reasoning text updates. 
properties: content_index: + title: Content Index type: integer - description: >- - Index position of the reasoning content part delta: + title: Delta type: string - description: Incremental reasoning text being added item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item being updated output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.reasoning_text.delta default: response.reasoning_text.delta - description: >- - Event type identifier, always "response.reasoning_text.delta" - additionalProperties: false + title: Type + type: string required: - - content_index - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningTextDelta - description: >- - Streaming event for incremental reasoning text updates. - "OpenAIResponseObjectStreamResponseReasoningTextDone": + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseReasoningTextDelta type: object + OpenAIResponseObjectStreamResponseReasoningTextDone: + description: Streaming event for when reasoning text is completed. properties: content_index: + title: Content Index type: integer - description: >- - Index position of the reasoning content part text: + title: Text type: string - description: Final complete reasoning text item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.reasoning_text.done default: response.reasoning_text.done - description: >- - Event type identifier, always "response.reasoning_text.done" - additionalProperties: false + title: Type + type: string required: - - content_index - - text - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningTextDone - description: >- - Streaming event for when reasoning text is completed. - "OpenAIResponseObjectStreamResponseRefusalDelta": + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseReasoningTextDone type: object + OpenAIResponseObjectStreamResponseRefusalDelta: + description: Streaming event for incremental refusal text updates. 
properties: content_index: + title: Content Index type: integer - description: Index position of the content part delta: + title: Delta type: string - description: Incremental refusal text being added item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.refusal.delta default: response.refusal.delta - description: >- - Event type identifier, always "response.refusal.delta" - additionalProperties: false + title: Type + type: string required: - - content_index - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseRefusalDelta - description: >- - Streaming event for incremental refusal text updates. - "OpenAIResponseObjectStreamResponseRefusalDone": + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseRefusalDelta type: object + OpenAIResponseObjectStreamResponseRefusalDone: + description: Streaming event for when refusal text is completed. properties: content_index: + title: Content Index type: integer - description: Index position of the content part refusal: + title: Refusal type: string - description: Final complete refusal text item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.refusal.done default: response.refusal.done - description: >- - Event type identifier, always "response.refusal.done" - additionalProperties: false + title: Type + type: string required: - - content_index - - refusal - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseRefusalDone - description: >- - Streaming event for when refusal text is completed. - "OpenAIResponseObjectStreamResponseWebSearchCallCompleted": + - content_index + - refusal + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseRefusalDone type: object + OpenAIResponseObjectStreamResponseWebSearchCallCompleted: + description: Streaming event for completed web search calls. properties: item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.completed default: response.web_search_call.completed - description: >- - Event type identifier, always "response.web_search_call.completed" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallCompleted - description: >- - Streaming event for completed web search calls. 
- "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. properties: item_id: + title: Item Id type: string - description: Unique identifier of the web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.in_progress default: response.web_search_call.in_progress - description: >- - Event type identifier, always "response.web_search_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallInProgress - description: >- - Streaming event for web search calls in progress. - "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: properties: item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.web_search_call.searching default: response.web_search_call.searching - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallSearching - OpenAIDeleteResponseObject: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching type: object + OpenAIDeleteResponseObject: properties: id: type: string - description: >- - Unique identifier of the deleted response + title: Id object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" deleted: type: boolean + title: Deleted default: true - description: Deletion confirmation flag, always True - additionalProperties: false - required: - - id - - object - - deleted - title: OpenAIDeleteResponseObject - description: >- - Response object confirming deletion of an OpenAI response. - ListOpenAIResponseInputItem: type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. 
+ ListOpenAIResponseInputItem: properties: data: - type: array items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: List of input items + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Data object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - object - title: ListOpenAIResponseInputItem - description: >- - List container for OpenAI response input items. - RunShieldRequest: type: object + required: + - data + title: ListOpenAIResponseInputItem + description: List container for OpenAI response input items. + RunShieldRequest: properties: shield_id: type: string - description: The identifier of the shield to run. + title: Shield Id messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - description: The messages to run the shield on. - params: - type: object - additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. 
- additionalProperties: false + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) + type: array + title: Messages + params: + additionalProperties: true + type: object + title: Params + type: object required: - - shield_id - - messages - - params + - shield_id + - messages + - params title: RunShieldRequest RunShieldResponse: - type: object properties: violation: - $ref: '#/components/schemas/SafetyViolation' - description: >- - (Optional) Safety violation detected by the shield, if any - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/SafetyViolation' + title: SafetyViolation + - type: 'null' + title: SafetyViolation + type: object title: RunShieldResponse description: Response from running a safety shield. SafetyViolation: - type: object properties: violation_level: $ref: '#/components/schemas/ViolationLevel' - description: Severity level of the violation user_message: - type: string - description: >- - (Optional) Message to convey to the user about the violation + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata including specific violation codes for debugging and - telemetry - additionalProperties: false + title: Metadata + type: object required: - - violation_level - - metadata + - violation_level title: SafetyViolation - description: >- - Details of a safety violation detected by content moderation. + description: Details of a safety violation detected by content moderation. ViolationLevel: type: string enum: - - info - - warn - - error + - info + - warn + - error title: ViolationLevel description: Severity level of a safety violation. - AgentTurnInputType: - type: object - properties: - type: - type: string - const: agent_turn_input - default: agent_turn_input - description: >- - Discriminator type. Always "agent_turn_input" - additionalProperties: false - required: - - type - title: AgentTurnInputType - description: Parameter type for agent turn input. AggregationFunctionType: type: string enum: - - average - - weighted_average - - median - - categorical_count - - accuracy + - average + - weighted_average + - median + - categorical_count + - accuracy title: AggregationFunctionType - description: >- - Types of aggregation functions for scoring results. + description: Types of aggregation functions for scoring results. ArrayType: - type: object properties: type: type: string const: array + title: Type default: array - description: Discriminator type. 
Always "array" - additionalProperties: false - required: - - type + type: object title: ArrayType description: Parameter type for array values. BasicScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: basic + title: Type default: basic - description: >- - The type of scoring function parameters, always basic aggregation_functions: - type: array items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - aggregation_functions - title: BasicScoringFnParams - description: >- - Parameters for basic scoring function configuration. - BooleanType: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + title: BasicScoringFnParams + description: Parameters for basic scoring function configuration. + BooleanType: properties: type: type: string const: boolean + title: Type default: boolean - description: Discriminator type. Always "boolean" - additionalProperties: false - required: - - type + type: object title: BooleanType description: Parameter type for boolean values. ChatCompletionInputType: - type: object properties: type: type: string const: chat_completion_input + title: Type default: chat_completion_input - description: >- - Discriminator type. Always "chat_completion_input" - additionalProperties: false - required: - - type - title: ChatCompletionInputType - description: >- - Parameter type for chat completion input. - CompletionInputType: type: object + title: ChatCompletionInputType + description: Parameter type for chat completion input. + CompletionInputType: properties: type: type: string const: completion_input + title: Type default: completion_input - description: >- - Discriminator type. Always "completion_input" - additionalProperties: false - required: - - type + type: object title: CompletionInputType description: Parameter type for completion input. JsonType: - type: object properties: type: type: string const: json + title: Type default: json - description: Discriminator type. Always "json" - additionalProperties: false - required: - - type + type: object title: JsonType description: Parameter type for JSON values. LLMAsJudgeScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: llm_as_judge + title: Type default: llm_as_judge - description: >- - The type of scoring function parameters, always llm_as_judge judge_model: type: string - description: >- - Identifier of the LLM model to use as a judge for scoring + title: Judge Model prompt_template: - type: string - description: >- - (Optional) Custom prompt template for the judge model + anyOf: + - type: string + - type: 'null' judge_score_regexes: - type: array items: type: string - description: >- - Regexes to extract the answer from generated response - aggregation_functions: type: array + title: Judge Score Regexes + description: Regexes to extract the answer from generated response + aggregation_functions: items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - judge_model - - judge_score_regexes - - aggregation_functions - title: LLMAsJudgeScoringFnParams - description: >- - Parameters for LLM-as-judge scoring function configuration. 
- NumberType: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + required: + - judge_model + title: LLMAsJudgeScoringFnParams + description: Parameters for LLM-as-judge scoring function configuration. + NumberType: properties: type: type: string const: number + title: Type default: number - description: Discriminator type. Always "number" - additionalProperties: false - required: - - type + type: object title: NumberType description: Parameter type for numeric values. ObjectType: - type: object properties: type: type: string const: object + title: Type default: object - description: Discriminator type. Always "object" - additionalProperties: false - required: - - type + type: object title: ObjectType description: Parameter type for object values. RegexParserScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: regex_parser + title: Type default: regex_parser - description: >- - The type of scoring function parameters, always regex_parser parsing_regexes: - type: array items: type: string - description: >- - Regex to extract the answer from generated response - aggregation_functions: type: array + title: Parsing Regexes + description: Regex to extract the answer from generated response + aggregation_functions: items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - parsing_regexes - - aggregation_functions - title: RegexParserScoringFnParams - description: >- - Parameters for regex parser scoring function configuration. - ScoringFn: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + title: RegexParserScoringFnParams + description: Parameters for regex parser scoring function configuration. 
+ ScoringFn: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: scoring_function + title: Type default: scoring_function - description: >- - The resource type, always scoring_function description: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata + description: Any additional metadata for this definition return_type: oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... 
(9 variants) + description: The return type of the deterministic function discriminator: propertyName: type mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' + boolean: '#/components/schemas/BooleanType' chat_completion_input: '#/components/schemas/ChatCompletionInputType' completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + description: The parameters for the scoring function for benchmark eval, these can be overridden for app eval + type: object required: - - identifier - - provider_id - - type - - metadata - - return_type + - identifier + - provider_id + - return_type title: ScoringFn - description: >- - A scoring function resource for evaluating model outputs. + description: A scoring function resource for evaluating model outputs. ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' discriminator: - propertyName: type mapping: + basic: '#/components/schemas/BasicScoringFnParams' llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams ScoringFnParamsType: - type: string + description: Types of scoring function parameter configurations. enum: - - llm_as_judge - - regex_parser - - basic + - llm_as_judge + - regex_parser + - basic title: ScoringFnParamsType - description: >- - Types of scoring function parameter configurations. + type: string StringType: - type: object properties: type: type: string const: string + title: Type default: string - description: Discriminator type. Always "string" - additionalProperties: false - required: - - type + type: object title: StringType description: Parameter type for string values. 
UnionType: - type: object properties: type: type: string const: union + title: Type default: union - description: Discriminator type. Always "union" - additionalProperties: false - required: - - type + type: object title: UnionType description: Parameter type for union values. ListScoringFunctionsResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/ScoringFn' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListScoringFunctionsResponse - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: >- - The ID of the scoring function to register. - description: - type: string - description: The description of the scoring function. - return_type: - $ref: '#/components/schemas/ParamType' - description: The return type of the scoring function. - provider_scoring_fn_id: - type: string - description: >- - The ID of the provider scoring function to use for the scoring function. - provider_id: - type: string - description: >- - The ID of the provider to use for the scoring function. - params: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - The parameters for the scoring function for benchmark eval, these can - be overridden for app eval. - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest ScoreRequest: - type: object properties: input_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to score. + type: array + title: Input Rows scoring_functions: - type: object additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. 
- additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions + type: object required: - - input_rows - - scoring_functions + - input_rows + - scoring_functions title: ScoreRequest ScoreResponse: - type: object properties: results: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult. - additionalProperties: false + type: object + title: Results + type: object required: - - results + - results title: ScoreResponse description: The response from scoring. ScoringResult: - type: object properties: score_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. + type: array + title: Score Rows aggregated_results: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false + title: Aggregated Results + type: object required: - - score_rows - - aggregated_results + - score_rows + - aggregated_results title: ScoringResult description: A scoring result for a single row. ScoreBatchRequest: - type: object properties: dataset_id: type: string - description: The ID of the dataset to score. + title: Dataset Id scoring_functions: - type: object additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions save_results_dataset: type: boolean - description: >- - Whether to save the results to a dataset. 
- additionalProperties: false + title: Save Results Dataset + default: false + type: object required: - - dataset_id - - scoring_functions - - save_results_dataset + - dataset_id + - scoring_functions title: ScoreBatchRequest ScoreBatchResponse: - type: object properties: dataset_id: - type: string - description: >- - (Optional) The identifier of the dataset that was scored + anyOf: + - type: string + - type: 'null' results: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult - additionalProperties: false - required: - - results - title: ScoreBatchResponse - description: >- - Response from batch scoring operations on datasets. - Shield: + type: object + title: Results type: object + required: + - results + title: ScoreBatchResponse + description: Response from batch scoring operations on datasets. + Shield: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: shield + title: Type default: shield - description: The resource type, always shield params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Configuration parameters for the shield - additionalProperties: false - required: - - identifier - - provider_id - - type - title: Shield - description: >- - A safety shield resource that can be used to check content. - ListShieldsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - identifier + - provider_id + title: Shield + description: A safety shield resource that can be used to check content. + ListShieldsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Shield' - additionalProperties: false - required: - - data - title: ListShieldsResponse - RegisterShieldRequest: - type: object - properties: - shield_id: - type: string - description: >- - The identifier of the shield to register. - provider_shield_id: - type: string - description: >- - The identifier of the shield in the provider. - provider_id: - type: string - description: The identifier of the provider. - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. - additionalProperties: false - required: - - shield_id - title: RegisterShieldRequest - CompletionMessage: - type: object - properties: - role: - type: string - const: assistant - default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - $ref: '#/components/schemas/InterleavedContent' - description: The content of the model's response - stop_reason: - type: string - enum: - - end_of_turn - - end_of_message - - out_of_tokens - description: >- - Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: - The model finished generating the entire response. 
- `StopReason.end_of_message`: - The model finished generating but generated a partial response -- usually, - a tool call. The user may call the tool and continue the conversation - with the tool's response. - `StopReason.out_of_tokens`: The model ran - out of token budget. - tool_calls: type: array - items: - $ref: '#/components/schemas/ToolCall' - description: >- - List of tool calls. Each tool call is a ToolCall object. - additionalProperties: false - required: - - role - - content - - stop_reason - title: CompletionMessage - description: >- - A message containing the model's (assistant) response in a chat conversation. - ImageContentItem: + title: Data type: object + required: + - data + title: ListShieldsResponse + InvokeToolRequest: + properties: + tool_name: + type: string + title: Tool Name + kwargs: + additionalProperties: true + type: object + title: Kwargs + authorization: + anyOf: + - type: string + - type: 'null' + type: object + required: + - tool_name + - kwargs + title: InvokeToolRequest + ImageContentItem: + description: A image content item properties: type: - type: string const: image default: image - description: >- - Discriminator type of the content item. Always "image" + title: Type + type: string image: - type: object - properties: - url: - $ref: '#/components/schemas/URL' - description: >- - A URL of the image or data URL in the format of data:image/{type};base64,{data}. - Note that URL could have length limits. - data: - type: string - contentEncoding: base64 - description: base64 encoded image data as string - additionalProperties: false - description: >- - Image as a base64 encoded string or an URL - additionalProperties: false + $ref: '#/components/schemas/_URLOrData' required: - - type - - image + - image title: ImageContentItem - description: A image content item + type: object InterleavedContent: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - InterleavedContentItem: - oneOf: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + InterleavedContentItem: discriminator: - propertyName: type mapping: image: '#/components/schemas/ImageContentItem' text: '#/components/schemas/TextContentItem' - Message: + propertyName: type oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/SystemMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - - $ref: '#/components/schemas/CompletionMessage' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/UserMessage' - system: '#/components/schemas/SystemMessage' - tool: '#/components/schemas/ToolResponseMessage' - assistant: '#/components/schemas/CompletionMessage' - 
SystemMessage: - type: object - properties: - role: - type: string - const: system - default: system - description: >- - Must be "system" to identify this as a system message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). - additionalProperties: false - required: - - role - - content - title: SystemMessage - description: >- - A system message providing instructions or context to the model. + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem TextContentItem: - type: object properties: type: type: string const: text + title: Type default: text - description: >- - Discriminator type of the content item. Always "text" text: type: string - description: Text content - additionalProperties: false + title: Text + type: object required: - - type - - text + - text title: TextContentItem description: A text content item - ToolCall: - type: object + ToolInvocationResult: properties: - call_id: - type: string - tool_name: - oneOf: - - type: string - enum: - - brave_search - - wolfram_alpha - - photogen - - code_interpreter - title: BuiltinTool - - type: string - arguments: - type: string - additionalProperties: false - required: - - call_id - - tool_name - - arguments - title: ToolCall - ToolResponseMessage: - type: object - properties: - role: - type: string - const: tool - default: tool - description: >- - Must be "tool" to identify this as a tool response - call_id: - type: string - description: >- - Unique identifier for the tool call this response is for content: - $ref: '#/components/schemas/InterleavedContent' - description: The response content from the tool - additionalProperties: false - required: - - role - - call_id - - content - title: ToolResponseMessage - description: >- - A message representing the result of a tool invocation. - URL: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem-Output | TextContentItem] + error_message: + anyOf: + - type: string + - type: 'null' + error_code: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: ToolInvocationResult + description: Result of a tool invocation. 
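The regenerated spec renders optional fields as anyOf unions with 'null' rather than simply leaving them out of required, which is how Pydantic v2 emits JSON Schema for Optional[...] fields. A minimal sketch, assuming pydantic v2 is installed; this model is illustrative, not the actual llama-stack class, and content is narrowed to str to keep it short:

from pydantic import BaseModel

class ToolInvocationResultSketch(BaseModel):
    # Every field is optional, so each one surfaces in the generated schema
    # as {"anyOf": [{"type": ...}, {"type": "null"}]}, the pattern seen in
    # ToolInvocationResult above.
    content: str | None = None
    error_message: str | None = None
    error_code: int | None = None
    metadata: dict | None = None

print(ToolInvocationResultSketch.model_json_schema())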
+ URL: properties: uri: type: string - description: The URL string pointing to the resource - additionalProperties: false + title: Uri + type: object required: - - uri + - uri title: URL description: A URL reference to external content. - UserMessage: - type: object - properties: - role: - type: string - const: user - default: user - description: >- - Must be "user" to identify this as a user message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the message, which can include text and other media - context: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) This field is used internally by Llama Stack to pass RAG context. - This field may be removed in the API in the future. - additionalProperties: false - required: - - role - - content - title: UserMessage - description: >- - A message from the user in a chat conversation. - SyntheticDataGenerateRequest: - type: object - properties: - dialogs: - type: array - items: - $ref: '#/components/schemas/Message' - description: >- - List of conversation messages to use as input for synthetic data generation - filtering_function: - type: string - enum: - - none - - random - - top_k - - top_p - - top_k_top_p - - sigmoid - description: >- - Type of filtering to apply to generated synthetic data samples - model: - type: string - description: >- - (Optional) The identifier of the model to use. The model must be registered - with Llama Stack and available via the /models endpoint - additionalProperties: false - required: - - dialogs - - filtering_function - title: SyntheticDataGenerateRequest - SyntheticDataGenerationResponse: - type: object - properties: - synthetic_data: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - List of generated synthetic data samples that passed the filtering criteria - statistics: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Statistical information about the generation process and filtering - results - additionalProperties: false - required: - - synthetic_data - title: SyntheticDataGenerationResponse - description: >- - Response from the synthetic data generation. Batch of (prompt, response, score) - tuples that pass the threshold. - InvokeToolRequest: - type: object - properties: - tool_name: - type: string - description: The name of the tool to invoke. - kwargs: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool. 
- additionalProperties: false - required: - - tool_name - - kwargs - title: InvokeToolRequest - ToolInvocationResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The output content from the tool execution - error_message: - type: string - description: >- - (Optional) Error message if the tool execution failed - error_code: - type: integer - description: >- - (Optional) Numeric error code if the tool execution failed - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool execution - additionalProperties: false - title: ToolInvocationResult - description: Result of a tool invocation. ToolDef: - type: object properties: toolgroup_id: - type: string - description: >- - (Optional) ID of the tool group this tool belongs to + anyOf: + - type: string + - type: 'null' name: type: string - description: Name of the tool + title: Name description: - type: string - description: >- - (Optional) Human-readable description of what the tool does + anyOf: + - type: string + - type: 'null' input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool inputs (MCP inputSchema) + anyOf: + - additionalProperties: true + type: object + - type: 'null' output_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool outputs (MCP outputSchema) + anyOf: + - additionalProperties: true + type: object + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool - additionalProperties: false - required: - - name - title: ToolDef - description: >- - Tool definition used in runtime contexts. - ListToolDefsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - name + title: ToolDef + description: Tool definition used in runtime contexts. + ListToolDefsResponse: properties: data: - type: array items: $ref: '#/components/schemas/ToolDef' - description: List of tool definitions - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListToolDefsResponse - description: >- - Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. 
- additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. 
- RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. + description: Response containing a list of tool definitions. 
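In the new ToolDef schema only name is required; every other property is a nullable anyOf. A hedged sketch of a matching model (illustrative only, not the shipped class):

from pydantic import BaseModel

class ToolDefSketch(BaseModel):
    name: str                         # the sole required property
    toolgroup_id: str | None = None
    description: str | None = None
    input_schema: dict | None = None  # free-form JSON Schema (additionalProperties: true)
    output_schema: dict | None = None
    metadata: dict | None = None

tool = ToolDefSketch(name="web_search", input_schema={"type": "object"})
print(tool.model_dump(exclude_none=True))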
ToolGroup: - type: object properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: tool_group + title: Type default: tool_group - description: Type of resource, always 'tool_group' mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - (Optional) Model Context Protocol endpoint for remote tools + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional arguments for the tool group - additionalProperties: false - required: - - identifier - - provider_id - - type - title: ToolGroup - description: >- - A group of related tools managed together. - ListToolGroupsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - identifier + - provider_id + title: ToolGroup + description: A group of related tools managed together. + ListToolGroupsResponse: properties: data: - type: array items: $ref: '#/components/schemas/ToolGroup' - description: List of tool groups - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListToolGroupsResponse - description: >- - Response containing a list of tool groups. - RegisterToolGroupRequest: - type: object - properties: - toolgroup_id: - type: string - description: The ID of the tool group to register. - provider_id: - type: string - description: >- - The ID of the provider to use for the tool group. - mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - The MCP endpoint to use for the tool group. - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool group. - additionalProperties: false - required: - - toolgroup_id - - provider_id - title: RegisterToolGroupRequest + description: Response containing a list of tool groups. Chunk: - type: object + description: A chunk of content that can be inserted into a vector database. properties: content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, or other - types. - metadata: - type: object - additionalProperties: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk that will be used in the model context - during inference. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. 
- stored_chunk_id: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + chunk_id: + title: Chunk Id type: string - description: >- - The chunk ID that is stored in the vector database. Used for backend functionality. + metadata: + additionalProperties: true + title: Metadata + type: object + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: >- - Metadata for the chunk that will NOT be used in the context during inference. - The `chunk_metadata` is required backend functionality. - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + nullable: true + title: ChunkMetadata required: - - content - - metadata + - content + - chunk_id title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. - ChunkMetadata: type: object + ChunkMetadata: properties: chunk_id: - type: string - description: >- - The ID of the chunk. If not set, it will be generated based on the document - ID and content. + anyOf: + - type: string + - type: 'null' document_id: - type: string - description: >- - The ID of the document this chunk belongs to. + anyOf: + - type: string + - type: 'null' source: - type: string - description: >- - The source of the content, such as a URL, file path, or other identifier. + anyOf: + - type: string + - type: 'null' created_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was created. + anyOf: + - type: integer + - type: 'null' updated_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was last updated. + anyOf: + - type: integer + - type: 'null' chunk_window: - type: string - description: >- - The window of the chunk, which can be used to group related chunks together. + anyOf: + - type: string + - type: 'null' chunk_tokenizer: - type: string - description: >- - The tokenizer used to create the chunk. Default is Tiktoken. + anyOf: + - type: string + - type: 'null' chunk_embedding_model: - type: string - description: >- - The embedding model used to create the chunk's embedding. + anyOf: + - type: string + - type: 'null' chunk_embedding_dimension: - type: integer - description: >- - The dimension of the embedding vector for the chunk. + anyOf: + - type: integer + - type: 'null' content_token_count: - type: integer - description: >- - The number of tokens in the content of the chunk. + anyOf: + - type: integer + - type: 'null' metadata_token_count: - type: integer - description: >- - The number of tokens in the metadata of the chunk. 
-      additionalProperties: false
-      title: ChunkMetadata
-      description: >-
-        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional
-        information about the chunk that will not be used in the context during
-        inference, but is required for backend functionality. The `ChunkMetadata` is
-        set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not
-        expected to change after. Use `Chunk.metadata` for metadata that will
-        be used in the context during inference.
-    InsertChunksRequest:
+        anyOf:
+          - type: integer
+          - type: 'null'
       type: object
+      title: ChunkMetadata
+      description: |-
+        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
+        will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
+        is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after.
+        Use `Chunk.metadata` for metadata that will be used in the context during inference.
+    InsertChunksRequest:
       properties:
-        vector_db_id:
+        vector_store_id:
           type: string
-          description: >-
-            The identifier of the vector database to insert the chunks into.
+          title: Vector Store Id
         chunks:
-          type: array
           items:
-            $ref: '#/components/schemas/Chunk'
-          description: >-
-            The chunks to insert. Each `Chunk` should contain content which can be
-            interleaved text, images, or other types. `metadata`: `dict[str, Any]`
-            and `embedding`: `List[float]` are optional. If `metadata` is provided,
-            you configure how Llama Stack formats the chunk during generation. If
-            `embedding` is not provided, it will be computed later.
+            $ref: '#/components/schemas/Chunk-Input'
+          type: array
+          title: Chunks
         ttl_seconds:
-          type: integer
-          description: The time to live of the chunks.
-      additionalProperties: false
+          anyOf:
+            - type: integer
+            - type: 'null'
+      type: object
       required:
-        - vector_db_id
-        - chunks
+      - vector_store_id
+      - chunks
       title: InsertChunksRequest
     QueryChunksRequest:
-      type: object
       properties:
-        vector_db_id:
+        vector_store_id:
           type: string
-          description: >-
-            The identifier of the vector database to query.
+          title: Vector Store Id
         query:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The query to search for.
+          anyOf:
+            - type: string
+            - oneOf:
+                - $ref: '#/components/schemas/ImageContentItem-Input'
+                  title: ImageContentItem-Input
+                - $ref: '#/components/schemas/TextContentItem'
+                  title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Input'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Input | TextContentItem
+            - items:
+                oneOf:
+                  - $ref: '#/components/schemas/ImageContentItem-Input'
+                    title: ImageContentItem-Input
+                  - $ref: '#/components/schemas/TextContentItem'
+                    title: TextContentItem
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    image: '#/components/schemas/ImageContentItem-Input'
+                    text: '#/components/schemas/TextContentItem'
+                title: ImageContentItem-Input | TextContentItem
+              type: array
+              title: list[ImageContentItem-Input | TextContentItem]
+          title: string | list[ImageContentItem-Input | TextContentItem]
         params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the query.
- additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - vector_db_id - - query + - vector_store_id + - query title: QueryChunksRequest QueryChunksResponse: - type: object properties: chunks: - type: array items: - $ref: '#/components/schemas/Chunk' - description: >- - List of content chunks returned from the query - scores: + $ref: '#/components/schemas/Chunk-Output' type: array + title: Chunks + scores: items: type: number - description: >- - Relevance scores corresponding to each returned chunk - additionalProperties: false - required: - - chunks - - scores - title: QueryChunksResponse - description: >- - Response from querying chunks in a vector database. - VectorStoreFileCounts: + type: array + title: Scores type: object + required: + - chunks + - scores + title: QueryChunksResponse + description: Response from querying chunks in a vector database. + VectorStoreFileCounts: properties: completed: type: integer - description: >- - Number of files that have been successfully processed + title: Completed cancelled: type: integer - description: >- - Number of files that had their processing cancelled + title: Cancelled failed: type: integer - description: Number of files that failed to process + title: Failed in_progress: type: integer - description: >- - Number of files currently being processed + title: In Progress total: type: integer - description: >- - Total number of files in the vector store - additionalProperties: false - required: - - completed - - cancelled - - failed - - in_progress - - total - title: VectorStoreFileCounts - description: >- - File processing status counts for a vector store. - VectorStoreListResponse: + title: Total type: object + required: + - completed + - cancelled + - failed + - in_progress + - total + title: VectorStoreFileCounts + description: File processing status counts for a vector store. + VectorStoreListResponse: properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreObject' - description: List of vector store objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first vector store in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last vector store in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more vector stores available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreListResponse description: Response from listing vector stores. 
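VectorStoreListResponse now lists only data under required; object and has_more carry defaults, and the pagination cursors are nullable. A quick check with the jsonschema package against a hand-reduced copy of the schema (the reduction is mine, not part of the spec):

import jsonschema

reduced_schema = {
    "type": "object",
    "required": ["data"],
    "properties": {
        "object": {"type": "string", "default": "list"},
        "data": {"type": "array"},
        "first_id": {"anyOf": [{"type": "string"}, {"type": "null"}]},
        "last_id": {"anyOf": [{"type": "string"}, {"type": "null"}]},
        "has_more": {"type": "boolean", "default": False},
    },
}

jsonschema.validate({"data": []}, reduced_schema)  # passes: cursors may be absent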
VectorStoreObject: - type: object properties: id: type: string - description: Unique identifier for the vector store + title: Id object: type: string + title: Object default: vector_store - description: >- - Object type identifier, always "vector_store" created_at: type: integer - description: >- - Timestamp when the vector store was created + title: Created At name: - type: string - description: (Optional) Name of the vector store + anyOf: + - type: string + - type: 'null' usage_bytes: type: integer + title: Usage Bytes default: 0 - description: >- - Storage space used by the vector store in bytes file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the vector store status: type: string + title: Status default: completed - description: Current status of the vector store expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store + anyOf: + - additionalProperties: true + type: object + - type: 'null' expires_at: - type: integer - description: >- - (Optional) Timestamp when the vector store will expire + anyOf: + - type: integer + - type: 'null' last_active_at: - type: integer - description: >- - (Optional) Timestamp of last activity on the vector store + anyOf: + - type: integer + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false + title: Metadata + type: object required: - - id - - object - - created_at - - usage_bytes - - file_counts - - status - - metadata + - id + - created_at + - file_counts title: VectorStoreObject description: OpenAI Vector Store object. - "OpenAICreateVectorStoreRequestWithExtraBody": - type: object - properties: - name: - type: string - description: (Optional) A name for the vector store - file_ids: - type: array - items: - type: string - description: >- - List of file IDs to include in the vector store - expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store - chunking_strategy: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Strategy for splitting files into chunks - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false - title: >- - OpenAICreateVectorStoreRequestWithExtraBody - description: >- - Request to create a vector store with extra_body support. - OpenaiUpdateVectorStoreRequest: - type: object - properties: - name: - type: string - description: The name of the vector store. - expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The expiration policy for a vector store. 
- metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of 16 key-value pairs that can be attached to an object. - additionalProperties: false - title: OpenaiUpdateVectorStoreRequest - VectorStoreDeleteResponse: - type: object - properties: - id: - type: string - description: >- - Unique identifier of the deleted vector store - object: - type: string - default: vector_store.deleted - description: >- - Object type identifier for the deletion response - deleted: - type: boolean - default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreDeleteResponse - description: Response from deleting a vector store. VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' discriminator: - propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + propertyName: type + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic VectorStoreChunkingStrategyAuto: - type: object properties: type: type: string const: auto + title: Type default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: type: object + title: VectorStoreChunkingStrategyAuto + description: Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: properties: type: type: string const: static + title: Type default: static - description: >- - Strategy type, always "static" for static chunking static: $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. - VectorStoreChunkingStrategyStaticConfig: type: object + required: + - static + title: VectorStoreChunkingStrategyStatic + description: Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: properties: chunk_overlap_tokens: type: integer + title: Chunk Overlap Tokens default: 400 - description: >- - Number of tokens to overlap between adjacent chunks max_chunk_size_tokens: type: integer + maximum: 4096.0 + minimum: 100.0 + title: Max Chunk Size Tokens default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens + type: object title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. - "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": - type: object + description: Configuration for static chunking strategy. 
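VectorStoreChunkingStrategyStaticConfig now encodes its bounds directly (minimum 100, maximum 4096 for max_chunk_size_tokens) rather than describing them only in prose. A sketch of an equivalent model, assuming pydantic v2; not the actual server-side class:

from pydantic import BaseModel, Field, ValidationError

class StaticChunkingConfigSketch(BaseModel):
    chunk_overlap_tokens: int = 400
    max_chunk_size_tokens: int = Field(default=800, ge=100, le=4096)

try:
    StaticChunkingConfigSketch(max_chunk_size_tokens=50)  # below the 100-token floor
except ValidationError as err:
    print(err)  # pydantic enforces the same bounds the schema declares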
+ OpenAICreateVectorStoreRequestWithExtraBody: properties: + name: + anyOf: + - type: string + - type: 'null' file_ids: - type: array - items: - type: string - description: >- - A list of File IDs that the vector store should use - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes to store with the files + anyOf: + - items: + type: string + type: array + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto - additionalProperties: false - required: - - file_ids - title: >- - OpenAICreateVectorStoreFileBatchRequestWithExtraBody - description: >- - Request to create a vector store file batch with extra_body support. - VectorStoreFileBatchObject: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + additionalProperties: true type: object + title: OpenAICreateVectorStoreRequestWithExtraBody + description: Request to create a vector store with extra_body support. + OpenaiUpdateVectorStoreRequest: + properties: + name: + anyOf: + - type: string + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: OpenaiUpdateVectorStoreRequest + VectorStoreDeleteResponse: properties: id: type: string - description: Unique identifier for the file batch + title: Id object: type: string + title: Object + default: vector_store.deleted + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: VectorStoreDeleteResponse + description: Response from deleting a vector store. + OpenAICreateVectorStoreFileBatchRequestWithExtraBody: + properties: + file_ids: + items: + type: string + type: array + title: File Ids + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + chunking_strategy: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + additionalProperties: true + type: object + required: + - file_ids + title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: Request to create a vector store file batch with extra_body support. 
+ VectorStoreFileBatchObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object default: vector_store.file_batch - description: >- - Object type identifier, always "vector_store.file_batch" created_at: type: integer - description: >- - Timestamp when the file batch was created + title: Created At vector_store_id: type: string - description: >- - ID of the vector store containing the file batch + title: Vector Store Id status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: >- - Current processing status of the file batch + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the batch - additionalProperties: false + type: object required: - - id - - object - - created_at - - vector_store_id - - status - - file_counts + - id + - created_at + - vector_store_id + - status + - file_counts title: VectorStoreFileBatchObject description: OpenAI Vector Store File Batch object. VectorStoreFileStatus: - oneOf: - - type: string - const: completed - - type: string - const: in_progress - - type: string - const: cancelled - - type: string - const: failed + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed VectorStoreFileLastError: - type: object properties: code: - oneOf: - - type: string - const: server_error - - type: string - const: rate_limit_exceeded - description: >- - Error code indicating the type of failure + title: Code + type: string + enum: + - server_error + - rate_limit_exceeded + default: server_error message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: VectorStoreFileLastError - description: >- - Error information for failed vector store file processing. - VectorStoreFileObject: + title: Message type: object + required: + - code + - message + title: VectorStoreFileLastError + description: Error information for failed vector store file processing. + VectorStoreFileObject: properties: id: type: string - description: Unique identifier for the file + title: Id object: type: string + title: Object default: vector_store.file - description: >- - Object type identifier, always "vector_store.file" attributes: - type: object additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 + type: object + maxProperties: 16 + title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. 
+ x-oaiTypeLabel: map chunking_strategy: oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic discriminator: propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - description: >- - Strategy used for splitting the file into chunks created_at: type: integer - description: >- - Timestamp when the file was added to the vector store + title: Created At last_error: - $ref: '#/components/schemas/VectorStoreFileLastError' - description: >- - (Optional) Error information if file processing failed + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: Current processing status of the file + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed usage_bytes: type: integer + title: Usage Bytes default: 0 - description: Storage space used by this file in bytes vector_store_id: type: string - description: >- - ID of the vector store containing this file - additionalProperties: false + title: Vector Store Id + type: object required: - - id - - object - - attributes - - chunking_strategy - - created_at - - status - - usage_bytes - - vector_store_id + - id + - chunking_strategy + - created_at + - status + - vector_store_id title: VectorStoreFileObject description: OpenAI Vector Store File object. VectorStoreFilesListInBatchResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: >- - List of vector store file objects in the batch + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreFilesListInBatchResponse - description: >- - Response from listing files in a vector store file batch. + description: Response from listing files in a vector store file batch. 
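The attributes map on VectorStoreFileObject is now constrained in the schema itself: at most 16 keys, key names up to 64 characters, and values that are strings (up to 512 characters), numbers, or booleans. A small stand-alone checker that mirrors those limits (a sketch, not part of any shipped SDK):

def check_attributes(attrs: dict) -> None:
    # Mirrors the schema: maxProperties 16, key maxLength 64, value is a
    # string (maxLength 512), a number, or a boolean.
    if len(attrs) > 16:
        raise ValueError("at most 16 attributes are allowed")
    for key, value in attrs.items():
        if not isinstance(key, str) or len(key) > 64:
            raise ValueError(f"invalid key: {key!r}")
        if isinstance(value, (bool, int, float)):
            continue
        if isinstance(value, str) and len(value) <= 512:
            continue
        raise ValueError(f"invalid value for {key!r}: {value!r}")

check_attributes({"project": "demo", "priority": 3, "archived": False})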
VectorStoreListFilesResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: List of vector store file objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreListFilesResponse - description: >- - Response from listing files in a vector store. - OpenaiAttachFileToVectorStoreRequest: type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. + OpenaiAttachFileToVectorStoreRequest: properties: file_id: type: string - description: >- - The ID of the file to attach to the vector store. + title: File Id attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The key-value attributes stored with the file, which can be used for filtering. + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - The chunking strategy to use for the file. - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + type: object required: - - file_id + - file_id title: OpenaiAttachFileToVectorStoreRequest OpenaiUpdateVectorStoreFileRequest: - type: object properties: attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The updated key-value attributes to store with the file. - additionalProperties: false + title: Attributes + type: object required: - - attributes + - attributes title: OpenaiUpdateVectorStoreFileRequest VectorStoreFileDeleteResponse: - type: object properties: id: type: string - description: Unique identifier of the deleted file + title: Id object: type: string + title: Object default: vector_store.file.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreFileDeleteResponse - description: >- - Response from deleting a vector store file. 
- VectorStoreContent: type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. + VectorStoreContent: properties: type: type: string const: text - description: >- - Content type, currently only "text" is supported + title: Type text: type: string - description: The actual text content - additionalProperties: false - required: - - type - - text - title: VectorStoreContent - description: >- - Content item from a vector store file or search result. - VectorStoreFileContentsResponse: - type: object - properties: - file_id: - type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file - content: - type: array - items: - $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file - additionalProperties: false - required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse - description: >- - Response from retrieving the contents of a vector store file. - OpenaiSearchVectorStoreRequest: - type: object - properties: - query: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - The query string or array for performing the search. - filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Filters based on file attributes to narrow the search results. - max_num_results: - type: integer - description: >- - Maximum number of results to return (1 to 50 inclusive, default 10). - ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: + title: Text + embedding: + anyOf: + - items: type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - Ranking options for fine-tuning the search results. 
- rewrite_query: - type: boolean - description: >- - Whether to rewrite the natural language query for vector search (default - false) - search_mode: - type: string - description: >- - The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - additionalProperties: false - required: - - query - title: OpenaiSearchVectorStoreRequest - VectorStoreSearchResponse: + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object - properties: - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: Relevance score for this search result - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: number - - type: boolean - description: >- - (Optional) Key-value attributes associated with the file - content: - type: array - items: - $ref: '#/components/schemas/VectorStoreContent' - description: >- - List of content items matching the search query - additionalProperties: false required: - - file_id - - filename - - score - - content - title: VectorStoreSearchResponse - description: Response from searching a vector store. - VectorStoreSearchResponsePage: - type: object + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. + VectorStoreFileContentResponse: properties: object: type: string - default: vector_store.search_results.page - description: >- - Object type identifier for the search results page - search_query: - type: string - description: >- - The original search query that was executed + const: vector_store.file_content.page + title: Object + default: vector_store.file_content.page data: - type: array items: - $ref: '#/components/schemas/VectorStoreSearchResponse' - description: List of search result objects + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more results available beyond this page next_page: - type: string - description: >- - (Optional) Token for retrieving the next page of results - additionalProperties: false - required: - - object - - search_query - - data - - has_more - title: VectorStoreSearchResponsePage - description: >- - Paginated response from searching a vector store. - VersionInfo: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. 
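To make the regenerated shape concrete, here is a minimal sketch of a body conforming to the new VectorStoreFileContentResponse schema above. The chunk text is invented for illustration; per the schema only `data` is required, `object` is pinned to the constant `vector_store.file_content.page`, and `embedding`, `chunk_metadata`, and `metadata` are the newly added nullable fields.

object: vector_store.file_content.page
data:
- type: text
  text: Sample text extracted from the attached file.   # illustrative content
  embedding: null        # nullable list of floats in this revision
  chunk_metadata: null
  metadata: null
has_more: false
next_page: null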
+ OpenaiSearchVectorStoreRequest: + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions + rewrite_query: + anyOf: + - type: boolean + - type: 'null' + default: false + search_mode: + anyOf: + - type: string + - type: 'null' + default: vector + type: object + required: + - query + title: OpenaiSearchVectorStoreRequest + VectorStoreSearchResponse: + properties: + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: number + - type: boolean + title: string | number | boolean + type: object + - type: 'null' + content: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Content + type: object + required: + - file_id + - filename + - score + - content + title: VectorStoreSearchResponse + description: Response from searching a vector store. + VectorStoreSearchResponsePage: + properties: + object: + type: string + title: Object + default: vector_store.search_results.page + search_query: + items: + type: string + type: array + title: Search Query + data: + items: + $ref: '#/components/schemas/VectorStoreSearchResponse' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: properties: version: type: string - description: Version number of the service - additionalProperties: false + title: Version + type: object required: - - version + - version title: VersionInfo description: Version information for the service. AppendRowsRequest: - type: object properties: rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to append to the dataset. - additionalProperties: false + type: array + title: Rows + type: object required: - - rows + - rows title: AppendRowsRequest PaginatedResponse: - type: object properties: data: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The list of items for the current page + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more items available after this set + title: Has More url: - type: string - description: The URL for accessing this list - additionalProperties: false - required: - - data - - has_more - title: PaginatedResponse - description: >- - A generic paginated response that follows a simple format. - Dataset: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. 
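As a sketch of the regenerated request schema, an OpenaiSearchVectorStoreRequest body might look like the following. The query text and filter key are invented; the other values shown are the schema defaults, and `query` may equally be a list of strings per the `anyOf`.

query: What is the capital of France?
filters:
  category: geography    # illustrative attribute filter
max_num_results: 10      # schema default
rewrite_query: false     # schema default
search_mode: vector      # schema default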
+ Dataset: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: dataset + title: Type default: dataset - description: >- - Type of resource, always 'dataset' for datasets purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use + $ref: '#/components/schemas/DatasetPurpose' source: oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource discriminator: propertyName: type mapping: - uri: '#/components/schemas/URIDataSource' rows: '#/components/schemas/RowsDataSource' - description: >- - Data source configuration for the dataset + uri: '#/components/schemas/URIDataSource' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false - required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. - RowsDataSource: + title: Metadata + description: Any additional metadata for this dataset type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: properties: type: type: string const: rows + title: Type default: rows rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}]} ] - additionalProperties: false + type: array + title: Rows + type: object required: - - type - - rows + - rows title: RowsDataSource description: A dataset stored in rows. URIDataSource: - type: object properties: type: type: string const: uri + title: Type default: uri uri: type: string - description: >- - The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" - additionalProperties: false - required: - - type - - uri - title: URIDataSource - description: >- - A dataset that can be obtained from a URI. - ListDatasetsResponse: + title: Uri type: object + required: + - uri + title: URIDataSource + description: A dataset that can be obtained from a URI. 
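A registered Dataset resource under the new schema might serialize as below. The identifier and provider id are hypothetical; the purpose value and the URI are taken from the examples in the descriptions this diff removes.

identifier: my-qa-dataset          # hypothetical dataset id
provider_resource_id: null
provider_id: localfs               # hypothetical provider id
type: dataset
purpose: eval/question-answer
source:
  type: uri
  uri: https://mywebsite.com/mydata.jsonl
metadata: {}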
+ ListDatasetsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Dataset' - description: List of datasets - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListDatasetsResponse description: Response from listing datasets. - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - RegisterDatasetRequest: - type: object + Benchmark: properties: - purpose: + identifier: type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. Ensure that the data source schema is - compatible with the purpose of the dataset. Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: type: string - description: >- - The ID of the dataset. If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest - AgentConfig: - type: object - properties: - sampling_params: - $ref: '#/components/schemas/SamplingParams' - input_shields: - type: array - items: - type: string - output_shields: - type: array - items: - type: string - toolgroups: - type: array - items: - $ref: '#/components/schemas/AgentTool' - client_tools: - type: array - items: - $ref: '#/components/schemas/ToolDef' - tool_choice: - type: string - enum: - - auto - - required - - none - title: ToolChoice - description: >- - Whether tool use is required or automatic. This is a hint to the model - which may not be followed. It depends on the Instruction Following capabilities - of the model. 
- deprecated: true - tool_prompt_format: - type: string - enum: - - json - - function_tag - - python_list - title: ToolPromptFormat - description: >- - Prompt format for calling custom / zero shot tools. - deprecated: true - tool_config: - $ref: '#/components/schemas/ToolConfig' - max_infer_iters: - type: integer - default: 10 - model: - type: string - description: >- - The model identifier to use for the agent - instructions: - type: string - description: The system instructions for the agent - name: - type: string - description: >- - Optional name for the agent, used in telemetry and identification - enable_session_persistence: - type: boolean - default: false - description: >- - Optional flag indicating whether session data has to be persisted - response_format: - $ref: '#/components/schemas/ResponseFormat' - description: Optional response format configuration - additionalProperties: false - required: - - model - - instructions - title: AgentConfig - description: Configuration for an agent. - AgentTool: - oneOf: - - type: string - - type: object - properties: - name: - type: string - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - - args - title: AgentToolGroupWithArgs - GrammarResponseFormat: - type: object - properties: + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - json_schema - - grammar - description: >- - Must be "grammar" to identify this format type - const: grammar - default: grammar - bnf: + const: benchmark + title: Type + default: benchmark + dataset_id: + type: string + title: Dataset Id + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + metadata: + additionalProperties: true type: object + title: Metadata + description: Metadata for this evaluation task + type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. + ListBenchmarksResponse: + properties: + data: + items: + $ref: '#/components/schemas/Benchmark' + type: array + title: Data + type: object + required: + - data + title: ListBenchmarksResponse + BenchmarkConfig: + properties: + eval_candidate: + $ref: '#/components/schemas/ModelCandidate' + scoring_params: additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The BNF grammar specification the response should conform to - additionalProperties: false - required: - - type - - bnf - title: GrammarResponseFormat - description: >- - Configuration for grammar-guided response generation. 
- GreedySamplingStrategy: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + type: object + title: Scoring Params + description: Map between scoring function id and parameters for each scoring function you want to run + num_examples: + anyOf: + - type: integer + - type: 'null' + description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated type: object + required: + - eval_candidate + title: BenchmarkConfig + description: A benchmark configuration for evaluation. + GreedySamplingStrategy: properties: type: type: string const: greedy + title: Type default: greedy - description: >- - Must be "greedy" to identify this sampling strategy - additionalProperties: false - required: - - type + type: object title: GreedySamplingStrategy - description: >- - Greedy sampling strategy that selects the highest probability token at each - step. - JsonSchemaResponseFormat: - type: object - properties: - type: - type: string - enum: - - json_schema - - grammar - description: >- - Must be "json_schema" to identify this format type - const: json_schema - default: json_schema - json_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The JSON schema the response should conform to. In a Python SDK, this - is often a `pydantic` model. - additionalProperties: false - required: - - type - - json_schema - title: JsonSchemaResponseFormat - description: >- - Configuration for JSON schema-guided response generation. - ResponseFormat: - oneOf: - - $ref: '#/components/schemas/JsonSchemaResponseFormat' - - $ref: '#/components/schemas/GrammarResponseFormat' - discriminator: - propertyName: type - mapping: - json_schema: '#/components/schemas/JsonSchemaResponseFormat' - grammar: '#/components/schemas/GrammarResponseFormat' - SamplingParams: - type: object - properties: - strategy: - oneOf: - - $ref: '#/components/schemas/GreedySamplingStrategy' - - $ref: '#/components/schemas/TopPSamplingStrategy' - - $ref: '#/components/schemas/TopKSamplingStrategy' - discriminator: - propertyName: type - mapping: - greedy: '#/components/schemas/GreedySamplingStrategy' - top_p: '#/components/schemas/TopPSamplingStrategy' - top_k: '#/components/schemas/TopKSamplingStrategy' - description: The sampling strategy. - max_tokens: - type: integer - default: 0 - description: >- - The maximum number of tokens that can be generated in the completion. - The token count of your prompt plus max_tokens cannot exceed the model's - context length. - repetition_penalty: - type: number - default: 1.0 - description: >- - Number between -2.0 and 2.0. Positive values penalize new tokens based - on whether they appear in the text so far, increasing the model's likelihood - to talk about new topics. - stop: - type: array - items: - type: string - description: >- - Up to 4 sequences where the API will stop generating further tokens. 
The - returned text will not contain the stop sequence. - additionalProperties: false - required: - - strategy - title: SamplingParams - description: Sampling parameters. - ToolConfig: - type: object - properties: - tool_choice: - oneOf: - - type: string - enum: - - auto - - required - - none - title: ToolChoice - description: >- - Whether tool use is required or automatic. This is a hint to the model - which may not be followed. It depends on the Instruction Following - capabilities of the model. - - type: string - default: auto - description: >- - (Optional) Whether tool use is automatic, required, or none. Can also - specify a tool name to use a specific tool. Defaults to ToolChoice.auto. - tool_prompt_format: - type: string - enum: - - json - - function_tag - - python_list - description: >- - (Optional) Instructs the model how to format tool calls. By default, Llama - Stack will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python - syntax -- a list of function calls. - system_message_behavior: - type: string - enum: - - append - - replace - description: >- - (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: - Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: - Replaces the default system prompt with the provided system message. The - system message can include the string '{{function_definitions}}' to indicate - where the function definitions should be inserted. - default: append - additionalProperties: false - title: ToolConfig - description: Configuration for tool use. - TopKSamplingStrategy: - type: object - properties: - type: - type: string - const: top_k - default: top_k - description: >- - Must be "top_k" to identify this sampling strategy - top_k: - type: integer - description: >- - Number of top tokens to consider for sampling. Must be at least 1 - additionalProperties: false - required: - - type - - top_k - title: TopKSamplingStrategy - description: >- - Top-k sampling strategy that restricts sampling to the k most likely tokens. - TopPSamplingStrategy: - type: object - properties: - type: - type: string - const: top_p - default: top_p - description: >- - Must be "top_p" to identify this sampling strategy - temperature: - type: number - description: >- - Controls randomness in sampling. Higher values increase randomness - top_p: - type: number - default: 0.95 - description: >- - Cumulative probability threshold for nucleus sampling. Defaults to 0.95 - additionalProperties: false - required: - - type - title: TopPSamplingStrategy - description: >- - Top-p (nucleus) sampling strategy that samples from the smallest set of tokens - with cumulative probability >= p. - CreateAgentRequest: - type: object - properties: - agent_config: - $ref: '#/components/schemas/AgentConfig' - description: The configuration for the agent. - additionalProperties: false - required: - - agent_config - title: CreateAgentRequest - AgentCreateResponse: - type: object - properties: - agent_id: - type: string - description: Unique identifier for the created agent - additionalProperties: false - required: - - agent_id - title: AgentCreateResponse - description: >- - Response returned when creating a new agent. 
- Agent: - type: object - properties: - agent_id: - type: string - description: Unique identifier for the agent - agent_config: - $ref: '#/components/schemas/AgentConfig' - description: Configuration settings for the agent - created_at: - type: string - format: date-time - description: Timestamp when the agent was created - additionalProperties: false - required: - - agent_id - - agent_config - - created_at - title: Agent - description: >- - An agent instance with configuration and metadata. - CreateAgentSessionRequest: - type: object - properties: - session_name: - type: string - description: The name of the session to create. - additionalProperties: false - required: - - session_name - title: CreateAgentSessionRequest - AgentSessionCreateResponse: - type: object - properties: - session_id: - type: string - description: >- - Unique identifier for the created session - additionalProperties: false - required: - - session_id - title: AgentSessionCreateResponse - description: >- - Response returned when creating a new agent session. - InferenceStep: - type: object - properties: - turn_id: - type: string - description: The ID of the turn. - step_id: - type: string - description: The ID of the step. - started_at: - type: string - format: date-time - description: The time the step started. - completed_at: - type: string - format: date-time - description: The time the step completed. - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - title: StepType - description: Type of the step in an agent turn. - const: inference - default: inference - model_response: - $ref: '#/components/schemas/CompletionMessage' - description: The response from the LLM. - additionalProperties: false - required: - - turn_id - - step_id - - step_type - - model_response - title: InferenceStep - description: An inference step in an agent turn. - MemoryRetrievalStep: - type: object - properties: - turn_id: - type: string - description: The ID of the turn. - step_id: - type: string - description: The ID of the step. - started_at: - type: string - format: date-time - description: The time the step started. - completed_at: - type: string - format: date-time - description: The time the step completed. - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - title: StepType - description: Type of the step in an agent turn. - const: memory_retrieval - default: memory_retrieval - vector_db_ids: - type: string - description: >- - The IDs of the vector databases to retrieve context from. - inserted_context: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The context retrieved from the vector databases. - additionalProperties: false - required: - - turn_id - - step_id - - step_type - - vector_db_ids - - inserted_context - title: MemoryRetrievalStep - description: >- - A memory retrieval step in an agent turn. 
- Session: - type: object - properties: - session_id: - type: string - description: >- - Unique identifier for the conversation session - session_name: - type: string - description: Human-readable name for the session - turns: - type: array - items: - $ref: '#/components/schemas/Turn' - description: >- - List of all turns that have occurred in this session - started_at: - type: string - format: date-time - description: Timestamp when the session was created - additionalProperties: false - required: - - session_id - - session_name - - turns - - started_at - title: Session - description: >- - A single session of an interaction with an Agentic System. - ShieldCallStep: - type: object - properties: - turn_id: - type: string - description: The ID of the turn. - step_id: - type: string - description: The ID of the step. - started_at: - type: string - format: date-time - description: The time the step started. - completed_at: - type: string - format: date-time - description: The time the step completed. - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - title: StepType - description: Type of the step in an agent turn. - const: shield_call - default: shield_call - violation: - $ref: '#/components/schemas/SafetyViolation' - description: The violation from the shield call. - additionalProperties: false - required: - - turn_id - - step_id - - step_type - title: ShieldCallStep - description: A shield call step in an agent turn. - ToolExecutionStep: - type: object - properties: - turn_id: - type: string - description: The ID of the turn. - step_id: - type: string - description: The ID of the step. - started_at: - type: string - format: date-time - description: The time the step started. - completed_at: - type: string - format: date-time - description: The time the step completed. - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - title: StepType - description: Type of the step in an agent turn. - const: tool_execution - default: tool_execution - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolCall' - description: The tool calls to execute. - tool_responses: - type: array - items: - $ref: '#/components/schemas/ToolResponse' - description: The tool responses from the tool calls. - additionalProperties: false - required: - - turn_id - - step_id - - step_type - - tool_calls - - tool_responses - title: ToolExecutionStep - description: A tool execution step in an agent turn. - ToolResponse: - type: object - properties: - call_id: - type: string - description: >- - Unique identifier for the tool call this response is for - tool_name: - oneOf: - - type: string - enum: - - brave_search - - wolfram_alpha - - photogen - - code_interpreter - title: BuiltinTool - - type: string - description: Name of the tool that was invoked - content: - $ref: '#/components/schemas/InterleavedContent' - description: The response content from the tool - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool response - additionalProperties: false - required: - - call_id - - tool_name - - content - title: ToolResponse - description: Response from a tool invocation. 
- Turn: - type: object - properties: - turn_id: - type: string - description: >- - Unique identifier for the turn within a session - session_id: - type: string - description: >- - Unique identifier for the conversation session - input_messages: - type: array - items: - oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - description: >- - List of messages that initiated this turn - steps: - type: array - items: - oneOf: - - $ref: '#/components/schemas/InferenceStep' - - $ref: '#/components/schemas/ToolExecutionStep' - - $ref: '#/components/schemas/ShieldCallStep' - - $ref: '#/components/schemas/MemoryRetrievalStep' - discriminator: - propertyName: step_type - mapping: - inference: '#/components/schemas/InferenceStep' - tool_execution: '#/components/schemas/ToolExecutionStep' - shield_call: '#/components/schemas/ShieldCallStep' - memory_retrieval: '#/components/schemas/MemoryRetrievalStep' - description: >- - Ordered list of processing steps executed during this turn - output_message: - $ref: '#/components/schemas/CompletionMessage' - description: >- - The model's generated response containing content and metadata - output_attachments: - type: array - items: - type: object - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the attachment. - mime_type: - type: string - description: The MIME type of the attachment. - additionalProperties: false - required: - - content - - mime_type - title: Attachment - description: An attachment to an agent turn. - description: >- - (Optional) Files or media attached to the agent's response - started_at: - type: string - format: date-time - description: Timestamp when the turn began - completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the turn finished, if completed - additionalProperties: false - required: - - turn_id - - session_id - - input_messages - - steps - - output_message - - started_at - title: Turn - description: >- - A single turn in an interaction with an Agentic System. - CreateAgentTurnRequest: - type: object - properties: - messages: - type: array - items: - oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - description: List of messages to start the turn with. - stream: - type: boolean - description: >- - (Optional) If True, generate an SSE event stream of the response. Defaults - to False. - documents: - type: array - items: - type: object - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - additionalProperties: false - required: - - content - - mime_type - title: Document - description: A document to be used by an agent. - description: >- - (Optional) List of documents to create the turn with. - toolgroups: - type: array - items: - $ref: '#/components/schemas/AgentTool' - description: >- - (Optional) List of toolgroups to create the turn with, will be used in - addition to the agent's config toolgroups for the request. 
- tool_config: - $ref: '#/components/schemas/ToolConfig' - description: >- - (Optional) The tool configuration to create the turn with, will be used - to override the agent's tool_config. - additionalProperties: false - required: - - messages - title: CreateAgentTurnRequest - AgentTurnResponseEvent: - type: object - properties: - payload: - oneOf: - - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload' - - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload' - - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' - discriminator: - propertyName: event_type - mapping: - step_start: '#/components/schemas/AgentTurnResponseStepStartPayload' - step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload' - step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' - turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' - turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' - description: >- - Event-specific payload containing event data - additionalProperties: false - required: - - payload - title: AgentTurnResponseEvent - description: >- - An event in an agent turn response stream. - AgentTurnResponseStepCompletePayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: step_complete - default: step_complete - description: Type of event being reported - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - description: Type of step being executed - step_id: - type: string - description: >- - Unique identifier for the step within a turn - step_details: - oneOf: - - $ref: '#/components/schemas/InferenceStep' - - $ref: '#/components/schemas/ToolExecutionStep' - - $ref: '#/components/schemas/ShieldCallStep' - - $ref: '#/components/schemas/MemoryRetrievalStep' - discriminator: - propertyName: step_type - mapping: - inference: '#/components/schemas/InferenceStep' - tool_execution: '#/components/schemas/ToolExecutionStep' - shield_call: '#/components/schemas/ShieldCallStep' - memory_retrieval: '#/components/schemas/MemoryRetrievalStep' - description: Complete details of the executed step - additionalProperties: false - required: - - event_type - - step_type - - step_id - - step_details - title: AgentTurnResponseStepCompletePayload - description: >- - Payload for step completion events in agent turn responses. 
- AgentTurnResponseStepProgressPayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: step_progress - default: step_progress - description: Type of event being reported - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - description: Type of step being executed - step_id: - type: string - description: >- - Unique identifier for the step within a turn - delta: - oneOf: - - $ref: '#/components/schemas/TextDelta' - - $ref: '#/components/schemas/ImageDelta' - - $ref: '#/components/schemas/ToolCallDelta' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/TextDelta' - image: '#/components/schemas/ImageDelta' - tool_call: '#/components/schemas/ToolCallDelta' - description: >- - Incremental content changes during step execution - additionalProperties: false - required: - - event_type - - step_type - - step_id - - delta - title: AgentTurnResponseStepProgressPayload - description: >- - Payload for step progress events in agent turn responses. - AgentTurnResponseStepStartPayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: step_start - default: step_start - description: Type of event being reported - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - description: Type of step being executed - step_id: - type: string - description: >- - Unique identifier for the step within a turn - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata for the step - additionalProperties: false - required: - - event_type - - step_type - - step_id - title: AgentTurnResponseStepStartPayload - description: >- - Payload for step start events in agent turn responses. - AgentTurnResponseStreamChunk: - type: object - properties: - event: - $ref: '#/components/schemas/AgentTurnResponseEvent' - description: >- - Individual event in the agent turn response stream - additionalProperties: false - required: - - event - title: AgentTurnResponseStreamChunk - description: Streamed agent turn completion response. - "AgentTurnResponseTurnAwaitingInputPayload": - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: turn_awaiting_input - default: turn_awaiting_input - description: Type of event being reported - turn: - $ref: '#/components/schemas/Turn' - description: >- - Turn data when waiting for external tool responses - additionalProperties: false - required: - - event_type - - turn - title: >- - AgentTurnResponseTurnAwaitingInputPayload - description: >- - Payload for turn awaiting input events in agent turn responses. 
- AgentTurnResponseTurnCompletePayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: turn_complete - default: turn_complete - description: Type of event being reported - turn: - $ref: '#/components/schemas/Turn' - description: >- - Complete turn data including all steps and results - additionalProperties: false - required: - - event_type - - turn - title: AgentTurnResponseTurnCompletePayload - description: >- - Payload for turn completion events in agent turn responses. - AgentTurnResponseTurnStartPayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: turn_start - default: turn_start - description: Type of event being reported - turn_id: - type: string - description: >- - Unique identifier for the turn within a session - additionalProperties: false - required: - - event_type - - turn_id - title: AgentTurnResponseTurnStartPayload - description: >- - Payload for turn start events in agent turn responses. - ImageDelta: - type: object - properties: - type: - type: string - const: image - default: image - description: >- - Discriminator type of the delta. Always "image" - image: - type: string - contentEncoding: base64 - description: The incremental image data as bytes - additionalProperties: false - required: - - type - - image - title: ImageDelta - description: >- - An image content delta for streaming responses. - TextDelta: - type: object - properties: - type: - type: string - const: text - default: text - description: >- - Discriminator type of the delta. Always "text" - text: - type: string - description: The incremental text content - additionalProperties: false - required: - - type - - text - title: TextDelta - description: >- - A text content delta for streaming responses. - ToolCallDelta: - type: object - properties: - type: - type: string - const: tool_call - default: tool_call - description: >- - Discriminator type of the delta. Always "tool_call" - tool_call: - oneOf: - - type: string - - $ref: '#/components/schemas/ToolCall' - description: >- - Either an in-progress tool call string or the final parsed tool call - parse_status: - type: string - enum: - - started - - in_progress - - failed - - succeeded - description: Current parsing status of the tool call - additionalProperties: false - required: - - type - - tool_call - - parse_status - title: ToolCallDelta - description: >- - A tool call content delta for streaming responses. - ResumeAgentTurnRequest: - type: object - properties: - tool_responses: - type: array - items: - $ref: '#/components/schemas/ToolResponse' - description: >- - The tool call responses to resume the turn with. - stream: - type: boolean - description: Whether to stream the response. 
- additionalProperties: false - required: - - tool_responses - title: ResumeAgentTurnRequest - AgentStepResponse: - type: object - properties: - step: - oneOf: - - $ref: '#/components/schemas/InferenceStep' - - $ref: '#/components/schemas/ToolExecutionStep' - - $ref: '#/components/schemas/ShieldCallStep' - - $ref: '#/components/schemas/MemoryRetrievalStep' - discriminator: - propertyName: step_type - mapping: - inference: '#/components/schemas/InferenceStep' - tool_execution: '#/components/schemas/ToolExecutionStep' - shield_call: '#/components/schemas/ShieldCallStep' - memory_retrieval: '#/components/schemas/MemoryRetrievalStep' - description: >- - The complete step data and execution details - additionalProperties: false - required: - - step - title: AgentStepResponse - description: >- - Response containing details of a specific agent step. - Benchmark: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: benchmark - default: benchmark - description: The resource type, always benchmark - dataset_id: - type: string - description: >- - Identifier of the dataset to use for the benchmark evaluation - scoring_functions: - type: array - items: - type: string - description: >- - List of scoring function identifiers to apply during evaluation - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Metadata for this evaluation task - additionalProperties: false - required: - - identifier - - provider_id - - type - - dataset_id - - scoring_functions - - metadata - title: Benchmark - description: >- - A benchmark resource for evaluating model performance. - ListBenchmarksResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Benchmark' - additionalProperties: false - required: - - data - title: ListBenchmarksResponse - RegisterBenchmarkRequest: - type: object - properties: - benchmark_id: - type: string - description: The ID of the benchmark to register. - dataset_id: - type: string - description: >- - The ID of the dataset to use for the benchmark. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the benchmark. - provider_benchmark_id: - type: string - description: >- - The ID of the provider benchmark to use for the benchmark. - provider_id: - type: string - description: >- - The ID of the provider to use for the benchmark. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The metadata to use for the benchmark. - additionalProperties: false - required: - - benchmark_id - - dataset_id - - scoring_functions - title: RegisterBenchmarkRequest - AgentCandidate: - type: object - properties: - type: - type: string - const: agent - default: agent - config: - $ref: '#/components/schemas/AgentConfig' - description: >- - The configuration for the agent candidate. - additionalProperties: false - required: - - type - - config - title: AgentCandidate - description: An agent candidate for evaluation. 
- BenchmarkConfig: - type: object - properties: - eval_candidate: - oneOf: - - $ref: '#/components/schemas/ModelCandidate' - - $ref: '#/components/schemas/AgentCandidate' - discriminator: - propertyName: type - mapping: - model: '#/components/schemas/ModelCandidate' - agent: '#/components/schemas/AgentCandidate' - description: The candidate to evaluate. - scoring_params: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - Map between scoring function id and parameters for each scoring function - you want to run - num_examples: - type: integer - description: >- - (Optional) The number of examples to evaluate. If not provided, all examples - in the dataset will be evaluated - additionalProperties: false - required: - - eval_candidate - - scoring_params - title: BenchmarkConfig - description: >- - A benchmark configuration for evaluation. + description: Greedy sampling strategy that selects the highest probability token at each step. ModelCandidate: - type: object properties: type: type: string const: model + title: Type default: model model: type: string - description: The model ID to evaluate. + title: Model sampling_params: $ref: '#/components/schemas/SamplingParams' - description: The sampling parameters for the model. system_message: - $ref: '#/components/schemas/SystemMessage' - description: >- - (Optional) The system message providing instructions or context to the - model. - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/SystemMessage' + title: SystemMessage + - type: 'null' + title: SystemMessage + type: object required: - - type - - model - - sampling_params + - model + - sampling_params title: ModelCandidate description: A model candidate for evaluation. - EvaluateRowsRequest: + SamplingParams: + properties: + strategy: + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + max_tokens: + anyOf: + - type: integer + - type: 'null' + repetition_penalty: + anyOf: + - type: number + - type: 'null' + default: 1.0 + stop: + anyOf: + - items: + type: string + type: array + - type: 'null' type: object + title: SamplingParams + description: Sampling parameters. 
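Putting the regenerated pieces together, a minimal ModelCandidate using the new SamplingParams shape might look like this. The model id is hypothetical; the greedy strategy needs no extra fields, `max_tokens` is now nullable with no default, and `repetition_penalty` keeps its 1.0 default.

type: model
model: my-model-id                 # hypothetical model identifier
sampling_params:
  strategy:
    type: greedy
  max_tokens: null
  repetition_penalty: 1.0          # schema default
  stop: null
system_message: null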
+ SystemMessage: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. + TopKSamplingStrategy: + properties: + type: + type: string + const: top_k + title: Type + default: top_k + top_k: + type: integer + minimum: 1.0 + title: Top K + type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + properties: + type: + type: string + const: top_p + title: Type + default: top_p + temperature: + anyOf: + - type: number + minimum: 0.0 + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + default: 0.95 + type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateRowsRequest: properties: input_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: type: array + title: Input Rows + scoring_functions: items: type: string - description: >- - The scoring functions to use for the evaluation. + type: array + title: Scoring Functions benchmark_config: $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false + type: object required: - - input_rows - - scoring_functions - - benchmark_config + - input_rows + - scoring_functions + - benchmark_config title: EvaluateRowsRequest EvaluateResponse: - type: object properties: generations: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. + type: array + title: Generations scores: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false + type: object + title: Scores + type: object required: - - generations - - scores + - generations + - scores title: EvaluateResponse description: The response from an evaluation. 
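For the eval surface, a sketch of an EvaluateRowsRequest under the new schemas follows. The row content reuses the question/answer example from the removed purpose description; the scoring function id and model id are hypothetical. Note that under the new BenchmarkConfig only `eval_candidate` is required and the candidate can only be a ModelCandidate.

input_rows:
- question: What is the capital of France?
  answer: Paris
scoring_functions:
- basic::equality                  # hypothetical scoring function id
benchmark_config:
  eval_candidate:
    type: model
    model: my-model-id             # hypothetical
    sampling_params:
      strategy:
        type: greedy
  scoring_params: {}
  num_examples: null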
RunEvalRequest: - type: object properties: benchmark_config: $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false + type: object required: - - benchmark_config + - benchmark_config title: RunEvalRequest Job: - type: object properties: job_id: type: string - description: Unique identifier for the job + title: Job Id status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. - RerankRequest: + $ref: '#/components/schemas/JobStatus' type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. + RerankRequest: properties: model: type: string - description: >- - The identifier of the reranking model to use. + title: Model query: - oneOf: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + items: + items: + anyOf: - type: string - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - The search query to rank items against. Can be a string, text content - part, or image content part. The input must not exceed the model's max - input token length. - items: + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam type: array - items: - oneOf: - - type: string - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - List of items to rerank. Each item can be a string, text content part, - or image content part. Each input must not exceed the model's max input - token length. + title: Items max_num_results: - type: integer - description: >- - (Optional) Maximum number of results to return. Default: returns all. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - model - - query - - items + - model + - query + - items title: RerankRequest RerankData: - type: object properties: index: type: integer - description: >- - The original index of the document in the input list + title: Index relevance_score: type: number - description: >- - The relevance score from the model output. Values are inverted when applicable - so that higher scores indicate greater relevance. - additionalProperties: false - required: - - index - - relevance_score - title: RerankData - description: >- - A single rerank result from a reranking response. - RerankResponse: + title: Relevance Score type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. 
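A minimal RerankRequest sketch using the plain-string forms of `query` and `items` (all values invented for illustration; both fields also accept text or image content parts per the `anyOf`):

model: my-reranker-model           # hypothetical model id
query: best pizza places nearby
items:
- Pizza place A serves excellent wood-fired margherita.
- Hardware store B sells power tools.
max_num_results: 1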
+ RerankResponse: properties: data: - type: array items: $ref: '#/components/schemas/RerankData' - description: >- - List of rerank result objects, sorted by relevance score (descending) - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: RerankResponse description: Response from a reranking request. Checkpoint: - type: object properties: identifier: type: string - description: Unique identifier for the checkpoint + title: Identifier created_at: type: string format: date-time - description: >- - Timestamp when the checkpoint was created + title: Created At epoch: type: integer - description: >- - Training epoch when the checkpoint was saved + title: Epoch post_training_job_id: type: string - description: >- - Identifier of the training job that created this checkpoint + title: Post Training Job Id path: type: string - description: >- - File system path where the checkpoint is stored + title: Path training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object required: - - identifier - - created_at - - epoch - - post_training_job_id - - path + - identifier + - created_at + - epoch + - post_training_job_id + - path title: Checkpoint description: Checkpoint created during training runs. PostTrainingJobArtifactsResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - checkpoints + - job_uuid title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. PostTrainingMetric: - type: object properties: epoch: type: integer - description: Training epoch number + title: Epoch train_loss: type: number - description: Loss value on the training dataset + title: Train Loss validation_loss: type: number - description: Loss value on the validation dataset + title: Validation Loss perplexity: type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - CancelTrainingJobRequest: + title: Perplexity type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + CancelTrainingJobRequest: properties: job_uuid: type: string - description: The UUID of the job to cancel. 
-      additionalProperties: false
+          title: Job Uuid
+      type: object
       required:
-        - job_uuid
+      - job_uuid
       title: CancelTrainingJobRequest
     PostTrainingJobStatusResponse:
-      type: object
       properties:
         job_uuid:
           type: string
-          description: Unique identifier for the training job
+          title: Job Uuid
         status:
-          type: string
-          enum:
-            - completed
-            - in_progress
-            - failed
-            - scheduled
-            - cancelled
-          description: Current status of the training job
+          $ref: '#/components/schemas/JobStatus'
         scheduled_at:
-          type: string
-          format: date-time
-          description: >-
-            (Optional) Timestamp when the job was scheduled
+          anyOf:
+          - type: string
+            format: date-time
+          - type: 'null'
         started_at:
-          type: string
-          format: date-time
-          description: >-
-            (Optional) Timestamp when the job execution began
+          anyOf:
+          - type: string
+            format: date-time
+          - type: 'null'
         completed_at:
-          type: string
-          format: date-time
-          description: >-
-            (Optional) Timestamp when the job finished, if completed
+          anyOf:
+          - type: string
+            format: date-time
+          - type: 'null'
         resources_allocated:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Information about computational resources allocated to the
-            job
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         checkpoints:
-          type: array
           items:
             $ref: '#/components/schemas/Checkpoint'
-          description: >-
-            List of model checkpoints created during training
-      additionalProperties: false
+          type: array
+          title: Checkpoints
+      type: object
       required:
-        - job_uuid
-        - status
-        - checkpoints
+      - job_uuid
+      - status
       title: PostTrainingJobStatusResponse
       description: Status of a finetuning job.
     ListPostTrainingJobsResponse:
-      type: object
       properties:
         data:
-          type: array
           items:
-            type: object
-            properties:
-              job_uuid:
-                type: string
-            additionalProperties: false
-            required:
-              - job_uuid
-            title: PostTrainingJob
-      additionalProperties: false
+            $ref: '#/components/schemas/PostTrainingJob'
+          type: array
+          title: Data
+      type: object
       required:
-        - data
+      - data
       title: ListPostTrainingJobsResponse
     DPOAlignmentConfig:
-      type: object
       properties:
         beta:
           type: number
-          description: Temperature parameter for the DPO loss
+          title: Beta
         loss_type:
           $ref: '#/components/schemas/DPOLossType'
           default: sigmoid
-          description: The type of loss function to use for DPO
-      additionalProperties: false
+      type: object
       required:
-        - beta
-        - loss_type
+      - beta
       title: DPOAlignmentConfig
-      description: >-
-        Configuration for Direct Preference Optimization (DPO) alignment.
+      description: Configuration for Direct Preference Optimization (DPO) alignment.
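Since only `beta` remains required on DPOAlignmentConfig after this change (loss_type now falls back to its sigmoid default), a minimal instance might look like this sketch:

```yaml
# Hypothetical DPOAlignmentConfig instance; loss_type may be omitted entirely.
beta: 0.1
loss_type: sigmoid
```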
     DPOLossType:
       type: string
       enum:
-        - sigmoid
-        - hinge
-        - ipo
-        - kto_pair
+      - sigmoid
+      - hinge
+      - ipo
+      - kto_pair
       title: DPOLossType
     DataConfig:
-      type: object
       properties:
         dataset_id:
           type: string
-          description: >-
-            Unique identifier for the training dataset
+          title: Dataset Id
         batch_size:
           type: integer
-          description: Number of samples per training batch
+          title: Batch Size
         shuffle:
           type: boolean
-          description: >-
-            Whether to shuffle the dataset during training
+          title: Shuffle
         data_format:
           $ref: '#/components/schemas/DatasetFormat'
-          description: >-
-            Format of the dataset (instruct or dialog)
         validation_dataset_id:
-          type: string
-          description: >-
-            (Optional) Unique identifier for the validation dataset
+          anyOf:
+          - type: string
+          - type: 'null'
         packed:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
           default: false
-          description: >-
-            (Optional) Whether to pack multiple samples into a single sequence for
-            efficiency
         train_on_input:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
           default: false
-          description: >-
-            (Optional) Whether to compute loss on input tokens as well as output tokens
-      additionalProperties: false
+      type: object
       required:
-        - dataset_id
-        - batch_size
-        - shuffle
-        - data_format
+      - dataset_id
+      - batch_size
+      - shuffle
+      - data_format
       title: DataConfig
-      description: >-
-        Configuration for training data and data loading.
+      description: Configuration for training data and data loading.
     DatasetFormat:
       type: string
       enum:
-        - instruct
-        - dialog
+      - instruct
+      - dialog
       title: DatasetFormat
       description: Format of the training dataset.
     EfficiencyConfig:
-      type: object
       properties:
         enable_activation_checkpointing:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
           default: false
-          description: >-
-            (Optional) Whether to use activation checkpointing to reduce memory usage
         enable_activation_offloading:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
           default: false
-          description: >-
-            (Optional) Whether to offload activations to CPU to save GPU memory
         memory_efficient_fsdp_wrap:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
           default: false
-          description: >-
-            (Optional) Whether to use memory-efficient FSDP wrapping
         fsdp_cpu_offload:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
           default: false
-          description: >-
-            (Optional) Whether to offload FSDP parameters to CPU
-      additionalProperties: false
-      title: EfficiencyConfig
-      description: >-
-        Configuration for memory and compute efficiency optimizations.
-    OptimizerConfig:
       type: object
+      title: EfficiencyConfig
+      description: Configuration for memory and compute efficiency optimizations.
+    OptimizerConfig:
       properties:
         optimizer_type:
           $ref: '#/components/schemas/OptimizerType'
-          description: >-
-            Type of optimizer to use (adam, adamw, or sgd)
         lr:
           type: number
-          description: Learning rate for the optimizer
+          title: Lr
         weight_decay:
           type: number
-          description: >-
-            Weight decay coefficient for regularization
+          title: Weight Decay
         num_warmup_steps:
           type: integer
-          description: Number of steps for learning rate warmup
-      additionalProperties: false
+          title: Num Warmup Steps
+      type: object
       required:
-        - optimizer_type
-        - lr
-        - weight_decay
-        - num_warmup_steps
+      - optimizer_type
+      - lr
+      - weight_decay
+      - num_warmup_steps
       title: OptimizerConfig
-      description: >-
-        Configuration parameters for the optimization algorithm.
+      description: Configuration parameters for the optimization algorithm.
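To make the enum wiring concrete, a DataConfig/OptimizerConfig pair conforming to the schemas above could be sketched as follows (all values illustrative):

```yaml
# Hypothetical training data and optimizer settings
data_config:
  dataset_id: my-training-set    # assumed dataset identifier
  batch_size: 8
  shuffle: true
  data_format: instruct          # DatasetFormat: instruct | dialog
optimizer_config:
  optimizer_type: adamw          # OptimizerType: adam | adamw | sgd
  lr: 2.0e-5
  weight_decay: 0.01
  num_warmup_steps: 100
```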
     OptimizerType:
       type: string
       enum:
-        - adam
-        - adamw
-        - sgd
+      - adam
+      - adamw
+      - sgd
       title: OptimizerType
-      description: >-
-        Available optimizer algorithms for training.
+      description: Available optimizer algorithms for training.
     TrainingConfig:
-      type: object
       properties:
         n_epochs:
           type: integer
-          description: Number of training epochs to run
+          title: N Epochs
         max_steps_per_epoch:
           type: integer
+          title: Max Steps Per Epoch
           default: 1
-          description: Maximum number of steps to run per epoch
         gradient_accumulation_steps:
           type: integer
+          title: Gradient Accumulation Steps
           default: 1
-          description: >-
-            Number of steps to accumulate gradients before updating
         max_validation_steps:
-          type: integer
+          anyOf:
+          - type: integer
+          - type: 'null'
           default: 1
-          description: >-
-            (Optional) Maximum number of validation steps per epoch
         data_config:
-          $ref: '#/components/schemas/DataConfig'
-          description: >-
-            (Optional) Configuration for data loading and formatting
+          anyOf:
+          - $ref: '#/components/schemas/DataConfig'
+            title: DataConfig
+          - type: 'null'
+          title: DataConfig
         optimizer_config:
-          $ref: '#/components/schemas/OptimizerConfig'
-          description: >-
-            (Optional) Configuration for the optimization algorithm
+          anyOf:
+          - $ref: '#/components/schemas/OptimizerConfig'
+            title: OptimizerConfig
+          - type: 'null'
+          title: OptimizerConfig
         efficiency_config:
-          $ref: '#/components/schemas/EfficiencyConfig'
-          description: >-
-            (Optional) Configuration for memory and compute optimizations
+          anyOf:
+          - $ref: '#/components/schemas/EfficiencyConfig'
+            title: EfficiencyConfig
+          - type: 'null'
+          title: EfficiencyConfig
         dtype:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
           default: bf16
-          description: >-
-            (Optional) Data type for model parameters (bf16, fp16, fp32)
-      additionalProperties: false
-      required:
-        - n_epochs
-        - max_steps_per_epoch
-        - gradient_accumulation_steps
-      title: TrainingConfig
-      description: >-
-        Comprehensive configuration for the training process.
-    PreferenceOptimizeRequest:
       type: object
+      required:
+      - n_epochs
+      title: TrainingConfig
+      description: Comprehensive configuration for the training process.
+    PreferenceOptimizeRequest:
       properties:
         job_uuid:
           type: string
-          description: The UUID of the job to create.
+          title: Job Uuid
         finetuned_model:
           type: string
-          description: The model to fine-tune.
+          title: Finetuned Model
         algorithm_config:
           $ref: '#/components/schemas/DPOAlignmentConfig'
-          description: The algorithm configuration.
         training_config:
           $ref: '#/components/schemas/TrainingConfig'
-          description: The training configuration.
         hyperparam_search_config:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The hyperparam search configuration.
+          title: Hyperparam Search Config
         logger_config:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The logger configuration.
-      additionalProperties: false
+          title: Logger Config
+      type: object
       required:
-        - job_uuid
-        - finetuned_model
-        - algorithm_config
-        - training_config
-        - hyperparam_search_config
-        - logger_config
+      - job_uuid
+      - finetuned_model
+      - algorithm_config
+      - training_config
+      - hyperparam_search_config
+      - logger_config
       title: PreferenceOptimizeRequest
     PostTrainingJob:
-      type: object
       properties:
         job_uuid:
           type: string
-      additionalProperties: false
+          title: Job Uuid
+      type: object
       required:
-        - job_uuid
+      - job_uuid
       title: PostTrainingJob
     AlgorithmConfig:
-      oneOf:
-        - $ref: '#/components/schemas/LoraFinetuningConfig'
-        - $ref: '#/components/schemas/QATFinetuningConfig'
       discriminator:
-        propertyName: type
         mapping:
           LoRA: '#/components/schemas/LoraFinetuningConfig'
           QAT: '#/components/schemas/QATFinetuningConfig'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/LoraFinetuningConfig'
+        title: LoraFinetuningConfig
+      - $ref: '#/components/schemas/QATFinetuningConfig'
+        title: QATFinetuningConfig
+      title: LoraFinetuningConfig | QATFinetuningConfig
     LoraFinetuningConfig:
-      type: object
       properties:
         type:
           type: string
           const: LoRA
+          title: Type
           default: LoRA
-          description: Algorithm type identifier, always "LoRA"
         lora_attn_modules:
-          type: array
           items:
             type: string
-          description: >-
-            List of attention module names to apply LoRA to
+          type: array
+          title: Lora Attn Modules
         apply_lora_to_mlp:
           type: boolean
-          description: Whether to apply LoRA to MLP layers
+          title: Apply Lora To Mlp
         apply_lora_to_output:
           type: boolean
-          description: >-
-            Whether to apply LoRA to output projection layers
+          title: Apply Lora To Output
         rank:
           type: integer
-          description: >-
-            Rank of the LoRA adaptation (lower rank = fewer parameters)
+          title: Rank
         alpha:
           type: integer
-          description: >-
-            LoRA scaling parameter that controls adaptation strength
+          title: Alpha
         use_dora:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
           default: false
-          description: >-
-            (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
         quantize_base:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
           default: false
-          description: >-
-            (Optional) Whether to quantize the base model weights
-      additionalProperties: false
-      required:
-        - type
-        - lora_attn_modules
-        - apply_lora_to_mlp
-        - apply_lora_to_output
-        - rank
-        - alpha
-      title: LoraFinetuningConfig
-      description: >-
-        Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
-    QATFinetuningConfig:
       type: object
+      required:
+      - lora_attn_modules
+      - apply_lora_to_mlp
+      - apply_lora_to_output
+      - rank
+      - alpha
+      title: LoraFinetuningConfig
+      description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
+    QATFinetuningConfig:
       properties:
         type:
           type: string
           const: QAT
+          title: Type
           default: QAT
-          description: Algorithm type identifier, always "QAT"
         quantizer_name:
           type: string
-          description: >-
-            Name of the quantization algorithm to use
+          title: Quantizer Name
         group_size:
           type: integer
-          description: Size of groups for grouped quantization
-      additionalProperties: false
-      required:
-        - type
-        - quantizer_name
-        - group_size
-      title: QATFinetuningConfig
-      description: >-
-        Configuration for Quantization-Aware Training (QAT) fine-tuning.
-    SupervisedFineTuneRequest:
+          title: Group Size
       type: object
+      required:
+      - quantizer_name
+      - group_size
+      title: QATFinetuningConfig
+      description: Configuration for Quantization-Aware Training (QAT) fine-tuning.
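A LoraFinetuningConfig instance under the new schema can omit `type` (it defaults to LoRA and is no longer in `required`); a minimal sketch with illustrative values:

```yaml
# Hypothetical LoRA configuration
lora_attn_modules:
- q_proj
- v_proj
apply_lora_to_mlp: false
apply_lora_to_output: false
rank: 8
alpha: 16
use_dora: false        # optional, nullable after this change
```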
+    SupervisedFineTuneRequest:
       properties:
         job_uuid:
           type: string
-          description: The UUID of the job to create.
+          title: Job Uuid
         training_config:
           $ref: '#/components/schemas/TrainingConfig'
-          description: The training configuration.
         hyperparam_search_config:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The hyperparam search configuration.
+          title: Hyperparam Search Config
         logger_config:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The logger configuration.
+          title: Logger Config
         model:
-          type: string
-          description: The model to fine-tune.
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Model descriptor for training if not in provider config
         checkpoint_dir:
-          type: string
-          description: The directory to save checkpoint(s) to.
+          anyOf:
+          - type: string
+          - type: 'null'
         algorithm_config:
-          $ref: '#/components/schemas/AlgorithmConfig'
-          description: The algorithm configuration.
-      additionalProperties: false
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LoraFinetuningConfig'
+              title: LoraFinetuningConfig
+            - $ref: '#/components/schemas/QATFinetuningConfig'
+              title: QATFinetuningConfig
+            discriminator:
+              propertyName: type
+              mapping:
+                LoRA: '#/components/schemas/LoraFinetuningConfig'
+                QAT: '#/components/schemas/QATFinetuningConfig'
+            title: LoraFinetuningConfig | QATFinetuningConfig
+          - type: 'null'
+          title: Algorithm Config
+      type: object
       required:
-        - job_uuid
-        - training_config
-        - hyperparam_search_config
-        - logger_config
+      - job_uuid
+      - training_config
+      - hyperparam_search_config
+      - logger_config
       title: SupervisedFineTuneRequest
+    RegisterModelRequest:
+      properties:
+        model_id:
+          type: string
+          title: Model Id
+        provider_model_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        provider_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        model_type:
+          anyOf:
+          - $ref: '#/components/schemas/ModelType'
+            title: ModelType
+          - type: 'null'
+          title: ModelType
+      type: object
+      required:
+      - model_id
+      title: RegisterModelRequest
+    ParamType:
+      discriminator:
+        mapping:
+          array: '#/components/schemas/ArrayType'
+          boolean: '#/components/schemas/BooleanType'
+          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
+          completion_input: '#/components/schemas/CompletionInputType'
+          json: '#/components/schemas/JsonType'
+          number: '#/components/schemas/NumberType'
+          object: '#/components/schemas/ObjectType'
+          string: '#/components/schemas/StringType'
+          union: '#/components/schemas/UnionType'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/StringType'
+        title: StringType
+      - $ref: '#/components/schemas/NumberType'
+        title: NumberType
+      - $ref: '#/components/schemas/BooleanType'
+        title: BooleanType
+      - $ref: '#/components/schemas/ArrayType'
+        title: ArrayType
+      - $ref: '#/components/schemas/ObjectType'
+        title: ObjectType
+      - $ref: '#/components/schemas/JsonType'
+        title: JsonType
+      - $ref: '#/components/schemas/UnionType'
+        title: UnionType
+      - $ref: '#/components/schemas/ChatCompletionInputType'
+        title: ChatCompletionInputType
+      - $ref: '#/components/schemas/CompletionInputType'
+        title: CompletionInputType
+      title: StringType | ... (9 variants)
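Putting the above together, a SupervisedFineTuneRequest now only requires job_uuid, training_config, hyperparam_search_config and logger_config, and `model` has become nullable. A hedged sketch (identifiers are invented):

```yaml
# Hypothetical fine-tuning request body
job_uuid: job-1234
training_config:
  n_epochs: 1                    # the sole required TrainingConfig field now
hyperparam_search_config: {}
logger_config: {}
model: my-base-model             # assumed model descriptor; may be null
```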
+    RegisterScoringFunctionRequest:
+      properties:
+        scoring_fn_id:
+          type: string
+          title: Scoring Fn Id
+        description:
+          type: string
+          title: Description
+        return_type:
+          anyOf:
+          - $ref: '#/components/schemas/StringType'
+            title: StringType
+          - $ref: '#/components/schemas/NumberType'
+            title: NumberType
+          - $ref: '#/components/schemas/BooleanType'
+            title: BooleanType
+          - $ref: '#/components/schemas/ArrayType'
+            title: ArrayType
+          - $ref: '#/components/schemas/ObjectType'
+            title: ObjectType
+          - $ref: '#/components/schemas/JsonType'
+            title: JsonType
+          - $ref: '#/components/schemas/UnionType'
+            title: UnionType
+          - $ref: '#/components/schemas/ChatCompletionInputType'
+            title: ChatCompletionInputType
+          - $ref: '#/components/schemas/CompletionInputType'
+            title: CompletionInputType
+          title: StringType | ... (9 variants)
+        provider_scoring_fn_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        provider_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        params:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              title: LLMAsJudgeScoringFnParams
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+              title: RegexParserScoringFnParams
+            - $ref: '#/components/schemas/BasicScoringFnParams'
+              title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          - type: 'null'
+          title: Params
+      type: object
+      required:
+      - scoring_fn_id
+      - description
+      - return_type
+      title: RegisterScoringFunctionRequest
+    RegisterShieldRequest:
+      properties:
+        shield_id:
+          type: string
+          title: Shield Id
+        provider_shield_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        provider_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        params:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
+      required:
+      - shield_id
+      title: RegisterShieldRequest
+    RegisterToolGroupRequest:
+      properties:
+        toolgroup_id:
+          type: string
+          title: Toolgroup Id
+        provider_id:
+          type: string
+          title: Provider Id
+        mcp_endpoint:
+          anyOf:
+          - $ref: '#/components/schemas/URL'
+            title: URL
+          - type: 'null'
+          title: URL
+        args:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
+      required:
+      - toolgroup_id
+      - provider_id
+      title: RegisterToolGroupRequest
+    DataSource:
+      discriminator:
+        mapping:
+          rows: '#/components/schemas/RowsDataSource'
+          uri: '#/components/schemas/URIDataSource'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/URIDataSource'
+        title: URIDataSource
+      - $ref: '#/components/schemas/RowsDataSource'
+        title: RowsDataSource
+      title: URIDataSource | RowsDataSource
+    RegisterDatasetRequest:
+      properties:
+        purpose:
+          $ref: '#/components/schemas/DatasetPurpose'
+        source:
+          anyOf:
+          - $ref: '#/components/schemas/URIDataSource'
+            title: URIDataSource
+          - $ref: '#/components/schemas/RowsDataSource'
+            title: RowsDataSource
+          title: URIDataSource | RowsDataSource
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        dataset_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - purpose
+      - source
+      title: RegisterDatasetRequest
+    RegisterBenchmarkRequest:
+      properties:
+        benchmark_id:
+          type: string
+          title: Benchmark Id
+        dataset_id:
+          type: string
+          title: Dataset Id
+        scoring_functions:
+          items:
+            type: string
+          type: array
+          title: Scoring Functions
+        provider_benchmark_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        provider_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
+      required:
+      - benchmark_id
+      - dataset_id
+      - scoring_functions
+      title: RegisterBenchmarkRequest
+    AllowedToolsFilter:
+      properties:
+        tool_names:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+      type: object
+      title: AllowedToolsFilter
+      description: Filter configuration for restricting which MCP tools can be used.
+    ApprovalFilter:
+      properties:
+        always:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+        never:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+      type: object
+      title: ApprovalFilter
+      description: Filter configuration for MCP tool approval requirements.
+    BatchError:
+      properties:
+        code:
+          anyOf:
+          - type: string
+          - type: 'null'
+        line:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        message:
+          anyOf:
+          - type: string
+          - type: 'null'
+        param:
+          anyOf:
+          - type: string
+          - type: 'null'
+      additionalProperties: true
+      type: object
+      title: BatchError
+    BatchRequestCounts:
+      properties:
+        completed:
+          type: integer
+          title: Completed
+        failed:
+          type: integer
+          title: Failed
+        total:
+          type: integer
+          title: Total
+      additionalProperties: true
+      type: object
+      required:
+      - completed
+      - failed
+      - total
+      title: BatchRequestCounts
+    BatchUsage:
+      properties:
+        input_tokens:
+          type: integer
+          title: Input Tokens
+        input_tokens_details:
+          $ref: '#/components/schemas/InputTokensDetails'
+        output_tokens:
+          type: integer
+          title: Output Tokens
+        output_tokens_details:
+          $ref: '#/components/schemas/OutputTokensDetails'
+        total_tokens:
+          type: integer
+          title: Total Tokens
+      additionalProperties: true
+      type: object
+      required:
+      - input_tokens
+      - input_tokens_details
+      - output_tokens
+      - output_tokens_details
+      - total_tokens
+      title: BatchUsage
+    Body_openai_upload_file_v1_files_post:
+      properties:
+        file:
+          type: string
+          format: binary
+          title: File
+        purpose:
+          $ref: '#/components/schemas/OpenAIFilePurpose'
+        expires_after:
+          anyOf:
+          - $ref: '#/components/schemas/ExpiresAfter'
+            title: ExpiresAfter
+          - type: 'null'
+          title: ExpiresAfter
+      type: object
+      required:
+      - file
+      - purpose
+      title: Body_openai_upload_file_v1_files_post
+    Chunk-Input:
+      properties:
+        content:
+          anyOf:
+          - type: string
+          - oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Input'
+              title: ImageContentItem-Input
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Input'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Input | TextContentItem
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem-Input'
+                title: ImageContentItem-Input
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Input'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Input | TextContentItem
+            type: array
+            title: list[ImageContentItem-Input | TextContentItem]
+          title: string | list[ImageContentItem-Input | TextContentItem]
+        chunk_id:
+          type: string
+          title: Chunk Id
+        metadata:
+          additionalProperties: true
+          type: object
+          title: Metadata
+        embedding:
+          anyOf:
+          - items:
+              type: number
+            type: array
+          - type: 'null'
+        chunk_metadata:
+          anyOf:
+          - $ref: '#/components/schemas/ChunkMetadata'
+            title: ChunkMetadata
+          - type: 'null'
+          title: ChunkMetadata
+      type: object
+      required:
+      - content
+      - chunk_id
+      title: Chunk
+      description: A chunk of content that can be inserted into a vector database.
+    Chunk-Output:
+      properties:
+        content:
+          anyOf:
+          - type: string
+          - oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Output'
+              title: ImageContentItem-Output
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Output'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Output | TextContentItem
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem-Output'
+                title: ImageContentItem-Output
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Output'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Output | TextContentItem
+            type: array
+            title: list[ImageContentItem-Output | TextContentItem]
+          title: string | list[ImageContentItem-Output | TextContentItem]
+        chunk_id:
+          type: string
+          title: Chunk Id
+        metadata:
+          additionalProperties: true
+          type: object
+          title: Metadata
+        embedding:
+          anyOf:
+          - items:
+              type: number
+            type: array
+          - type: 'null'
+        chunk_metadata:
+          anyOf:
+          - $ref: '#/components/schemas/ChunkMetadata'
+            title: ChunkMetadata
+          - type: 'null'
+          title: ChunkMetadata
+      type: object
+      required:
+      - content
+      - chunk_id
+      title: Chunk
+      description: A chunk of content that can be inserted into a vector database.
+    ConversationItemInclude:
+      type: string
+      enum:
+      - web_search_call.action.sources
+      - code_interpreter_call.outputs
+      - computer_call_output.output.image_url
+      - file_search_call.results
+      - message.input_image.image_url
+      - message.output_text.logprobs
+      - reasoning.encrypted_content
+      title: ConversationItemInclude
+      description: Specify additional output data to include in the model response.
+    DatasetPurpose:
+      type: string
+      enum:
+      - post-training/messages
+      - eval/question-answer
+      - eval/messages-answer
+      title: DatasetPurpose
+      description: Purpose of the dataset. Each purpose has a required input data schema.
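Combining DatasetPurpose with the RegisterDatasetRequest/DataSource schemas above, a registration payload might be sketched as follows; the URI-style source layout is assumed from the `uri` discriminator mapping:

```yaml
# Hypothetical dataset registration
purpose: eval/question-answer
source:
  type: uri                      # discriminator for URIDataSource (assumed field layout)
  uri: https://example.com/qa.jsonl
dataset_id: my-qa-dataset        # optional
```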
+    Errors:
+      properties:
+        data:
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/BatchError'
+            type: array
+          - type: 'null'
+        object:
+          anyOf:
+          - type: string
+          - type: 'null'
+      additionalProperties: true
+      type: object
+      title: Errors
+    HealthStatus:
+      type: string
+      enum:
+      - OK
+      - Error
+      - Not Implemented
+      title: HealthStatus
+    ImageContentItem-Input:
+      properties:
+        type:
+          type: string
+          const: image
+          title: Type
+          default: image
+        image:
+          $ref: '#/components/schemas/_URLOrData'
+      type: object
+      required:
+      - image
+      title: ImageContentItem
+      description: An image content item
+    ImageContentItem-Output:
+      properties:
+        type:
+          type: string
+          const: image
+          title: Type
+          default: image
+        image:
+          $ref: '#/components/schemas/_URLOrData'
+      type: object
+      required:
+      - image
+      title: ImageContentItem
+      description: An image content item
+    InputTokensDetails:
+      properties:
+        cached_tokens:
+          type: integer
+          title: Cached Tokens
+      additionalProperties: true
+      type: object
+      required:
+      - cached_tokens
+      title: InputTokensDetails
+    JobStatus:
+      type: string
+      enum:
+      - completed
+      - in_progress
+      - failed
+      - scheduled
+      - cancelled
+      title: JobStatus
+      description: Status of a job execution.
+    MCPListToolsTool:
+      properties:
+        input_schema:
+          additionalProperties: true
+          type: object
+          title: Input Schema
+        name:
+          type: string
+          title: Name
+        description:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - input_schema
+      - name
+      title: MCPListToolsTool
+      description: Tool definition returned by MCP list tools operation.
+    OpenAIAssistantMessageParam-Input:
+      properties:
+        role:
+          type: string
+          const: assistant
+          title: Role
+          default: assistant
+        content:
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+            type: array
+            title: list[OpenAIChatCompletionContentPartTextParam]
+          - type: 'null'
+          title: string | list[OpenAIChatCompletionContentPartTextParam]
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+        tool_calls:
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
+            type: array
+          - type: 'null'
+      type: object
+      title: OpenAIAssistantMessageParam
+      description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request.
+    OpenAIAssistantMessageParam-Output:
+      properties:
+        role:
+          type: string
+          const: assistant
+          title: Role
+          default: assistant
+        content:
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+            type: array
+            title: list[OpenAIChatCompletionContentPartTextParam]
+          - type: 'null'
+          title: string | list[OpenAIChatCompletionContentPartTextParam]
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+        tool_calls:
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
+            type: array
+          - type: 'null'
+      type: object
+      title: OpenAIAssistantMessageParam
+      description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request.
+    OpenAIChatCompletionUsageCompletionTokensDetails:
+      properties:
+        reasoning_tokens:
+          anyOf:
+          - type: integer
+          - type: 'null'
+      type: object
+      title: OpenAIChatCompletionUsageCompletionTokensDetails
+      description: Token details for output tokens in OpenAI chat completion usage.
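With Job.status now referencing the shared JobStatus enum defined here, a serialized Job is unchanged on the wire; for example:

```yaml
# Illustrative Job instance
job_id: training-job-42
status: in_progress    # JobStatus: completed | in_progress | failed | scheduled | cancelled
```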
+    OpenAIChatCompletionUsagePromptTokensDetails:
+      properties:
+        cached_tokens:
+          anyOf:
+          - type: integer
+          - type: 'null'
+      type: object
+      title: OpenAIChatCompletionUsagePromptTokensDetails
+      description: Token details for prompt tokens in OpenAI chat completion usage.
+    OpenAIResponseMessage-Input:
+      properties:
+        content:
+          anyOf:
+          - type: string
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                title: OpenAIResponseInputMessageContentText
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                title: OpenAIResponseInputMessageContentImage
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                title: OpenAIResponseInputMessageContentFile
+              discriminator:
+                propertyName: type
+                mapping:
+                  input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                  input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                  input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+              title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile
+            type: array
+            title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile]
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
+                title: OpenAIResponseOutputMessageContentOutputText
+              - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+                title: OpenAIResponseContentPartRefusal
+              discriminator:
+                propertyName: type
+                mapping:
+                  output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
+                  refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+              title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal
+            type: array
+            title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal]
+          title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal]
+        role:
+          title: Role
+          type: string
+          enum:
+          - system
+          - developer
+          - user
+          - assistant
+          default: system
+        type:
+          type: string
+          const: message
+          title: Type
+          default: message
+        id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        status:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - content
+      - role
+      title: OpenAIResponseMessage
+      description: |-
+        Corresponds to the various Message types in the Responses API.
+        They are all under one type because the Responses API gives them all
+        the same "type" value, and there is no way to tell them apart in certain
+        scenarios.
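As a sketch of the OpenAIResponseMessage union above, an input-style user message carrying one text content part could look like this (the text is illustrative):

```yaml
# Hypothetical OpenAIResponseMessage instance
type: message
role: user
content:
- type: input_text
  text: What is the capital of France?
```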
+    OpenAIResponseMessage-Output:
+      properties:
+        content:
+          anyOf:
+          - type: string
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                title: OpenAIResponseInputMessageContentText
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                title: OpenAIResponseInputMessageContentImage
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                title: OpenAIResponseInputMessageContentFile
+              discriminator:
+                propertyName: type
+                mapping:
+                  input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                  input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                  input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+              title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile
+            type: array
+            title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile]
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
+                title: OpenAIResponseOutputMessageContentOutputText
+              - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+                title: OpenAIResponseContentPartRefusal
+              discriminator:
+                propertyName: type
+                mapping:
+                  output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
+                  refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+              title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal
+            type: array
+            title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal]
+          title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal]
+        role:
+          title: Role
+          type: string
+          enum:
+          - system
+          - developer
+          - user
+          - assistant
+          default: system
+        type:
+          type: string
+          const: message
+          title: Type
+          default: message
+        id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        status:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - content
+      - role
+      title: OpenAIResponseMessage
+      description: |-
+        Corresponds to the various Message types in the Responses API.
+        They are all under one type because the Responses API gives them all
+        the same "type" value, and there is no way to tell them apart in certain
+        scenarios.
+    OpenAIResponseOutputMessageFileSearchToolCallResults:
+      properties:
+        attributes:
+          additionalProperties: true
+          type: object
+          title: Attributes
+        file_id:
+          type: string
+          title: File Id
+        filename:
+          type: string
+          title: Filename
+        score:
+          type: number
+          title: Score
+        text:
+          type: string
+          title: Text
+      type: object
+      required:
+      - attributes
+      - file_id
+      - filename
+      - score
+      - text
+      title: OpenAIResponseOutputMessageFileSearchToolCallResults
+      description: Search results returned by the file search operation.
+    OpenAIResponseTextFormat:
+      properties:
+        type:
+          title: Type
+          type: string
+          enum:
+          - text
+          - json_schema
+          - json_object
+          default: text
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+        schema:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        description:
+          anyOf:
+          - type: string
+          - type: 'null'
+        strict:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+      type: object
+      title: OpenAIResponseTextFormat
+      description: Configuration for Responses API text format.
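A hedged example of OpenAIResponseTextFormat requesting schema-constrained output; the schema body is invented for illustration:

```yaml
# Hypothetical text format configuration
type: json_schema
name: weather_report
schema:
  type: object
  properties:
    temperature:
      type: number
strict: true
```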
+    OpenAIResponseUsageInputTokensDetails:
+      properties:
+        cached_tokens:
+          anyOf:
+          - type: integer
+          - type: 'null'
+      type: object
+      title: OpenAIResponseUsageInputTokensDetails
+      description: Token details for input tokens in OpenAI response usage.
+    OpenAIResponseUsageOutputTokensDetails:
+      properties:
+        reasoning_tokens:
+          anyOf:
+          - type: integer
+          - type: 'null'
+      type: object
+      title: OpenAIResponseUsageOutputTokensDetails
+      description: Token details for output tokens in OpenAI response usage.
+    OpenAIUserMessageParam-Input:
+      properties:
+        role:
+          type: string
+          const: user
+          title: Role
+          default: user
+        content:
+          anyOf:
+          - type: string
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+                title: OpenAIChatCompletionContentPartTextParam
+              - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+                title: OpenAIChatCompletionContentPartImageParam
+              - $ref: '#/components/schemas/OpenAIFile'
+                title: OpenAIFile
+              discriminator:
+                propertyName: type
+                mapping:
+                  file: '#/components/schemas/OpenAIFile'
+                  image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+                  text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+              title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile
+            type: array
+            title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile]
+          title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile]
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - content
+      title: OpenAIUserMessageParam
+      description: A message from the user in an OpenAI-compatible chat completion request.
+    OpenAIUserMessageParam-Output:
+      properties:
+        role:
+          type: string
+          const: user
+          title: Role
+          default: user
+        content:
+          anyOf:
+          - type: string
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+                title: OpenAIChatCompletionContentPartTextParam
+              - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+                title: OpenAIChatCompletionContentPartImageParam
+              - $ref: '#/components/schemas/OpenAIFile'
+                title: OpenAIFile
+              discriminator:
+                propertyName: type
+                mapping:
+                  file: '#/components/schemas/OpenAIFile'
+                  image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+                  text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+              title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile
+            type: array
+            title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile]
+          title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile]
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - content
+      title: OpenAIUserMessageParam
+      description: A message from the user in an OpenAI-compatible chat completion request.
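Assuming the usual OpenAI content-part layout for the image variant (the exact field shape lives in the referenced schemas, not shown here), an OpenAIUserMessageParam mixing text and image parts might be sketched as:

```yaml
# Hypothetical user message with mixed content parts
role: user
content:
- type: text
  text: Describe this picture.
- type: image_url
  image_url:
    url: https://example.com/cat.png   # assumed field layout
```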
+    OutputTokensDetails:
+      properties:
+        reasoning_tokens:
+          type: integer
+          title: Reasoning Tokens
+      additionalProperties: true
+      type: object
+      required:
+      - reasoning_tokens
+      title: OutputTokensDetails
+    SearchRankingOptions:
+      properties:
+        ranker:
+          anyOf:
+          - type: string
+          - type: 'null'
+        score_threshold:
+          anyOf:
+          - type: number
+          - type: 'null'
+          default: 0.0
+      type: object
+      title: SearchRankingOptions
+      description: Options for ranking and filtering search results.
+    _URLOrData:
+      properties:
+        url:
+          anyOf:
+          - $ref: '#/components/schemas/URL'
+            title: URL
+          - type: 'null'
+          title: URL
+        data:
+          anyOf:
+          - type: string
+          - type: 'null'
+          contentEncoding: base64
+      type: object
+      title: _URLOrData
+      description: A URL or a base64 encoded string
+    SamplingStrategy:
+      discriminator:
+        mapping:
+          greedy: '#/components/schemas/GreedySamplingStrategy'
+          top_k: '#/components/schemas/TopKSamplingStrategy'
+          top_p: '#/components/schemas/TopPSamplingStrategy'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/GreedySamplingStrategy'
+        title: GreedySamplingStrategy
+      - $ref: '#/components/schemas/TopPSamplingStrategy'
+        title: TopPSamplingStrategy
+      - $ref: '#/components/schemas/TopKSamplingStrategy'
+        title: TopKSamplingStrategy
+      title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy
+    GrammarResponseFormat:
+      description: Configuration for grammar-guided response generation.
+      properties:
+        type:
+          const: grammar
+          default: grammar
+          title: Type
+          type: string
+        bnf:
+          additionalProperties: true
+          title: Bnf
+          type: object
+      required:
+      - bnf
+      title: GrammarResponseFormat
+      type: object
+    JsonSchemaResponseFormat:
+      description: Configuration for JSON schema-guided response generation.
+      properties:
+        type:
+          const: json_schema
+          default: json_schema
+          title: Type
+          type: string
+        json_schema:
+          additionalProperties: true
+          title: Json Schema
+          type: object
+      required:
+      - json_schema
+      title: JsonSchemaResponseFormat
+      type: object
+    ResponseFormat:
+      discriminator:
+        mapping:
+          grammar: '#/components/schemas/GrammarResponseFormat'
+          json_schema: '#/components/schemas/JsonSchemaResponseFormat'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/JsonSchemaResponseFormat'
+        title: JsonSchemaResponseFormat
+      - $ref: '#/components/schemas/GrammarResponseFormat'
+        title: GrammarResponseFormat
+      title: JsonSchemaResponseFormat | GrammarResponseFormat
+    OpenAIResponseContentPart:
+      discriminator:
+        mapping:
+          output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
+          reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+          refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+        title: OpenAIResponseContentPartOutputText
+      - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+        title: OpenAIResponseContentPartRefusal
+      - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+        title: OpenAIResponseContentPartReasoningText
+      title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
+    MetricInResponse:
+      description: A metric value included in API responses.
+      properties:
+        metric:
+          title: Metric
+          type: string
+        value:
+          anyOf:
+          - type: integer
+          - type: number
+          title: integer | number
+        unit:
+          anyOf:
+          - type: string
+          - type: 'null'
+          nullable: true
+      required:
+      - metric
+      - value
+      title: MetricInResponse
+      type: object
+    TextDelta:
+      description: A text content delta for streaming responses.
+      properties:
+        type:
+          const: text
+          default: text
+          title: Type
+          type: string
+        text:
+          title: Text
+          type: string
+      required:
+      - text
+      title: TextDelta
+      type: object
+    ImageDelta:
+      description: An image content delta for streaming responses.
+      properties:
+        type:
+          const: image
+          default: image
+          title: Type
+          type: string
+        image:
+          format: binary
+          title: Image
+          type: string
+      required:
+      - image
+      title: ImageDelta
+      type: object
+    Fp8QuantizationConfig:
+      description: Configuration for 8-bit floating point quantization.
+      properties:
+        type:
+          const: fp8_mixed
+          default: fp8_mixed
+          title: Type
+          type: string
+      title: Fp8QuantizationConfig
+      type: object
+    Bf16QuantizationConfig:
+      description: Configuration for BFloat16 precision (typically no quantization).
+      properties:
+        type:
+          const: bf16
+          default: bf16
+          title: Type
+          type: string
+      title: Bf16QuantizationConfig
+      type: object
+    Int4QuantizationConfig:
+      description: Configuration for 4-bit integer quantization.
+      properties:
+        type:
+          const: int4_mixed
+          default: int4_mixed
+          title: Type
+          type: string
+        scheme:
+          anyOf:
+          - type: string
+          - type: 'null'
+          default: int4_weight_int8_dynamic_activation
+      title: Int4QuantizationConfig
+      type: object
+    UserMessage:
+      description: A message from the user in a chat conversation.
+      properties:
+        role:
+          const: user
+          default: user
+          title: Role
+          type: string
+        content:
+          anyOf:
+          - type: string
+          - discriminator:
+              mapping:
+                image: '#/components/schemas/ImageContentItem'
+                text: '#/components/schemas/TextContentItem'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/ImageContentItem'
+              title: ImageContentItem
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          - items:
+              discriminator:
+                mapping:
+                  image: '#/components/schemas/ImageContentItem'
+                  text: '#/components/schemas/TextContentItem'
+                propertyName: type
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem'
+                title: ImageContentItem
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              title: ImageContentItem | TextContentItem
+            type: array
+            title: list[ImageContentItem | TextContentItem]
+          title: string | list[ImageContentItem | TextContentItem]
+        context:
+          anyOf:
+          - type: string
+          - discriminator:
+              mapping:
+                image: '#/components/schemas/ImageContentItem'
+                text: '#/components/schemas/TextContentItem'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/ImageContentItem'
+              title: ImageContentItem
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          - items:
+              discriminator:
+                mapping:
+                  image: '#/components/schemas/ImageContentItem'
+                  text: '#/components/schemas/TextContentItem'
+                propertyName: type
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem'
+                title: ImageContentItem
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              title: ImageContentItem | TextContentItem
+            type: array
+            title: list[ImageContentItem | TextContentItem]
+          - type: 'null'
+          title: string | list[ImageContentItem | TextContentItem]
+          nullable: true
+      required:
+      - content
+      title: UserMessage
+      type: object
+    ToolResponseMessage:
+      description: A message representing the result of a tool invocation.
+      properties:
+        role:
+          const: tool
+          default: tool
+          title: Role
+          type: string
+        call_id:
+          title: Call Id
+          type: string
+        content:
+          anyOf:
+          - type: string
+          - discriminator:
+              mapping:
+                image: '#/components/schemas/ImageContentItem'
+                text: '#/components/schemas/TextContentItem'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/ImageContentItem'
+              title: ImageContentItem
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          - items:
+              discriminator:
+                mapping:
+                  image: '#/components/schemas/ImageContentItem'
+                  text: '#/components/schemas/TextContentItem'
+                propertyName: type
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem'
+                title: ImageContentItem
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              title: ImageContentItem | TextContentItem
+            type: array
+            title: list[ImageContentItem | TextContentItem]
+          title: string | list[ImageContentItem | TextContentItem]
+      required:
+      - call_id
+      - content
+      title: ToolResponseMessage
+      type: object
+    TokenLogProbs:
+      description: Log probabilities for generated tokens.
+      properties:
+        logprobs_by_token:
+          additionalProperties:
+            type: number
+          title: Logprobs By Token
+          type: object
+      required:
+      - logprobs_by_token
+      title: TokenLogProbs
+      type: object
+    EmbeddingsResponse:
+      description: Response containing generated embeddings.
+      properties:
+        embeddings:
+          items:
+            items:
+              type: number
+            type: array
+          title: Embeddings
+          type: array
+      required:
+      - embeddings
+      title: EmbeddingsResponse
+      type: object
+    OpenAICompletionLogprobs:
+      description: |-
+        The log probabilities for the tokens in the message from an OpenAI-compatible completion response.
+
+        :text_offset: (Optional) The offset of the token in the text
+        :token_logprobs: (Optional) The log probabilities for the tokens
+        :tokens: (Optional) The tokens
+        :top_logprobs: (Optional) The top log probabilities for the tokens
+      properties:
+        text_offset:
+          anyOf:
+          - items:
+              type: integer
+            type: array
+          - type: 'null'
+          nullable: true
+        token_logprobs:
+          anyOf:
+          - items:
+              type: number
+            type: array
+          - type: 'null'
+          nullable: true
+        tokens:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+          nullable: true
+        top_logprobs:
+          anyOf:
+          - items:
+              additionalProperties:
+                type: number
+              type: object
+            type: array
+          - type: 'null'
+          nullable: true
+      title: OpenAICompletionLogprobs
+      type: object
+    VectorStoreCreateRequest:
+      description: Request to create a vector store.
+      properties:
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+          nullable: true
+        file_ids:
+          items:
+            type: string
+          title: File Ids
+          type: array
+        expires_after:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+          nullable: true
+        chunking_strategy:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+          nullable: true
+        metadata:
+          additionalProperties: true
+          title: Metadata
+          type: object
+      title: VectorStoreCreateRequest
+      type: object
+    VectorStoreModifyRequest:
+      description: Request to modify a vector store.
+      properties:
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+          nullable: true
+        expires_after:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+          nullable: true
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+          nullable: true
+      title: VectorStoreModifyRequest
+      type: object
+    VectorStoreSearchRequest:
+      description: Request to search a vector store.
+      properties:
+        query:
+          anyOf:
+          - type: string
+          - items:
+              type: string
+            type: array
+            title: list[string]
+          title: string | list[string]
+        filters:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+          nullable: true
+        max_num_results:
+          default: 10
+          title: Max Num Results
+          type: integer
+        ranking_options:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+          nullable: true
+        rewrite_query:
+          default: false
+          title: Rewrite Query
+          type: boolean
+      required:
+      - query
+      title: VectorStoreSearchRequest
+      type: object
+    DialogType:
+      description: Parameter type for dialog data with semantic output labels.
+      properties:
+        type:
+          const: dialog
+          default: dialog
+          title: Type
+          type: string
+      title: DialogType
+      type: object
+    ConversationMessage:
+      description: OpenAI-compatible message item for conversations.
+      properties:
+        id:
+          description: unique identifier for this message
+          title: Id
+          type: string
+        content:
+          description: message content
+          items:
+            additionalProperties: true
+            type: object
+          title: Content
+          type: array
+        role:
+          description: message role
+          title: Role
+          type: string
+        status:
+          description: message status
+          title: Status
+          type: string
+        type:
+          const: message
+          default: message
+          title: Type
+          type: string
+        object:
+          const: message
+          default: message
+          title: Object
+          type: string
+      required:
+      - id
+      - content
+      - role
+      - status
+      title: ConversationMessage
+      type: object
+    ConversationItemCreateRequest:
+      description: Request body for creating conversation items.
+      properties:
+        items:
+          description: Items to include in the conversation context. You may add up to 20 items at a time.
+          items:
+            discriminator:
+              mapping:
+                file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+                mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+                mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                message: '#/components/schemas/OpenAIResponseMessage'
+                web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage'
+              title: OpenAIResponseMessage
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseOutputMessageWebSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              title: OpenAIResponseOutputMessageFileSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              title: OpenAIResponseOutputMessageFunctionToolCall
+            - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+              title: OpenAIResponseInputFunctionToolCallOutput
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              title: OpenAIResponseMCPApprovalRequest
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+              title: OpenAIResponseMCPApprovalResponse
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              title: OpenAIResponseOutputMessageMCPCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              title: OpenAIResponseOutputMessageMCPListTools
+            title: OpenAIResponseMessage | ... (9 variants)
+          maxItems: 20
+          title: Items
+          type: array
+      required:
+      - items
+      title: ConversationItemCreateRequest
+      type: object
+    ToolGroupInput:
+      description: Input data for registering a tool group.
+      properties:
+        toolgroup_id:
+          title: Toolgroup Id
+          type: string
+        provider_id:
+          title: Provider Id
+          type: string
+        args:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+          nullable: true
+        mcp_endpoint:
+          anyOf:
+          - $ref: '#/components/schemas/URL'
+            title: URL
+          - type: 'null'
+          nullable: true
+          title: URL
+      required:
+      - toolgroup_id
+      - provider_id
+      title: ToolGroupInput
+      type: object
+    Api:
+      description: Enumeration of all available APIs in the Llama Stack system.
+      enum:
+      - providers
+      - inference
+      - safety
+      - agents
+      - batches
+      - vector_io
+      - datasetio
+      - scoring
+      - eval
+      - post_training
+      - tool_runtime
+      - models
+      - shields
+      - vector_stores
+      - datasets
+      - scoring_functions
+      - benchmarks
+      - tool_groups
+      - files
+      - prompts
+      - conversations
+      - inspect
+      title: Api
+      type: string
+    ProviderSpec:
+      properties:
+        api:
+          $ref: '#/components/schemas/Api'
+        provider_type:
+          title: Provider Type
+          type: string
+        config_class:
+          description: Fully-qualified classname of the config for this provider
+          title: Config Class
+          type: string
+        api_dependencies:
+          description: Higher-level API surfaces may depend on other providers to provide their functionality
+          items:
+            $ref: '#/components/schemas/Api'
+          title: Api Dependencies
+          type: array
+        optional_api_dependencies:
+          items:
+            $ref: '#/components/schemas/Api'
+          title: Optional Api Dependencies
+          type: array
+        deprecation_warning:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: If this provider is deprecated, specify the warning message here
+          nullable: true
+        deprecation_error:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: If this provider is deprecated and does NOT work, specify the error message here
+          nullable: true
+        module:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: |2-
+
+            Fully-qualified name of the module to import. The module is expected to have:
+
+             - `get_adapter_impl(config, deps)`: returns the adapter implementation
+
+            Example: `module: ramalama_stack`
+
+          nullable: true
+        pip_packages:
+          description: The pip dependencies needed for this implementation
+          items:
+            type: string
+          title: Pip Packages
+          type: array
+        provider_data_validator:
+          anyOf:
+          - type: string
+          - type: 'null'
+          nullable: true
+        is_external:
+          default: false
+          description: Notes whether this provider is an external provider.
+          title: Is External
+          type: boolean
+        deps__:
+          items:
+            type: string
+          title: Deps
+          type: array
+      required:
+      - api
+      - provider_type
+      - config_class
+      title: ProviderSpec
+      type: object
+    InlineProviderSpec:
+      properties:
+        api:
+          $ref: '#/components/schemas/Api'
+        provider_type:
+          title: Provider Type
+          type: string
+        config_class:
+          description: Fully-qualified classname of the config for this provider
+          title: Config Class
+          type: string
+        api_dependencies:
+          description: Higher-level API surfaces may depend on other providers to provide their functionality
+          items:
+            $ref: '#/components/schemas/Api'
+          title: Api Dependencies
+          type: array
+        optional_api_dependencies:
+          items:
+            $ref: '#/components/schemas/Api'
+          title: Optional Api Dependencies
+          type: array
+        deprecation_warning:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: If this provider is deprecated, specify the warning message here
+          nullable: true
+        deprecation_error:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: If this provider is deprecated and does NOT work, specify the error message here
+          nullable: true
+        module:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: |2-
+
+            Fully-qualified name of the module to import. The module is expected to have:
+
+             - `get_adapter_impl(config, deps)`: returns the adapter implementation
+
+            Example: `module: ramalama_stack`
+
+          nullable: true
+        pip_packages:
+          description: The pip dependencies needed for this implementation
+          items:
+            type: string
+          title: Pip Packages
+          type: array
+        provider_data_validator:
+          anyOf:
+          - type: string
+          - type: 'null'
+          nullable: true
+        is_external:
+          default: false
+          description: Notes whether this provider is an external provider.
+          title: Is External
+          type: boolean
+        deps__:
+          items:
+            type: string
+          title: Deps
+          type: array
+        container_image:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: |2
+
+            The container image to use for this implementation. If one is provided, pip_packages will be ignored.
+            If a provider depends on other providers, the dependencies MUST NOT specify a container image.
+          nullable: true
+        description:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: |2
+
+            A description of the provider. This is used to display in the documentation.
+          nullable: true
+      required:
+      - api
+      - provider_type
+      - config_class
+      title: InlineProviderSpec
+      type: object
+    RemoteProviderSpec:
+      properties:
+        api:
+          $ref: '#/components/schemas/Api'
+        provider_type:
+          title: Provider Type
+          type: string
+        config_class:
+          description: Fully-qualified classname of the config for this provider
+          title: Config Class
+          type: string
+        api_dependencies:
+          description: Higher-level API surfaces may depend on other providers to provide their functionality
+          items:
+            $ref: '#/components/schemas/Api'
+          title: Api Dependencies
+          type: array
+        optional_api_dependencies:
+          items:
+            $ref: '#/components/schemas/Api'
+          title: Optional Api Dependencies
+          type: array
+        deprecation_warning:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: If this provider is deprecated, specify the warning message here
+          nullable: true
+        deprecation_error:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: If this provider is deprecated and does NOT work, specify the error message here
+          nullable: true
+        module:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: |2-
+
+            Fully-qualified name of the module to import. The module is expected to have:
+
+             - `get_adapter_impl(config, deps)`: returns the adapter implementation
+
+            Example: `module: ramalama_stack`
+
+          nullable: true
+        pip_packages:
+          description: The pip dependencies needed for this implementation
+          items:
+            type: string
+          title: Pip Packages
+          type: array
+        provider_data_validator:
+          anyOf:
+          - type: string
+          - type: 'null'
+          nullable: true
+        is_external:
+          default: false
+          description: Notes whether this provider is an external provider.
+          title: Is External
+          type: boolean
+        deps__:
+          items:
+            type: string
+          title: Deps
+          type: array
+        adapter_type:
+          description: Unique identifier for this adapter
+          title: Adapter Type
+          type: string
+        description:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: |2
+
+            A description of the provider. This is used to display in the documentation.
+          nullable: true
+      required:
+      - api
+      - provider_type
+      - config_class
+      - adapter_type
+      title: RemoteProviderSpec
+      type: object
+    PostTrainingJobLogStream:
+      description: Stream of logs from a finetuning job.
+ properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -13520,8 +13040,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -13529,11 +13048,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -13541,113 +13058,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Agents - description: >- - APIs for creating and interacting with agentic systems. - x-displayName: Agents - - name: Benchmarks - description: '' - - name: Conversations - description: >- - Protocol for conversation management operations. - x-displayName: Conversations - - name: DatasetIO - description: '' - - name: Datasets - description: '' - - name: Eval - description: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. - x-displayName: Evaluations - - name: Files - description: >- - This API is used to upload documents that can be used with other Llama Stack - APIs. - x-displayName: Files - - name: Inference - description: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. +- description: APIs for creating and interacting with agentic systems. 
+ name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + This API provides the following extensions: + - idempotent batch creation - This API provides the raw interface to the underlying models. Two kinds of models - are supported: + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. + name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - - Embedding models: these models generate embeddings to be used for semantic - search. - x-displayName: Inference - - name: Inspect - description: >- - APIs for inspecting the Llama Stack service, including health status, available - API routes with methods and implementing providers. - x-displayName: Inspect - - name: Models - description: '' - - name: PostTraining (Coming Soon) - description: '' - - name: Prompts - description: >- - Protocol for prompt management operations. - x-displayName: Prompts - - name: Providers - description: >- - Providers API for inspecting, listing, and modifying providers and their configurations. - x-displayName: Providers - - name: Safety - description: OpenAI-compatible Moderations API. - x-displayName: Safety - - name: Scoring - description: '' - - name: ScoringFunctions - description: '' - - name: Shields - description: '' - - name: SyntheticDataGeneration (Coming Soon) - description: '' - - name: ToolGroups - description: '' - - name: ToolRuntime - description: '' - - name: VectorIO - description: '' + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. 
+ name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Agents - - Benchmarks - - Conversations - - DatasetIO - - Datasets - - Eval - - Files - - Inference - - Inspect - - Models - - PostTraining (Coming Soon) - - Prompts - - Providers - - Safety - - Scoring - - ScoringFunctions - - Shields - - SyntheticDataGeneration (Coming Soon) - - ToolGroups - - ToolRuntime - - VectorIO +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/containers/Containerfile b/containers/Containerfile index 1ddf102af..4993d3273 100644 --- a/containers/Containerfile +++ b/containers/Containerfile @@ -19,6 +19,8 @@ ARG KEEP_WORKSPACE="" ARG DISTRO_NAME="starter" ARG RUN_CONFIG_PATH="" ARG UV_HTTP_TIMEOUT=500 +ARG UV_EXTRA_INDEX_URL="" +ARG UV_INDEX_STRATEGY="" ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT} ENV PYTHONDONTWRITEBYTECODE=1 ENV PIP_DISABLE_PIP_VERSION_CHECK=1 @@ -45,7 +47,7 @@ RUN set -eux; \ exit 1; \ fi -RUN pip install --no-cache-dir uv +RUN pip install --no-cache uv ENV UV_SYSTEM_PYTHON=1 ENV INSTALL_MODE=${INSTALL_MODE} @@ -62,47 +64,60 @@ COPY . /workspace # Install the client package if it is provided # NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python +# Unset UV index env vars to ensure we only use PyPI for the client RUN set -eux; \ + unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \ if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \ if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \ echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \ exit 1; \ fi; \ - uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \ + uv pip install --no-cache -e "$LLAMA_STACK_CLIENT_DIR"; \ fi; # Install llama-stack +# Use UV_EXTRA_INDEX_URL inline only for editable install with RC dependencies RUN set -eux; \ + SAVED_UV_EXTRA_INDEX_URL="${UV_EXTRA_INDEX_URL:-}"; \ + SAVED_UV_INDEX_STRATEGY="${UV_INDEX_STRATEGY:-}"; \ + unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \ if [ "$INSTALL_MODE" = "editable" ]; then \ if [ ! 
-d "$LLAMA_STACK_DIR" ]; then \ echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \ exit 1; \ fi; \ - uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \ - elif [ "$INSTALL_MODE" = "test-pypi" ]; then \ - uv pip install --no-cache-dir fastapi libcst; \ - if [ -n "$TEST_PYPI_VERSION" ]; then \ - uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \ + if [ -n "$SAVED_UV_EXTRA_INDEX_URL" ] && [ -n "$SAVED_UV_INDEX_STRATEGY" ]; then \ + UV_EXTRA_INDEX_URL="$SAVED_UV_EXTRA_INDEX_URL" UV_INDEX_STRATEGY="$SAVED_UV_INDEX_STRATEGY" \ + uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \ else \ - uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \ + uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \ + fi; \ + elif [ "$INSTALL_MODE" = "test-pypi" ]; then \ + uv pip install --no-cache fastapi libcst; \ + if [ -n "$TEST_PYPI_VERSION" ]; then \ + uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \ + else \ + uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \ fi; \ else \ if [ -n "$PYPI_VERSION" ]; then \ - uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \ + uv pip install --no-cache "llama-stack==$PYPI_VERSION"; \ else \ - uv pip install --no-cache-dir llama-stack; \ + uv pip install --no-cache llama-stack; \ fi; \ fi; # Install the dependencies for the distribution +# Explicitly unset UV index env vars to ensure we only use PyPI for distribution deps RUN set -eux; \ + unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \ if [ -z "$DISTRO_NAME" ]; then \ echo "DISTRO_NAME must be provided" >&2; \ exit 1; \ fi; \ deps="$(llama stack list-deps "$DISTRO_NAME")"; \ if [ -n "$deps" ]; then \ - printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \ + printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache; \ fi # Cleanup diff --git a/docs/README.md b/docs/README.md index 1847e49d8..47e6d9cd7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -13,6 +13,42 @@ npm run serve ``` You can open up the docs in your browser at http://localhost:3000 +## File Import System + +This documentation uses `remark-code-import` to import files directly from the repository, eliminating copy-paste maintenance. Files are automatically embedded during build time. + +### Importing Code Files + +To import Python code (or any code files) with syntax highlighting, use this syntax in `.mdx` files: + +```markdown +```python file=./demo_script.py title="demo_script.py" +``` +``` + +This automatically imports the file content and displays it as a formatted code block with Python syntax highlighting. + +**Note:** Paths are relative to the current `.mdx` file location, not the repository root. 
+ +### Importing Markdown Files as Content + +For importing and rendering markdown files (like CONTRIBUTING.md), use the raw-loader approach: + +```jsx +import Contributing from '!!raw-loader!../../../CONTRIBUTING.md'; +import ReactMarkdown from 'react-markdown'; + +{Contributing} +``` + +**Requirements:** +- Install dependencies: `npm install --save-dev raw-loader react-markdown` + +**Path Resolution:** +- For `remark-code-import`: Paths are relative to the current `.mdx` file location +- For `raw-loader`: Paths are relative to the current `.mdx` file location +- Use `../` to navigate up directories as needed + ## Content Try out Llama Stack's capabilities through our detailed Jupyter notebooks: diff --git a/docs/docs/api-deprecated/index.mdx b/docs/docs/api-deprecated/index.mdx new file mode 100644 index 000000000..0da357e30 --- /dev/null +++ b/docs/docs/api-deprecated/index.mdx @@ -0,0 +1,62 @@ +--- +title: Deprecated APIs +description: Legacy APIs that are being phased out +sidebar_label: Deprecated +sidebar_position: 1 +--- + +# Deprecated APIs + +This section contains APIs that are being phased out in favor of newer, more standardized implementations. These APIs are maintained for backward compatibility but are not recommended for new projects. + +:::warning Deprecation Notice +These APIs are deprecated and will be removed in future versions. Please migrate to the recommended alternatives listed below. +::: + +## Migration Guide + +When using deprecated APIs, please refer to the migration guides provided for each API to understand how to transition to the supported alternatives. + +## Deprecated API List + +### Legacy Inference APIs +Some older inference endpoints that have been superseded by the standardized Inference API. + +**Migration Path:** Use the [Inference API](../api/) instead. + +### Legacy Vector Operations +Older vector database operations that have been replaced by the Vector IO API. + +**Migration Path:** Use the [Vector IO API](../api/) instead. + +### Legacy File Operations +Older file management endpoints that have been replaced by the Files API. + +**Migration Path:** Use the [Files API](../api/) instead. + +## Support Timeline + +Deprecated APIs will be supported according to the following timeline: + +- **Current Version**: Full support with deprecation warnings +- **Next Major Version**: Limited support with migration notices +- **Following Major Version**: Removal of deprecated APIs + +## Getting Help + +If you need assistance migrating from deprecated APIs: + +1. Check the specific migration guides for each API +2. Review the [API Reference](../api/) for current alternatives +3. Consult the [Community Forums](https://github.com/llamastack/llama-stack/discussions) for migration support +4. Open an issue on GitHub for specific migration questions + +## Contributing + +If you find issues with deprecated APIs or have suggestions for improving the migration process, please contribute by: + +1. Opening an issue describing the problem +2. Submitting a pull request with improvements +3. Updating migration documentation + +For more information on contributing, see our [Contributing Guide](../contributing/). 
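+
+## Migration Example
+
+As a concrete illustration of the migration paths above, the sketch below replaces a legacy inference call with the OpenAI-compatible Chat Completions endpoint. It is a minimal, hedged example: the API key, server URL, and model id are placeholders to substitute with your own deployment's values.
+
+```python
+import openai
+
+# Point an unmodified OpenAI client at your Llama Stack server
+client = openai.OpenAI(
+    api_key="your-llama-stack-key",
+    base_url="http://localhost:8000/v1",  # placeholder; adjust to your server
+)
+
+# The standard Chat Completions call replaces legacy inference endpoints
+response = client.chat.completions.create(
+    model="llama-3.1-8b",  # placeholder model id
+    messages=[{"role": "user", "content": "Hello, world!"}],
+)
+print(response.choices[0].message.content)
+```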
diff --git a/docs/docs/api-experimental/index.mdx b/docs/docs/api-experimental/index.mdx new file mode 100644 index 000000000..adbd64582 --- /dev/null +++ b/docs/docs/api-experimental/index.mdx @@ -0,0 +1,128 @@ +--- +title: Experimental APIs +description: APIs in development with limited support +sidebar_label: Experimental +sidebar_position: 1 +--- + +# Experimental APIs + +This section contains APIs that are currently in development and may have limited support or stability. These APIs are available for testing and feedback but should not be used in production environments. + +:::warning Experimental Notice +These APIs are experimental and may change without notice. Use with caution and provide feedback to help improve them. +::: + +## Current Experimental APIs + +### Batch Inference API +Run inference on a dataset of inputs in batch mode for improved efficiency. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** Large-scale inference operations + +**Features:** +- Batch processing of multiple inputs +- Optimized resource utilization +- Progress tracking and monitoring + +### Batch Agents API +Run agentic workflows on a dataset of inputs in batch mode. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** Large-scale agent operations + +**Features:** +- Batch agent execution +- Parallel processing capabilities +- Result aggregation and analysis + +### Synthetic Data Generation API +Generate synthetic data for model development and testing. + +**Status:** Early Development +**Provider Support:** Very Limited +**Use Case:** Training data augmentation + +**Features:** +- Automated data generation +- Quality control mechanisms +- Customizable generation parameters + +### Batches API (OpenAI-compatible) +OpenAI-compatible batch management for inference operations. + +**Status:** In Development +**Provider Support:** Limited +**Use Case:** OpenAI batch processing compatibility + +**Features:** +- OpenAI batch API compatibility +- Job scheduling and management +- Status tracking and monitoring + +## Getting Started with Experimental APIs + +### Prerequisites +- Llama Stack server running with experimental features enabled +- Appropriate provider configurations +- Understanding of API limitations + +### Configuration +Experimental APIs may require special configuration flags or provider settings. Check the specific API documentation for setup requirements. + +### Usage Guidelines +1. **Testing Only**: Use experimental APIs for testing and development only +2. **Monitor Changes**: Watch for updates and breaking changes +3. **Provide Feedback**: Report issues and suggest improvements +4. **Backup Data**: Always backup important data when using experimental features + +## Feedback and Contribution + +We encourage feedback on experimental APIs to help improve them: + +### Reporting Issues +- Use GitHub issues with the "experimental" label +- Include detailed error messages and reproduction steps +- Specify the API version and provider being used + +### Feature Requests +- Submit feature requests through GitHub discussions +- Provide use cases and expected behavior +- Consider contributing implementations + +### Testing +- Test experimental APIs in your environment +- Report performance issues and optimization opportunities +- Share success stories and use cases + +## Migration to Stable APIs + +As experimental APIs mature, they will be moved to the stable API section. When this happens: + +1. 
**Announcement**: We'll announce the promotion in release notes +2. **Migration Guide**: Detailed migration instructions will be provided +3. **Deprecation Timeline**: Experimental versions will be deprecated with notice +4. **Support**: Full support will be available for stable versions + +## Provider Support + +Experimental APIs may have limited provider support. Check the specific API documentation for: + +- Supported providers +- Configuration requirements +- Known limitations +- Performance characteristics + +## Roadmap + +Experimental APIs are part of our ongoing development roadmap: + +- **Q1 2024**: Batch Inference API stabilization +- **Q2 2024**: Batch Agents API improvements +- **Q3 2024**: Synthetic Data Generation API expansion +- **Q4 2024**: Batches API full OpenAI compatibility + +For the latest updates, follow our [GitHub releases](https://github.com/llamastack/llama-stack/releases) and [roadmap discussions](https://github.com/llamastack/llama-stack/discussions). diff --git a/docs/docs/api-openai/index.mdx b/docs/docs/api-openai/index.mdx new file mode 100644 index 000000000..99f3edaa7 --- /dev/null +++ b/docs/docs/api-openai/index.mdx @@ -0,0 +1,287 @@ +--- +title: OpenAI API Compatibility +description: OpenAI-compatible APIs and features in Llama Stack +sidebar_label: OpenAI Compatibility +sidebar_position: 1 +--- + +# OpenAI API Compatibility + +Llama Stack provides comprehensive OpenAI API compatibility, allowing you to use existing OpenAI API clients and tools with Llama Stack providers. This compatibility layer ensures seamless migration and interoperability. + +## Overview + +OpenAI API compatibility in Llama Stack includes: + +- **OpenAI-compatible endpoints** for all major APIs +- **Request/response format compatibility** with OpenAI standards +- **Authentication and authorization** using OpenAI-style API keys +- **Error handling** with OpenAI-compatible error codes and messages +- **Rate limiting** and usage tracking compatible with OpenAI patterns + +## Supported OpenAI APIs + +### Chat Completions API +OpenAI-compatible chat completions for conversational AI applications. + +**Endpoint:** `/v1/chat/completions` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All inference providers + +**Features:** +- Message-based conversations +- System prompts and user messages +- Function calling support +- Streaming responses +- Temperature and other parameter controls + +### Completions API +OpenAI-compatible text completions for general text generation. + +**Endpoint:** `/v1/completions` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All inference providers + +**Features:** +- Text completion generation +- Prompt engineering support +- Customizable parameters +- Batch processing capabilities + +### Embeddings API +OpenAI-compatible embeddings for vector operations. + +**Endpoint:** `/v1/embeddings` +**Compatibility:** Full OpenAI API compatibility +**Providers:** All embedding providers + +**Features:** +- Text embedding generation +- Multiple embedding models +- Batch embedding processing +- Vector similarity operations + +### Files API +OpenAI-compatible file management for document processing. + +**Endpoint:** `/v1/files` +**Compatibility:** Full OpenAI API compatibility +**Providers:** Local Filesystem, S3 + +**Features:** +- File upload and management +- Document processing +- File metadata tracking +- Secure file access + +### Vector Store Files API +OpenAI-compatible vector store file operations for RAG applications. 
+ +**Endpoint:** `/v1/vector_stores/{vector_store_id}/files` +**Compatibility:** Full OpenAI API compatibility +**Providers:** FAISS, SQLite-vec, Milvus, ChromaDB, Qdrant, Weaviate, Postgres (PGVector) + +**Features:** +- Automatic document processing +- Vector store integration +- File chunking and indexing +- Search and retrieval operations + +### Batches API +OpenAI-compatible batch processing for large-scale operations. + +**Endpoint:** `/v1/batches` +**Compatibility:** OpenAI API compatibility (experimental) +**Providers:** Limited support + +**Features:** +- Batch job creation and management +- Progress tracking +- Result retrieval +- Error handling + +## Migration from OpenAI + +### Step 1: Update API Endpoint +Change your API endpoint from OpenAI to your Llama Stack server: + +```python +# Before (OpenAI) +import openai +client = openai.OpenAI(api_key="your-openai-key") + +# After (Llama Stack) +import openai +client = openai.OpenAI( + api_key="your-llama-stack-key", + base_url="http://localhost:8000/v1" # Your Llama Stack server +) +``` + +### Step 2: Configure Providers +Set up your preferred providers in the Llama Stack configuration: + +```yaml +# stack-config.yaml +inference: + providers: + - name: "meta-reference" + type: "inline" + model: "llama-3.1-8b" +``` + +### Step 3: Test Compatibility +Verify that your existing code works with Llama Stack: + +```python +# Test chat completions +response = client.chat.completions.create( + model="llama-3.1-8b", + messages=[ + {"role": "user", "content": "Hello, world!"} + ] +) +print(response.choices[0].message.content) +``` + +## Provider-Specific Features + +### Meta Reference Provider +- Full OpenAI API compatibility +- Local model execution +- Custom model support + +### Remote Providers +- OpenAI API compatibility +- Cloud-based execution +- Scalable infrastructure + +### Vector Store Providers +- OpenAI vector store API compatibility +- Automatic document processing +- Advanced search capabilities + +## Authentication + +Llama Stack supports OpenAI-style authentication: + +### API Key Authentication +```python +client = openai.OpenAI( + api_key="your-api-key", + base_url="http://localhost:8000/v1" +) +``` + +### Environment Variables +```bash +export OPENAI_API_KEY="your-api-key" +export OPENAI_BASE_URL="http://localhost:8000/v1" +``` + +## Error Handling + +Llama Stack provides OpenAI-compatible error responses: + +```python +try: + response = client.chat.completions.create(...) +except openai.APIError as e: + print(f"API Error: {e}") +except openai.RateLimitError as e: + print(f"Rate Limit Error: {e}") +except openai.APIConnectionError as e: + print(f"Connection Error: {e}") +``` + +## Rate Limiting + +OpenAI-compatible rate limiting is supported: + +- **Requests per minute** limits +- **Tokens per minute** limits +- **Concurrent request** limits +- **Usage tracking** and monitoring + +## Monitoring and Observability + +Track your API usage with OpenAI-compatible monitoring: + +- **Request/response logging** +- **Usage metrics** and analytics +- **Performance monitoring** +- **Error tracking** and alerting + +## Best Practices + +### 1. Provider Selection +Choose providers based on your requirements: +- **Local development**: Meta Reference, Ollama +- **Production**: Cloud providers (Fireworks, Together, NVIDIA) +- **Specialized use cases**: Custom providers + +### 2. 
Model Configuration +Configure models for optimal performance: +- **Model selection** based on task requirements +- **Parameter tuning** for specific use cases +- **Resource allocation** for performance + +### 3. Error Handling +Implement robust error handling: +- **Retry logic** for transient failures +- **Fallback providers** for high availability +- **Monitoring** and alerting for issues + +### 4. Security +Follow security best practices: +- **API key management** and rotation +- **Access control** and authorization +- **Data privacy** and compliance + +## Implementation Examples + +For detailed code examples and implementation guides, see our [OpenAI Implementation Guide](../providers/openai.mdx). + +## Known Limitations + +### Responses API Limitations +The Responses API is still in active development. For detailed information about current limitations and implementation status, see our [OpenAI Responses API Limitations](../providers/openai_responses_limitations.mdx). + +## Troubleshooting + +### Common Issues + +**Connection Errors** +- Verify server is running +- Check network connectivity +- Validate API endpoint URL + +**Authentication Errors** +- Verify API key is correct +- Check key permissions +- Ensure proper authentication headers + +**Model Errors** +- Verify model is available +- Check provider configuration +- Validate model parameters + +### Getting Help + +For OpenAI compatibility issues: + +1. **Check Documentation**: Review provider-specific documentation +2. **Community Support**: Ask questions in GitHub discussions +3. **Issue Reporting**: Open GitHub issues for bugs +4. **Professional Support**: Contact support for enterprise issues + +## Roadmap + +Upcoming OpenAI compatibility features: + +- **Enhanced batch processing** support +- **Advanced function calling** capabilities +- **Improved error handling** and diagnostics +- **Performance optimizations** for large-scale deployments + +For the latest updates, follow our [GitHub releases](https://github.com/llamastack/llama-stack/releases) and [roadmap discussions](https://github.com/llamastack/llama-stack/discussions). diff --git a/docs/docs/api/index.mdx b/docs/docs/api/index.mdx new file mode 100644 index 000000000..7088c6c2b --- /dev/null +++ b/docs/docs/api/index.mdx @@ -0,0 +1,144 @@ +--- +title: API Reference +description: Complete reference for Llama Stack APIs +sidebar_label: Overview +sidebar_position: 1 +--- + +# API Reference + +Llama Stack provides a comprehensive set of APIs for building generative AI applications. All APIs follow OpenAI-compatible standards and can be used interchangeably across different providers. + +## Core APIs + +### Inference API +Run inference with Large Language Models (LLMs) and embedding models. + +**Supported Providers:** +- Meta Reference (Single Node) +- Ollama (Single Node) +- Fireworks (Hosted) +- Together (Hosted) +- NVIDIA NIM (Hosted and Single Node) +- vLLM (Hosted and Single Node) +- TGI (Hosted and Single Node) +- AWS Bedrock (Hosted) +- Cerebras (Hosted) +- Groq (Hosted) +- SambaNova (Hosted) +- PyTorch ExecuTorch (On-device iOS, Android) +- OpenAI (Hosted) +- Anthropic (Hosted) +- Gemini (Hosted) +- WatsonX (Hosted) + +### Agents API +Run multi-step agentic workflows with LLMs, including tool usage, memory (RAG), and complex reasoning. 
+ +**Supported Providers:** +- Meta Reference (Single Node) +- Fireworks (Hosted) +- Together (Hosted) +- PyTorch ExecuTorch (On-device iOS) + +### Vector IO API +Perform operations on vector stores, including adding documents, searching, and deleting documents. + +**Supported Providers:** +- FAISS (Single Node) +- SQLite-Vec (Single Node) +- Chroma (Hosted and Single Node) +- Milvus (Hosted and Single Node) +- Postgres (PGVector) (Hosted and Single Node) +- Weaviate (Hosted) +- Qdrant (Hosted and Single Node) + +### Files API (OpenAI-compatible) +Manage file uploads, storage, and retrieval with OpenAI-compatible endpoints. + +**Supported Providers:** +- Local Filesystem (Single Node) +- S3 (Hosted) + +### Vector Store Files API (OpenAI-compatible) +Integrate file operations with vector stores for automatic document processing and search. + +**Supported Providers:** +- FAISS (Single Node) +- SQLite-vec (Single Node) +- Milvus (Single Node) +- ChromaDB (Hosted and Single Node) +- Qdrant (Hosted and Single Node) +- Weaviate (Hosted) +- Postgres (PGVector) (Hosted and Single Node) + +### Safety API +Apply safety policies to outputs at a systems level, not just model level. + +**Supported Providers:** +- Llama Guard (Depends on Inference Provider) +- Prompt Guard (Single Node) +- Code Scanner (Single Node) +- AWS Bedrock (Hosted) + +### Post Training API +Fine-tune models for specific use cases and domains. + +**Supported Providers:** +- Meta Reference (Single Node) +- HuggingFace (Single Node) +- TorchTune (Single Node) +- NVIDIA NEMO (Hosted) + +### Eval API +Generate outputs and perform scoring to evaluate system performance. + +**Supported Providers:** +- Meta Reference (Single Node) +- NVIDIA NEMO (Hosted) + +### Telemetry API +Collect telemetry data from the system for monitoring and observability. + +**Supported Providers:** +- Meta Reference (Single Node) + +### Tool Runtime API +Interact with various tools and protocols to extend LLM capabilities. + +**Supported Providers:** +- Brave Search (Hosted) +- RAG Runtime (Single Node) + +## API Compatibility + +All Llama Stack APIs are designed to be OpenAI-compatible, allowing you to: +- Use existing OpenAI API clients and tools +- Migrate from OpenAI to other providers seamlessly +- Maintain consistent API contracts across different environments + +## Getting Started + +To get started with Llama Stack APIs: + +1. **Choose a Distribution**: Select a pre-configured distribution that matches your environment +2. **Configure Providers**: Set up the providers you want to use for each API +3. **Start the Server**: Launch the Llama Stack server with your configuration +4. **Use the APIs**: Make requests to the API endpoints using your preferred client + +For detailed setup instructions, see our [Getting Started Guide](../getting_started/quickstart). + +## Provider Details + +For complete provider compatibility and setup instructions, see our [Providers Documentation](../providers/). + +## API Stability + +Llama Stack APIs are organized by stability level: +- **[Stable APIs](./index.mdx)** - Production-ready APIs with full support +- **[Experimental APIs](../api-experimental/)** - APIs in development with limited support +- **[Deprecated APIs](../api-deprecated/)** - Legacy APIs being phased out + +## OpenAI Integration + +For specific OpenAI API compatibility features, see our [OpenAI Compatibility Guide](../api-openai/). 
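+
+## Quick Example
+
+As a quick sanity check after completing the Getting Started steps above, you can list the models your server exposes using the Python client. This is a minimal sketch: the package name is `llama-stack-client`, and the port shown (8321) is an assumption; adjust it to match how you launched your server.
+
+```python
+from llama_stack_client import LlamaStackClient
+
+# Connect to a running Llama Stack server (adjust host/port to your setup)
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+# Enumerate the models registered across your configured providers
+for model in client.models.list():
+    print(model.identifier)
+```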
diff --git a/docs/docs/building_applications/index.mdx b/docs/docs/building_applications/index.mdx index a4b71efd7..935a02f8a 100644 --- a/docs/docs/building_applications/index.mdx +++ b/docs/docs/building_applications/index.mdx @@ -35,9 +35,6 @@ Here are the key topics that will help you build effective AI applications: - **[Telemetry](./telemetry.mdx)** - Monitor and analyze your agents' performance and behavior - **[Safety](./safety.mdx)** - Implement guardrails and safety measures to ensure responsible AI behavior -### 🎮 **Interactive Development** -- **[Playground](./playground.mdx)** - Interactive environment for testing and developing applications - ## Application Patterns ### 🤖 **Conversational Agents** diff --git a/docs/docs/building_applications/playground.mdx b/docs/docs/building_applications/playground.mdx index f3290a356..1afb250c4 100644 --- a/docs/docs/building_applications/playground.mdx +++ b/docs/docs/building_applications/playground.mdx @@ -1,298 +1,87 @@ --- -title: Llama Stack Playground -description: Interactive interface to explore and experiment with Llama Stack capabilities +title: Admin UI & Chat Playground +description: Web-based admin interface and chat playground for Llama Stack sidebar_label: Playground sidebar_position: 10 --- -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +# Admin UI & Chat Playground -# Llama Stack Playground +The Llama Stack UI provides a comprehensive web-based admin interface for managing your Llama Stack server, with an integrated chat playground for interactive testing. This admin interface is the primary way to monitor, manage, and debug your Llama Stack applications. -:::note[Experimental Feature] -The Llama Stack Playground is currently experimental and subject to change. We welcome feedback and contributions to help improve it. 
-::: +## Quick Start -The Llama Stack Playground is a simple interface that aims to: -- **Showcase capabilities and concepts** of Llama Stack in an interactive environment -- **Demo end-to-end application code** to help users get started building their own applications -- **Provide a UI** to help users inspect and understand Llama Stack API providers and resources - -## Key Features - -### Interactive Playground Pages - -The playground provides interactive pages for users to explore Llama Stack API capabilities: - -#### Chatbot Interface - - - - - - -**Simple Chat Interface** -- Chat directly with Llama models through an intuitive interface -- Uses the `/chat/completions` streaming API under the hood -- Real-time message streaming for responsive interactions -- Perfect for testing model capabilities and prompt engineering - - - - -**Document-Aware Conversations** -- Upload documents to create memory banks -- Chat with a RAG-enabled agent that can query your documents -- Uses Llama Stack's `/agents` API to create and manage RAG sessions -- Ideal for exploring knowledge-enhanced AI applications - - - - -#### Evaluation Interface - - - - - - -**Custom Dataset Evaluation** -- Upload your own evaluation datasets -- Run evaluations using available scoring functions -- Uses Llama Stack's `/scoring` API for flexible evaluation workflows -- Great for testing application performance on custom metrics - - - - - - -**Pre-registered Evaluation Tasks** -- Evaluate models or agents on pre-defined tasks -- Uses Llama Stack's `/eval` API for comprehensive evaluation -- Combines datasets and scoring functions for standardized testing - -**Setup Requirements:** -Register evaluation datasets and benchmarks first: +Launch the admin UI with: ```bash -# Register evaluation dataset -llama-stack-client datasets register \ - --dataset-id "mmlu" \ - --provider-id "huggingface" \ - --url "https://huggingface.co/datasets/llamastack/evals" \ - --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \ - --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}' - -# Register benchmark task -llama-stack-client benchmarks register \ - --eval-task-id meta-reference-mmlu \ - --provider-id meta-reference \ - --dataset-id mmlu \ - --scoring-functions basic::regex_parser_multiple_choice_answer +npx llama-stack-ui ``` - - +Then visit `http://localhost:8322` to access the interface. 
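+
+:::note
+The admin UI connects to a running Llama Stack server, so make sure one is up (for example, via `llama stack run <your-distro>`) before opening the playground.
+:::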
-#### Inspection Interface +## Admin Interface Features - +The Llama Stack UI is organized into three main sections: - - +### 🎯 Create +**Chat Playground** - Interactive testing environment +- Real-time chat interface for testing agents and models +- Multi-turn conversations with tool calling support +- Agent SDK integration (will be migrated to Responses API) +- Custom system prompts and model parameter adjustment -**Provider Management** -- Inspect available Llama Stack API providers -- View provider configurations and capabilities -- Uses the `/providers` API for real-time provider information -- Essential for understanding your deployment's capabilities +### 📊 Manage +**Logs & Resource Management** - Monitor and manage your stack +- **Responses Logs**: View and analyze agent responses and interactions +- **Chat Completions Logs**: Monitor chat completion requests and responses +- **Vector Stores**: Create, manage, and monitor vector databases for RAG workflows +- **Prompts**: Full CRUD operations for prompt templates and management +- **Files**: Forthcoming file management capabilities - - +## Key Capabilities for Application Development -**Resource Exploration** -- Inspect Llama Stack API resources including: - - **Models**: Available language models - - **Datasets**: Registered evaluation datasets - - **Memory Banks**: Vector databases and knowledge stores - - **Benchmarks**: Evaluation tasks and scoring functions - - **Shields**: Safety and content moderation tools -- Uses `//list` APIs for comprehensive resource visibility -- For detailed information about resources, see [Core Concepts](/docs/concepts) +### Real-time Monitoring +- **Response Tracking**: Monitor all agent responses and tool calls +- **Completion Analysis**: View chat completion performance and patterns +- **Vector Store Activity**: Track RAG operations and document processing +- **Prompt Usage**: Analyze prompt template performance - - +### Resource Management +- **Vector Store CRUD**: Create, update, and delete vector databases +- **Prompt Library**: Organize and version control your prompts +- **File Operations**: Manage documents and assets (forthcoming) + +### Interactive Testing +- **Chat Playground**: Test conversational flows before production deployment +- **Agent Prototyping**: Validate agent behaviors and tool integrations + +## Development Workflow Integration + +The admin UI supports your development lifecycle: + +1. **Development**: Use chat playground to prototype and test features +2. **Monitoring**: Track system performance through logs and metrics +3. **Management**: Organize prompts, vector stores, and other resources +4. **Debugging**: Analyze logs to identify and resolve issues + +## Architecture Notes + +- **Current**: Chat playground uses Agents SDK +- **Future**: Migration to Responses API for improved performance and consistency +- **Admin Focus**: Primary emphasis on monitoring, logging, and resource management ## Getting Started -### Quick Start Guide +1. **Launch the UI**: Run `npx llama-stack-ui` +2. **Explore Logs**: Start with Responses and Chat Completions logs to understand your system activity +3. **Test in Playground**: Use the chat interface to validate your agent configurations +4. **Manage Resources**: Create vector stores and organize prompts through the UI - - +For detailed setup and configuration, see the [Llama Stack UI documentation](/docs/distributions/llama_stack_ui). -**1. 
Start the Llama Stack API Server** +## Next Steps -```bash -llama stack list-deps together | xargs -L1 uv pip install -llama stack run together -``` - -**2. Start the Streamlit UI** - -```bash -# Launch the playground interface -uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py -``` - - - - -**Making the Most of the Playground:** - -- **Start with Chat**: Test basic model interactions and prompt engineering -- **Explore RAG**: Upload sample documents to see knowledge-enhanced responses -- **Try Evaluations**: Use the scoring interface to understand evaluation metrics -- **Inspect Resources**: Check what providers and resources are available -- **Experiment with Settings**: Adjust parameters to see how they affect results - - - - -### Available Distributions - -The playground works with any Llama Stack distribution. Popular options include: - - - - -```bash -llama stack list-deps together | xargs -L1 uv pip install -llama stack run together -``` - -**Features:** -- Cloud-hosted models -- Fast inference -- Multiple model options - - - - -```bash -llama stack list-deps ollama | xargs -L1 uv pip install -llama stack run ollama -``` - -**Features:** -- Local model execution -- Privacy-focused -- No internet required - - - - -```bash -llama stack list-deps meta-reference | xargs -L1 uv pip install -llama stack run meta-reference -``` - -**Features:** -- Reference implementation -- All API features available -- Best for development - - - - -## Use Cases & Examples - -### Educational Use Cases -- **Learning Llama Stack**: Hands-on exploration of API capabilities -- **Prompt Engineering**: Interactive testing of different prompting strategies -- **RAG Experimentation**: Understanding how document retrieval affects responses -- **Evaluation Understanding**: See how different metrics evaluate model performance - -### Development Use Cases -- **Prototype Testing**: Quick validation of application concepts -- **API Exploration**: Understanding available endpoints and parameters -- **Integration Planning**: Seeing how different components work together -- **Demo Creation**: Showcasing Llama Stack capabilities to stakeholders - -### Research Use Cases -- **Model Comparison**: Side-by-side testing of different models -- **Evaluation Design**: Understanding how scoring functions work -- **Safety Testing**: Exploring shield effectiveness with different inputs -- **Performance Analysis**: Measuring model behavior across different scenarios - -## Best Practices - -### 🚀 **Getting Started** -- Begin with simple chat interactions to understand basic functionality -- Gradually explore more advanced features like RAG and evaluations -- Use the inspection tools to understand your deployment's capabilities - -### 🔧 **Development Workflow** -- Use the playground to prototype before writing application code -- Test different parameter settings interactively -- Validate evaluation approaches before implementing them programmatically - -### 📊 **Evaluation & Testing** -- Start with simple scoring functions before trying complex evaluations -- Use the playground to understand evaluation results before automation -- Test safety features with various input types - -### 🎯 **Production Preparation** -- Use playground insights to inform your production API usage -- Test edge cases and error conditions interactively -- Validate resource configurations before deployment - -## Related Resources - -- **[Getting Started Guide](../getting_started/quickstart)** - Complete setup and introduction -- **[Core 
Concepts](/docs/concepts)** - Understanding Llama Stack fundamentals -- **[Agents](./agent)** - Building intelligent agents -- **[RAG (Retrieval Augmented Generation)](./rag)** - Knowledge-enhanced applications -- **[Evaluations](./evals)** - Comprehensive evaluation framework -- **[API Reference](/docs/api/llama-stack-specification)** - Complete API documentation +- Set up your [first agent](/docs/building_applications/agent) +- Implement [RAG functionality](/docs/building_applications/rag) +- Add [evaluation metrics](/docs/building_applications/evals) +- Configure [safety measures](/docs/building_applications/safety) diff --git a/docs/docs/building_applications/safety.mdx b/docs/docs/building_applications/safety.mdx index 16fe5f6f8..998c02b20 100644 --- a/docs/docs/building_applications/safety.mdx +++ b/docs/docs/building_applications/safety.mdx @@ -391,5 +391,4 @@ client.shields.register( - **[Agents](./agent)** - Integrating safety shields with intelligent agents - **[Agent Execution Loop](./agent_execution_loop)** - Understanding safety in the execution flow - **[Evaluations](./evals)** - Evaluating safety shield effectiveness -- **[Telemetry](./telemetry)** - Monitoring safety violations and metrics - **[Llama Guard Documentation](https://github.com/meta-llama/PurpleLlama/tree/main/Llama-Guard3)** - Advanced safety model details diff --git a/docs/docs/building_applications/telemetry.mdx b/docs/docs/building_applications/telemetry.mdx index 2f1d80d41..761f444ef 100644 --- a/docs/docs/building_applications/telemetry.mdx +++ b/docs/docs/building_applications/telemetry.mdx @@ -10,203 +10,34 @@ import TabItem from '@theme/TabItem'; # Telemetry -The Llama Stack uses OpenTelemetry to provide comprehensive tracing, metrics, and logging capabilities. +The preferred way to instrument Llama Stack is with OpenTelemetry. Llama Stack enriches the data +collected by OpenTelemetry to capture helpful information about the performance and behavior of your +application. Here is an example of how to forward your telemetry to an OTLP collector from Llama Stack: +```sh +export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4318" +export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf +export OTEL_SERVICE_NAME="llama-stack-server" -## Automatic Metrics Generation +uv pip install opentelemetry-distro opentelemetry-exporter-otlp +uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement - -Llama Stack automatically generates metrics during inference operations. These metrics are aggregated at the **inference request level** and provide insights into token usage and model performance. - -### Available Metrics - -The following metrics are automatically generated for each inference request: - -| Metric Name | Type | Unit | Description | Labels | -|-------------|------|------|-------------|--------| -| `llama_stack_prompt_tokens_total` | Counter | `tokens` | Number of tokens in the input prompt | `model_id`, `provider_id` | -| `llama_stack_completion_tokens_total` | Counter | `tokens` | Number of tokens in the generated response | `model_id`, `provider_id` | -| `llama_stack_tokens_total` | Counter | `tokens` | Total tokens used (prompt + completion) | `model_id`, `provider_id` | - -### Metric Generation Flow - -1. **Token Counting**: During inference operations (chat completion, completion, etc.), the system counts tokens in both input prompts and generated responses -2. **Metric Construction**: For each request, `MetricEvent` objects are created with the token counts -3. 
**Telemetry Logging**: Metrics are sent to the configured telemetry sinks -4. **OpenTelemetry Export**: When OpenTelemetry is enabled, metrics are exposed as standard OpenTelemetry counters - -### Metric Aggregation Level - -All metrics are generated and aggregated at the **inference request level**. This means: - -- Each individual inference request generates its own set of metrics -- Metrics are not pre-aggregated across multiple requests -- Aggregation (sums, averages, etc.) can be performed by your observability tools (Prometheus, Grafana, etc.) -- Each metric includes labels for `model_id` and `provider_id` to enable filtering and grouping - -### Example Metric Event - -```python -MetricEvent( - trace_id="1234567890abcdef", - span_id="abcdef1234567890", - metric="total_tokens", - value=150, - timestamp=1703123456.789, - unit="tokens", - attributes={ - "model_id": "meta-llama/Llama-3.2-3B-Instruct", - "provider_id": "tgi" - }, -) +uv run opentelemetry-instrument llama stack run run.yaml ``` -## Telemetry Sinks -Choose from multiple sink types based on your observability needs: +### Known issues - - +Some database instrumentation libraries have a known bug where spans get wrapped twice, or do not get connected to a trace. +To prevent this, you can disable database specific tracing, and rely just on the SQLAlchemy tracing. If you are using +`sqlite3` as your database, for example, you can disable the additional tracing like this: -Send events to an OpenTelemetry Collector for integration with observability platforms: - -**Use Cases:** -- Visualizing traces in tools like Jaeger -- Collecting metrics for Prometheus -- Integration with enterprise observability stacks - -**Features:** -- Standard OpenTelemetry format -- Compatible with all OpenTelemetry collectors -- Supports both traces and metrics - - - - -Print events to the console for immediate debugging: - -**Use Cases:** -- Development and testing -- Quick debugging sessions -- Simple logging without external tools - -**Features:** -- Immediate output visibility -- No setup required -- Human-readable format - - - - -## Configuration - -### Meta-Reference Provider - -Currently, only the meta-reference provider is implemented. 
It can be configured to send events to multiple sink types: - -```yaml -telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "llama-stack-service" - sinks: ['console', 'otel_trace', 'otel_metric'] - otel_exporter_otlp_endpoint: "http://localhost:4318" +```sh +export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3" ``` -### Environment Variables - -Configure telemetry behavior using environment variables: - -- **`OTEL_EXPORTER_OTLP_ENDPOINT`**: OpenTelemetry Collector endpoint (default: `http://localhost:4318`) -- **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string) -- **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `[]`) - -### Quick Setup: Complete Telemetry Stack - -Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana): - -```bash -./scripts/telemetry/setup_telemetry.sh -``` - -This sets up: -- **Jaeger UI**: http://localhost:16686 (traces visualization) -- **Prometheus**: http://localhost:9090 (metrics) -- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources) -- **OTEL Collector**: http://localhost:4318 (OTLP endpoint) - -Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`. - -## Querying Metrics - -When using the OpenTelemetry sink, metrics are exposed in standard format and can be queried through various tools: - - - - -Example Prometheus queries for analyzing token usage: - -```promql -# Total tokens used across all models -sum(llama_stack_tokens_total) - -# Tokens per model -sum by (model_id) (llama_stack_tokens_total) - -# Average tokens per request over 5 minutes -rate(llama_stack_tokens_total[5m]) - -# Token usage by provider -sum by (provider_id) (llama_stack_tokens_total) -``` - - - - -Create dashboards using Prometheus as a data source: - -- **Token Usage Over Time**: Line charts showing token consumption trends -- **Model Performance**: Comparison of different models by token efficiency -- **Provider Analysis**: Breakdown of usage across different providers -- **Request Patterns**: Understanding peak usage times and patterns - - - - -Forward metrics to other observability systems: - -- Export to multiple backends simultaneously -- Apply transformations and filtering -- Integrate with existing monitoring infrastructure - - - - -## Best Practices - -### 🔍 **Monitoring Strategy** -- Use OpenTelemetry for production environments -- Set up alerts on key metrics like token usage and error rates - -### 📊 **Metrics Analysis** -- Track token usage trends to optimize costs -- Monitor response times across different models -- Analyze usage patterns to improve resource allocation - -### 🚨 **Alerting & Debugging** -- Set up alerts for unusual token consumption spikes -- Use trace data to debug performance issues -- Monitor error rates and failure patterns - -### 🔧 **Configuration Management** -- Use environment variables for flexible deployment -- Ensure proper network access to OpenTelemetry collectors - ## Related Resources -- **[Agents](./agent)** - Monitoring agent execution with telemetry -- **[Evaluations](./evals)** - Using telemetry data for performance evaluation -- **[Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)** - Telemetry examples and queries - **[OpenTelemetry Documentation](https://opentelemetry.io/)** - Comprehensive observability 
framework - **[Jaeger Documentation](https://www.jaegertracing.io/)** - Distributed tracing visualization diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx index 3b78ec57b..f7b913fef 100644 --- a/docs/docs/building_applications/tools.mdx +++ b/docs/docs/building_applications/tools.mdx @@ -104,23 +104,19 @@ client.toolgroups.register( ) ``` -Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth2.0 for authentication. You can provide authorization headers to send to the MCP server using the "Provider Data" abstraction provided by Llama Stack. When making an agent call, +Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth2.0 for authentication. You can provide the authorization token when creating the Agent: ```python agent = Agent( ..., - tools=["mcp::deepwiki"], - extra_headers={ - "X-LlamaStack-Provider-Data": json.dumps( - { - "mcp_headers": { - "http://mcp.deepwiki.com/sse": { - "Authorization": "Bearer ", - }, - }, - } - ), - }, + tools=[ + { + "type": "mcp", + "server_url": "https://mcp.deepwiki.com/sse", + "server_label": "mcp::deepwiki", + "authorization": "", # OAuth token (without "Bearer " prefix) + } + ], ) agent.create_turn(...) ``` diff --git a/docs/docs/concepts/apis/external.mdx b/docs/docs/concepts/apis/external.mdx index 42819a4ac..005b85647 100644 --- a/docs/docs/concepts/apis/external.mdx +++ b/docs/docs/concepts/apis/external.mdx @@ -58,7 +58,7 @@ External APIs must expose a `available_providers()` function in their module tha ```python # llama_stack_api_weather/api.py -from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec def available_providers() -> list[ProviderSpec]: @@ -79,7 +79,7 @@ A Protocol class like so: # llama_stack_api_weather/api.py from typing import Protocol -from llama_stack.schema_utils import webmethod +from llama_stack_api import webmethod class WeatherAPI(Protocol): @@ -151,13 +151,12 @@ __all__ = ["WeatherAPI", "available_providers"] # llama-stack-api-weather/src/llama_stack_api_weather/weather.py from typing import Protocol -from llama_stack.providers.datatypes import ( +from llama_stack_api import ( Api, ProviderSpec, RemoteProviderSpec, + webmethod, ) -from llama_stack.schema_utils import webmethod - def available_providers() -> list[ProviderSpec]: return [ diff --git a/docs/docs/concepts/apis/index.mdx b/docs/docs/concepts/apis/index.mdx index 6e699d137..c4b561606 100644 --- a/docs/docs/concepts/apis/index.mdx +++ b/docs/docs/concepts/apis/index.mdx @@ -7,7 +7,7 @@ sidebar_position: 1 # APIs -A Llama Stack API is described as a collection of REST endpoints. We currently support the following APIs: +A Llama Stack API is described as a collection of REST endpoints following OpenAI API standards. We currently support the following APIs: - **Inference**: run inference with a LLM - **Safety**: apply safety policies to the output at a Systems (not only model) level @@ -16,13 +16,25 @@ A Llama Stack API is described as a collection of REST endpoints. 
We currently support the following APIs:
 - **Scoring**: evaluate outputs of the system
 - **Eval**: generate outputs (via Inference or Agents) and perform scoring
 - **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
-- **Telemetry**: collect telemetry data from the system
+- **Files**: manage file uploads, storage, and retrieval
 - **Post Training**: fine-tune a model
 - **Tool Runtime**: interact with various tools and protocols
-- **Responses**: generate responses from an LLM using this OpenAI compatible API.
+- **Responses**: generate responses from an LLM

 We are working on adding a few more APIs to complete the application lifecycle. These will include:
 - **Batch Inference**: run inference on a dataset of inputs
 - **Batch Agents**: run agents on a dataset of inputs
-- **Synthetic Data Generation**: generate synthetic data for model development
 - **Batches**: OpenAI-compatible batch management for inference
+
+## OpenAI API Compatibility
+
+We are working on adding OpenAI API compatibility to Llama Stack. This will allow you to use Llama Stack with OpenAI API clients and tools.
+
+### File Operations and Vector Store Integration
+
+The Files API and Vector Store APIs work together through file operations, enabling automatic document processing and search. This integration implements the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files) and allows you to:
+- Upload documents through the Files API
+- Automatically process and chunk documents into searchable vectors
+- Store processed content in vector databases based on the availability of [our providers](../../providers/index.mdx)
+- Search through documents using natural language queries
+
+For detailed information about this integration, see [File Operations and Vector Store Integration](../file_operations_vector_stores.mdx).
diff --git a/docs/docs/concepts/file_operations_vector_stores.mdx b/docs/docs/concepts/file_operations_vector_stores.mdx
new file mode 100644
index 000000000..6168ecf9d
--- /dev/null
+++ b/docs/docs/concepts/file_operations_vector_stores.mdx
@@ -0,0 +1,420 @@
+# File Operations and Vector Store Integration
+
+## Overview
+
+Llama Stack provides seamless integration between the Files API and Vector Store APIs, enabling you to upload documents and automatically process them into searchable vector embeddings. This integration implements file operations following the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files).
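+
+At a glance, the flow looks like this (a condensed sketch of the pipeline detailed in the sections below, assuming the async Python client and a server at `http://localhost:8000`):
+
+```python
+from llama_stack_client import AsyncLlamaStackClient
+
+
+async def index_and_search() -> None:
+    client = AsyncLlamaStackClient(base_url="http://localhost:8000")
+
+    # 1. Upload a document through the Files API
+    with open("document.pdf", "rb") as f:
+        file_info = await client.files.create(file=f, purpose="assistants")
+
+    # 2. Attach it to a vector store; chunking and embedding happen automatically
+    store = await client.vector_stores.create(name="my_documents")
+    await client.vector_stores.files.create(
+        vector_store_id=store.id, file_id=file_info.id
+    )
+
+    # 3. Search the processed content with a natural language query
+    results = await client.vector_stores.search(
+        vector_store_id=store.id, query="What is the main topic?"
+    )
+    for result in results.data:
+        print(result.score)
+```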
+
+## Enhanced Capabilities Beyond OpenAI
+
+While Llama Stack maintains full compatibility with OpenAI's Vector Store API, it provides several additional capabilities that enhance functionality and flexibility:
+
+### **Embedding Model Specification**
+Unlike OpenAI's vector stores, which use a fixed embedding model, Llama Stack allows you to specify which embedding model to use when creating a vector store:
+
+```python
+# Create a vector store with a specific embedding model
+vector_store = await client.vector_stores.create(
+    name="my_documents",
+    embedding_model="all-MiniLM-L6-v2",  # Specify your preferred model
+    embedding_dimension=384,
+)
+```
+
+### **Advanced Search Modes**
+Llama Stack supports multiple search modes beyond basic vector similarity:
+
+- **Vector Search**: Pure semantic similarity search using embeddings
+- **Keyword Search**: Traditional keyword-based search for exact matches
+- **Hybrid Search**: Combines both vector and keyword search for optimal results
+
+```python
+# Different search modes
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="machine learning algorithms",
+    search_mode="hybrid",  # or "vector", "keyword"
+    max_num_results=5,
+)
+```
+
+### **Flexible Ranking Options**
+For hybrid search, Llama Stack offers configurable ranking strategies:
+
+- **RRF (Reciprocal Rank Fusion)**: Combines rankings with a configurable impact factor
+- **Weighted Ranker**: Linear combination of vector and keyword scores with adjustable weights
+
+```python
+# Custom ranking configuration
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks",
+    search_mode="hybrid",
+    ranking_options={
+        "ranker": {"type": "weighted", "alpha": 0.7}  # 70% vector, 30% keyword
+    },
+)
+```
+
+### **Provider Selection**
+Choose from multiple vector store providers based on your specific needs:
+
+- **Inline Providers**: FAISS (fast in-memory), SQLite-vec (disk-based), Milvus (high-performance)
+- **Remote Providers**: ChromaDB, Qdrant, Weaviate, Postgres (PGVector), Milvus
+
+```python
+# Specify a provider when creating a vector store
+vector_store = await client.vector_stores.create(
+    name="my_documents",
+    provider_id="sqlite-vec",  # Choose your preferred provider
+)
+```
+
+## How It Works
+
+The file operations work through several key components:
+
+1. **File Upload**: Documents are uploaded through the Files API
+2. **Automatic Processing**: Files are automatically chunked and converted to embeddings
+3. **Vector Storage**: Chunks are stored in vector databases with metadata
+4. **Search & Retrieval**: Users can search through processed documents using natural language
+
+## Supported Vector Store Providers
+
+The following vector store providers support file operations:
+
+### Inline Providers (Single Node)
+
+- **FAISS**: Fast in-memory vector similarity search
+- **SQLite-vec**: Disk-based storage with hybrid search capabilities
+
+### Remote Providers (Hosted)
+
+- **ChromaDB**: Vector database with metadata filtering
+- **Weaviate**: Vector database with GraphQL interface
+- **Postgres (PGVector)**: Vector extensions for PostgreSQL
+
+### Both Inline & Remote Providers
+- **Milvus**: High-performance vector database with advanced indexing
+- **Qdrant**: Vector similarity search with payload filtering
+
+## File Processing Pipeline
+
+### 1. File Upload
+
+```python
+from llama_stack_client import AsyncLlamaStackClient
+
+client = AsyncLlamaStackClient(base_url="http://localhost:8321")
+
+# Upload a document
+with open("document.pdf", "rb") as f:
+    file_info = await client.files.create(file=f, purpose="assistants")
+```
+
+### 2. Attach to Vector Store
+
+```python
+# Create a vector store
+vector_store = await client.vector_stores.create(name="my_documents")
+
+# Attach the file to the vector store
+file_attach_response = await client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+```
+
+### 3. Automatic Processing
+
+The system automatically:
+- Detects the file type and extracts text content
+- Splits content into chunks (default: 800 tokens with 400 token overlap)
+- Generates embeddings for each chunk
+- Stores chunks with metadata in the vector store
+- Updates the file status to "completed"
+
+### 4. Search and Retrieval
+
+```python
+# Search through processed documents
+search_results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="What is the main topic discussed?",
+    max_num_results=5,
+)
+
+# Process results
+for result in search_results.data:
+    print(f"Score: {result.score}")
+    for content in result.content:
+        print(f"Content: {content.text}")
+```
+
+## Supported File Types
+
+The file operations support various document formats:
+
+- **Text Files**: `.txt`, `.md`, `.rst`
+- **Documents**: `.pdf`, `.docx`, `.doc`
+- **Code**: `.py`, `.js`, `.java`, `.cpp`, etc.
+- **Data**: `.json`, `.csv`, `.xml`
+- **Web Content**: HTML files
+
+## Chunking Strategies
+
+### Default Strategy
+
+The default chunking strategy uses:
+- **Max Chunk Size**: 800 tokens
+- **Overlap**: 400 tokens
+- **Method**: Semantic boundary detection
+
+### Custom Chunking
+
+You can customize chunking when attaching files:
+
+```python
+# Define a static chunking strategy (OpenAI-compatible schema)
+chunking_strategy = {
+    "type": "static",
+    "static": {
+        "max_chunk_size_tokens": 600,
+        "chunk_overlap_tokens": 200,
+    },
+}
+
+# Attach the file with custom chunking
+file_attach_response = await client.vector_stores.files.create(
+    vector_store_id=vector_store.id,
+    file_id=file_info.id,
+    chunking_strategy=chunking_strategy,
+)
+```
+
+**Note**: While Llama Stack is OpenAI-compatible, it also supports additional options beyond the standard OpenAI API. When creating vector stores, you can specify custom embedding models and embedding dimensions that will be used when processing chunks from attached files.
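+
+Because ingestion happens asynchronously, it can be useful to wait for a file to finish processing before searching. A small sketch of that pattern, assuming the OpenAI-style `vector_stores.files.retrieve` call and the status values described under File Status Tracking below; treat the exact method name as an assumption if your client version differs:
+
+```python
+import asyncio
+
+
+async def wait_for_processing(client, vector_store_id, file_id, timeout_s=120.0):
+    """Poll a vector store file until it leaves the in_progress state (sketch)."""
+    deadline = asyncio.get_event_loop().time() + timeout_s
+    while asyncio.get_event_loop().time() < deadline:
+        vs_file = await client.vector_stores.files.retrieve(
+            vector_store_id=vector_store_id, file_id=file_id
+        )
+        if vs_file.status != "in_progress":
+            return vs_file.status  # "completed", "failed", or "cancelled"
+        await asyncio.sleep(1.0)
+    raise TimeoutError(f"File {file_id} still processing after {timeout_s}s")
+```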
+
+
+## File Management
+
+### List Files in Vector Store
+
+```python
+# List all files in a vector store
+files = await client.vector_stores.files.list(vector_store_id=vector_store.id)
+
+for file in files.data:
+    print(f"File: {file.filename}, Status: {file.status}")
+```
+
+### File Status Tracking
+
+Files go through several statuses:
+- **in_progress**: File is being processed
+- **completed**: File successfully processed and searchable
+- **failed**: Processing failed (check `last_error` for details)
+- **cancelled**: Processing was cancelled
+
+### Retrieve File Content
+
+```python
+# Get chunked content from the vector store
+content_response = await client.vector_stores.files.retrieve_content(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+
+for chunk in content_response.content:
+    print(f"Chunk {chunk.metadata.get('chunk_index', 0)}: {chunk.text}")
+```
+
+## Vector Store Management
+
+### List Vector Stores
+
+Retrieve a paginated list of all vector stores:
+
+```python
+# List all vector stores with default pagination
+vector_stores = await client.vector_stores.list()
+
+# Custom pagination and ordering
+vector_stores = await client.vector_stores.list(
+    limit=10,
+    order="asc",  # or "desc"
+    after="vs_12345678",  # cursor-based pagination
+)
+
+for store in vector_stores.data:
+    print(f"Store: {store.name}, Files: {store.file_counts.total}")
+    print(f"Created: {store.created_at}, Status: {store.status}")
+```
+
+### Retrieve Vector Store Details
+
+Get detailed information about a specific vector store:
+
+```python
+# Get vector store details
+store_details = await client.vector_stores.retrieve(vector_store_id="vs_12345678")
+
+print(f"Name: {store_details.name}")
+print(f"Status: {store_details.status}")
+print(f"File Counts: {store_details.file_counts}")
+print(f"Usage: {store_details.usage_bytes} bytes")
+print(f"Created: {store_details.created_at}")
+print(f"Metadata: {store_details.metadata}")
+```
+
+### Update Vector Store
+
+Modify vector store properties such as name, metadata, or expiration settings:
+
+```python
+# Update vector store name and metadata
+updated_store = await client.vector_stores.update(
+    vector_store_id="vs_12345678",
+    name="Updated Document Collection",
+    metadata={
+        "description": "Updated collection for research",
+        "category": "research",
+        "version": "2.0",
+    },
+)
+
+# Set an expiration policy
+expired_store = await client.vector_stores.update(
+    vector_store_id="vs_12345678",
+    expires_after={"anchor": "last_active_at", "days": 30},
+)
+
+print(f"Updated store: {updated_store.name}")
+print(f"Last active: {updated_store.last_active_at}")
+```
+
+### Delete Vector Store
+
+Remove a vector store and all its associated data:
+
+```python
+# Delete a vector store
+delete_response = await client.vector_stores.delete(vector_store_id="vs_12345678")
+
+if delete_response.deleted:
+    print(f"Vector store {delete_response.id} successfully deleted")
+else:
+    print("Failed to delete vector store")
+```
+
+**Important Notes:**
+- Deleting a vector store removes all files, chunks, and embeddings
+- This operation cannot be undone
+- The underlying vector database is also cleaned up
+- Consider backing up important data before deletion
+
+## Search Capabilities
+
+### Vector Search
+
+Pure similarity search using embeddings:
+
+```python
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="machine learning algorithms",
+    max_num_results=10,
+)
+```
+
+### Filtered Search
+
+Combine vector search with metadata filtering:
+
+```python
+results = await client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="machine learning algorithms",
+    filters={"file_type": "pdf", "upload_date": "2024-01-01"},
+    max_num_results=10,
+)
+```
+
+### Hybrid Search
+
+[SQLite-vec](../providers/vector_io/inline_sqlite-vec.mdx), [pgvector](../providers/vector_io/remote_pgvector.mdx), and [Milvus](../providers/vector_io/inline_milvus.mdx) support combining vector and keyword search.
+
+## Performance Considerations
+
+> **Note**: For detailed performance optimization strategies, see [Performance Considerations](../providers/files/openai_file_operations_support.md#performance-considerations) in the provider documentation.
+
+**Key Points:**
+- **Chunk Size**: 400-600 tokens for precision, 800-1200 for context
+- **Storage**: Choose a provider based on your performance needs
+- **Search**: Optimize for your specific use case
+
+## Error Handling
+
+> **Note**: For comprehensive troubleshooting and error handling, see [Troubleshooting](../providers/files/openai_file_operations_support.md#troubleshooting) in the provider documentation.
+
+**Common Issues:**
+- File processing failures (format, size limits)
+- Search performance optimization
+- Storage and memory issues
+
+## Best Practices
+
+> **Note**: For detailed best practices and recommendations, see [Best Practices](../providers/files/openai_file_operations_support.md#best-practices) in the provider documentation.
+
+**Key Recommendations:**
+- File organization and naming conventions
+- Chunking strategy optimization
+- Metadata and monitoring practices
+- Regular cleanup and maintenance
+
+## Integration Examples
+
+### RAG Application
+
+```python
+# Build a RAG system with file uploads
+async def build_rag_system():
+    # Create a vector store
+    vector_store = await client.vector_stores.create(name="knowledge_base")
+
+    # Upload and process documents
+    documents = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]
+    for doc in documents:
+        with open(doc, "rb") as f:
+            file_info = await client.files.create(file=f, purpose="assistants")
+        await client.vector_stores.files.create(
+            vector_store_id=vector_store.id, file_id=file_info.id
+        )
+
+    return vector_store
+
+
+# Query the RAG system
+async def query_rag(vector_store_id, question):
+    results = await client.vector_stores.search(
+        vector_store_id=vector_store_id, query=question, max_num_results=5
+    )
+    return results
+```
+
+### Document Analysis
+
+```python
+# Analyze document content through vector search
+async def analyze_document(vector_store_id, file_id):
+    # Get the document content
+    content = await client.vector_stores.files.retrieve_content(
+        vector_store_id=vector_store_id, file_id=file_id
+    )
+
+    # Search for specific topics
+    topics = ["introduction", "methodology", "conclusion"]
+    analysis = {}
+
+    for topic in topics:
+        results = await client.vector_stores.search(
+            vector_store_id=vector_store_id, query=topic, max_num_results=3
+        )
+        analysis[topic] = results.data
+
+    return analysis
+```
+
+## Next Steps
+
+- Explore the [Files API documentation](../providers/files/files.mdx) for a detailed API reference
+- Check [Vector Store Providers](../providers/vector_io/index.mdx) for specific implementation details
+- Review [Getting Started](../getting_started/quickstart.mdx) for quick setup instructions
diff --git a/docs/docs/contributing/index.mdx b/docs/docs/contributing/index.mdx
index 373f817f3..4ff56bcaf 100644
--- a/docs/docs/contributing/index.mdx
+++ b/docs/docs/contributing/index.mdx
@@ -1,232 +1,13
@@ -# Contributing to Llama Stack -We want to make contributing to this project as easy and transparent as -possible. +--- +title: Contributing +description: Contributing to Llama Stack +sidebar_label: Contributing to Llama Stack +sidebar_position: 3 +hide_title: true +--- -## Set up your development environment +import Contributing from '!!raw-loader!../../../CONTRIBUTING.md'; +import ReactMarkdown from 'react-markdown'; -We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments. -You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/). -You can install the dependencies by running: - -```bash -cd llama-stack -uv sync --group dev -uv pip install -e . -source .venv/bin/activate -``` - -```{note} -You can use a specific version of Python with `uv` by adding the `--python ` flag (e.g. `--python 3.12`). -Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`. -For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/). -``` - -Note that you can create a dotenv file `.env` that includes necessary environment variables: -``` -LLAMA_STACK_BASE_URL=http://localhost:8321 -LLAMA_STACK_CLIENT_LOG=debug -LLAMA_STACK_PORT=8321 -LLAMA_STACK_CONFIG= -TAVILY_SEARCH_API_KEY= -BRAVE_SEARCH_API_KEY= -``` - -And then use this dotenv file when running client SDK tests via the following: -```bash -uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py --text-model=meta-llama/Llama-3.1-8B-Instruct -``` - -### Pre-commit Hooks - -We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running: - -```bash -uv run pre-commit install -``` - -After that, pre-commit hooks will run automatically before each commit. - -Alternatively, if you don't want to install the pre-commit hooks, you can run the checks manually by running: - -```bash -uv run pre-commit run --all-files -``` - -```{caution} -Before pushing your changes, make sure that the pre-commit hooks have passed successfully. -``` - -## Discussions -> Issues -> Pull Requests - -We actively welcome your pull requests. However, please read the following. This is heavily inspired by [Ghostty](https://github.com/ghostty-org/ghostty/blob/main/CONTRIBUTING.md). - -If in doubt, please open a [discussion](https://github.com/meta-llama/llama-stack/discussions); we can always convert that to an issue later. - -### Issues -We use GitHub issues to track public bugs. Please ensure your description is -clear and has sufficient instructions to be able to reproduce the issue. - -Meta has a [bounty program](http://facebook.com/whitehat/info) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -### Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Meta's open source projects. - -Complete your CLA here: [https://code.facebook.com/cla](https://code.facebook.com/cla) - -**I'd like to contribute!** - -If you are new to the project, start by looking at the issues tagged with "good first issue". If you're interested -leave a comment on the issue and a triager will assign it to you. - -Please avoid picking up too many issues at once. 
This helps you stay focused and ensures that others in the community also have opportunities to contribute. -- Try to work on only 1–2 issues at a time, especially if you’re still getting familiar with the codebase. -- Before taking an issue, check if it’s already assigned or being actively discussed. -- If you’re blocked or can’t continue with an issue, feel free to unassign yourself or leave a comment so others can step in. - -**I have a bug!** - -1. Search the issue tracker and discussions for similar issues. -2. If you don't have steps to reproduce, open a discussion. -3. If you have steps to reproduce, open an issue. - -**I have an idea for a feature!** - -1. Open a discussion. - -**I've implemented a feature!** - -1. If there is an issue for the feature, open a pull request. -2. If there is no issue, open a discussion and link to your branch. - -**I have a question!** - -1. Open a discussion or use [Discord](https://discord.gg/llama-stack). - - -**Opening a Pull Request** - -1. Fork the repo and create your branch from `main`. -2. If you've changed APIs, update the documentation. -3. Ensure the test suite passes. -4. Make sure your code lints using `pre-commit`. -5. If you haven't already, complete the Contributor License Agreement ("CLA"). -6. Ensure your pull request follows the [conventional commits format](https://www.conventionalcommits.org/en/v1.0.0/). -7. Ensure your pull request follows the [coding style](#coding-style). - - -Please keep pull requests (PRs) small and focused. If you have a large set of changes, consider splitting them into logically grouped, smaller PRs to facilitate review and testing. - -```{tip} -As a general guideline: -- Experienced contributors should try to keep no more than 5 open PRs at a time. -- New contributors are encouraged to have only one open PR at a time until they’re familiar with the codebase and process. -``` - -## Repository guidelines - -### Coding Style - -* Comments should provide meaningful insights into the code. Avoid filler comments that simply - describe the next step, as they create unnecessary clutter, same goes for docstrings. -* Prefer comments to clarify surprising behavior and/or relationships between parts of the code - rather than explain what the next line of code does. -* Catching exceptions, prefer using a specific exception type rather than a broad catch-all like - `Exception`. -* Error messages should be prefixed with "Failed to ..." -* 4 spaces for indentation rather than tab -* When using `# noqa` to suppress a style or linter warning, include a comment explaining the - justification for bypassing the check. -* When using `# type: ignore` to suppress a mypy warning, include a comment explaining the - justification for bypassing the check. -* Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or - readability reasons. -* Providers configuration class should be Pydantic Field class. It should have a `description` field - that describes the configuration. These descriptions will be used to generate the provider - documentation. -* When possible, use keyword arguments only when calling functions. -* Llama Stack utilizes custom Exception classes for certain Resources that should be used where applicable. - -### License -By contributing to Llama, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. 
- -## Common Tasks - -Some tips about common tasks you work on while contributing to Llama Stack: - -### Setup for development - -```bash -git clone https://github.com/meta-llama/llama-stack.git -cd llama-stack -uv run llama stack list-deps | xargs -L1 uv pip install - -# (Optional) If you are developing the llama-stack-client-python package, you can add it as an editable package. -git clone https://github.com/meta-llama/llama-stack-client-python.git -uv add --editable ../llama-stack-client-python -``` - -### Updating distribution configurations - -If you have made changes to a provider's configuration in any form (introducing a new config key, or -changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML -files as well as the documentation. You should not change `docs/source/.../distributions/` files -manually as they are auto-generated. - -### Updating the provider documentation - -If you have made changes to a provider's configuration, you should run `./scripts/provider_codegen.py` -to re-generate the documentation. You should not change `docs/source/.../providers/` files manually -as they are auto-generated. -Note that the provider "description" field will be used to generate the provider documentation. - -### Building the Documentation - -If you are making changes to the documentation at [https://llamastack.github.io/](https://llamastack.github.io/), you can use the following command to build the documentation and preview your changes. - -```bash -# This rebuilds the documentation pages and the OpenAPI spec. -npm install -npm run gen-api-docs all -npm run build - -# This will start a local server (usually at http://127.0.0.1:3000). -npm run serve -``` - -### Update API Documentation - -If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command: - -```bash -uv run ./docs/openapi_generator/run_openapi_generator.sh -``` - -The generated API schema will be available in `docs/static/`. Make sure to review the changes before committing. - -## Adding a New Provider - -See: -- [Adding a New API Provider Page](./new_api_provider.mdx) which describes how to add new API providers to the Stack. -- [Vector Database Page](./new_vector_database.mdx) which describes how to add a new vector databases with Llama Stack. -- [External Provider Page](/docs/providers/external/) which describes how to add external providers to the Stack. - - -## Testing - - -See the [Testing README](https://github.com/meta-llama/llama-stack/blob/main/tests/README.md) for detailed testing information. - -## Advanced Topics - -For developers who need deeper understanding of the testing system internals: - -- [Record-Replay Testing](./testing/record-replay.mdx) - -### Benchmarking - -See the [Benchmarking README](https://github.com/meta-llama/llama-stack/blob/main/benchmarking/k8s-benchmark/README.md) for benchmarking information. +{Contributing} diff --git a/docs/docs/deploying/kubernetes_deployment.mdx b/docs/docs/deploying/kubernetes_deployment.mdx index 8ed1e2756..48d08f0db 100644 --- a/docs/docs/deploying/kubernetes_deployment.mdx +++ b/docs/docs/deploying/kubernetes_deployment.mdx @@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem'; # Kubernetes Deployment Guide -Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers both local development with Kind and production deployment on AWS EKS. 
+Deploy Llama Stack and vLLM servers in a Kubernetes cluster instead of running them locally. This guide covers deployment using the Kubernetes operator to manage the Llama Stack server with Kind. The vLLM inference server is deployed manually. ## Prerequisites @@ -110,115 +110,176 @@ spec: EOF ``` -### Step 3: Configure Llama Stack +### Step 3: Install Kubernetes Operator -Update your run configuration: - -```yaml -providers: - inference: - - provider_id: vllm - provider_type: remote::vllm - config: - url: http://vllm-server.default.svc.cluster.local:8000/v1 - max_tokens: 4096 - api_token: fake -``` - -Build container image: +Install the Llama Stack Kubernetes operator to manage Llama Stack deployments: ```bash -tmp_dir=$(mktemp -d) && cat >$tmp_dir/Containerfile.llama-stack-run-k8s <-service`): + +```bash +# List services to find the service name +kubectl get services | grep llamastack + +# Port forward and test (replace SERVICE_NAME with the actual service name) +kubectl port-forward service/llamastack-vllm-service 8321:8321 +``` + +In another terminal, test the deployment: + +```bash +llama-stack-client --endpoint http://localhost:8321 inference chat-completion --message "hello, what model are you?" ``` ## Troubleshooting -**Check pod status:** +### vLLM Server Issues + +**Check vLLM pod status:** ```bash kubectl get pods -l app.kubernetes.io/name=vllm kubectl logs -l app.kubernetes.io/name=vllm ``` -**Test service connectivity:** +**Test vLLM service connectivity:** ```bash kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://vllm-server:8000/v1/models ``` +### Llama Stack Server Issues + +**Check LlamaStackDistribution status:** +```bash +# Get detailed status +kubectl describe llamastackdistribution llamastack-vllm + +# Check for events +kubectl get events --sort-by='.lastTimestamp' | grep llamastack-vllm +``` + +**Check operator-managed pods:** +```bash +# List all pods managed by the operator +kubectl get pods -l app.kubernetes.io/name=llama-stack + +# Check pod logs (replace POD_NAME with actual pod name) +kubectl logs -l app.kubernetes.io/name=llama-stack +``` + +**Check operator status:** +```bash +# Verify the operator is running +kubectl get pods -n llama-stack-operator-system + +# Check operator logs if issues persist +kubectl logs -n llama-stack-operator-system -l control-plane=controller-manager +``` + +**Verify service connectivity:** +```bash +# Get the service endpoint +kubectl get svc llamastack-vllm-service + +# Test connectivity from within the cluster +kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- curl http://llamastack-vllm-service:8321/health +``` + ## Related Resources - **[Deployment Overview](/docs/deploying/)** - Overview of deployment options - **[Distributions](/docs/distributions)** - Understanding Llama Stack distributions - **[Configuration](/docs/distributions/configuration)** - Detailed configuration options +- **[LlamaStack Operator](https://github.com/llamastack/llama-stack-k8s-operator)** - Overview of llama-stack kubernetes operator +- **[LlamaStackDistribution](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md)** - API Spec of the llama-stack operator Custom Resource. 
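+
+As a final sanity check, you can exercise the operator-managed deployment from Python instead of the CLI. A short sketch, assuming the port-forward from the verification step above is still running:
+
+```python
+from llama_stack_client import LlamaStackClient
+
+# Assumes `kubectl port-forward service/llamastack-vllm-service 8321:8321` is active
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+# List the models the server exposes; any of these can be used for inference
+for model in client.models.list():
+    print(model)
+```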
diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx index c4a01bf7d..532ffaaf0 100644 --- a/docs/docs/distributions/building_distro.mdx +++ b/docs/docs/distributions/building_distro.mdx @@ -65,7 +65,7 @@ external_providers_dir: /workspace/providers.d Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies: ```python -from llama_stack.providers.datatypes import ProviderSpec +from llama_stack_api.providers.datatypes import ProviderSpec def get_provider_spec() -> ProviderSpec: diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index bf3156865..46ecfa475 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -21,7 +21,6 @@ apis: - inference - vector_io - safety -- telemetry providers: inference: - provider_id: ollama @@ -51,10 +50,6 @@ providers: responses: backend: sql_default table_name: responses - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: {} storage: backends: kv_default: @@ -63,13 +58,21 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db - references: + stores: metadata: backend: kv_default namespace: registry inference: backend: sql_default table_name: inference_store + max_write_queue_size: 10000 + num_writers: 4 + conversations: + backend: sql_default + table_name: openai_conversations + prompts: + backend: kv_default + namespace: prompts models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -92,7 +95,6 @@ apis: - inference - vector_io - safety -- telemetry ``` ## Providers @@ -219,7 +221,15 @@ models: ``` A Model is an instance of a "Resource" (see [Concepts](../concepts/)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up a list of "already known and available" models. -What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`. +What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. The `model_id` field is provided for configuration purposes but is not used as part of the model identifier. + +**Important:** Models are identified as `provider_id/provider_model_id` in the system and when making API calls. When `provider_model_id` is omitted, the server will set it to be the same as `model_id`. 
+ +Examples: +- Config: `model_id: llama3.2`, `provider_id: ollama`, `provider_model_id: null` + → Access as: `ollama/llama3.2` +- Config: `model_id: my-llama`, `provider_id: vllm-inference`, `provider_model_id: llama-3-2-3b` + → Access as: `vllm-inference/llama-3-2-3b` (the `model_id` is not used in the identifier) If you need to conditionally register a model in the configuration, such as only when specific environment variable(s) are set, this can be accomplished by utilizing a special `__disabled__` string as the default value of an environment variable substitution, as shown below: @@ -589,24 +599,13 @@ created by users sharing a team with them: In addition to resource-based access control, Llama Stack supports endpoint-level authorization using OAuth 2.0 style scopes. When authentication is enabled, specific API endpoints require users to have particular scopes in their authentication token. -**Scope-Gated APIs:** -The following APIs are currently gated by scopes: - -- **Telemetry API** (scope: `telemetry.read`): - - `POST /telemetry/traces` - Query traces - - `GET /telemetry/traces/{trace_id}` - Get trace by ID - - `GET /telemetry/traces/{trace_id}/spans/{span_id}` - Get span by ID - - `POST /telemetry/spans/{span_id}/tree` - Get span tree - - `POST /telemetry/spans` - Query spans - - `POST /telemetry/metrics/{metric_name}` - Query metrics - **Authentication Configuration:** For **JWT/OAuth2 providers**, scopes should be included in the JWT's claims: ```json { "sub": "user123", - "scope": "telemetry.read", + "scope": "", "aud": "llama-stack" } ``` @@ -616,7 +615,7 @@ For **custom authentication providers**, the endpoint must return user attribute { "principal": "user123", "attributes": { - "scopes": ["telemetry.read"] + "scopes": [""] } } ``` diff --git a/docs/docs/distributions/importing_as_library.mdx b/docs/docs/distributions/importing_as_library.mdx index cf626d2c7..33f65f290 100644 --- a/docs/docs/distributions/importing_as_library.mdx +++ b/docs/docs/distributions/importing_as_library.mdx @@ -11,7 +11,7 @@ If you are planning to use an external service for Inference (even Ollama or TGI This avoids the overhead of setting up a server. ```bash # setup -uv pip install llama-stack +uv pip install llama-stack llama-stack-client llama stack list-deps starter | xargs -L1 uv pip install ``` diff --git a/docs/docs/distributions/index.mdx b/docs/docs/distributions/index.mdx index 0149f143f..ebf4bd6ce 100644 --- a/docs/docs/distributions/index.mdx +++ b/docs/docs/distributions/index.mdx @@ -19,3 +19,4 @@ This section provides an overview of the distributions available in Llama Stack. 
- **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions - **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code - **[Configuration Reference](./configuration.mdx)** - Configuration file format details +- **[Llama Stack UI](./llama_stack_ui.mdx)** - Web-based user interface for interacting with Llama Stack servers diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml index c71ab05d8..d0e083d29 100644 --- a/docs/docs/distributions/k8s/stack-configmap.yaml +++ b/docs/docs/distributions/k8s/stack-configmap.yaml @@ -8,7 +8,6 @@ data: - inference - files - safety - - telemetry - tool_runtime - vector_io providers: @@ -73,12 +72,6 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -113,13 +106,21 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - references: + stores: metadata: backend: kv_default namespace: registry inference: backend: sql_default table_name: inference_store + max_write_queue_size: 10000 + num_writers: 4 + conversations: + backend: sql_default + table_name: openai_conversations + prompts: + backend: kv_default + namespace: prompts models: - metadata: embedding_dimension: 768 diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml index 863565fdf..d8306613b 100644 --- a/docs/docs/distributions/k8s/stack_run_config.yaml +++ b/docs/docs/distributions/k8s/stack_run_config.yaml @@ -5,7 +5,6 @@ apis: - inference - files - safety -- telemetry - tool_runtime - vector_io providers: @@ -32,21 +31,17 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -56,26 +51,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: 
"${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -110,40 +94,54 @@ storage: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - references: + stores: metadata: - backend: kv_default namespace: registry + backend: kv_default inference: - backend: sql_default table_name: inference_store -models: -- metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} - provider_id: vllm-safety - model_type: llm -shields: -- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + provider_id: vllm-safety + model_type: llm + shields: + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: port: 8321 auth: provider_config: type: github_token +vector_stores: + default_provider_id: chromadb + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/docs/docs/distributions/k8s/ui-k8s.yaml.template b/docs/docs/distributions/k8s/ui-k8s.yaml.template index a6859cb86..21de94d12 100644 --- a/docs/docs/distributions/k8s/ui-k8s.yaml.template +++ b/docs/docs/distributions/k8s/ui-k8s.yaml.template @@ -44,7 +44,7 @@ spec: # Navigate to the UI directory echo "Navigating to UI directory..." - cd /app/llama_stack/ui + cd /app/llama_stack_ui # Check if package.json exists if [ ! 
-f "package.json" ]; then diff --git a/docs/docs/distributions/list_of_distributions.mdx b/docs/docs/distributions/list_of_distributions.mdx index 57fa6e85f..d763df0a5 100644 --- a/docs/docs/distributions/list_of_distributions.mdx +++ b/docs/docs/distributions/list_of_distributions.mdx @@ -28,7 +28,7 @@ Llama Stack provides several pre-configured distributions to help you get starte - Run locally with Ollama for development ```bash -docker pull llama-stack/distribution-starter +docker pull llamastack/distribution-starter ``` **Guides:** [Starter Distribution Guide](self_hosted_distro/starter) @@ -41,7 +41,7 @@ docker pull llama-stack/distribution-starter - Need to run inference locally ```bash -docker pull llama-stack/distribution-meta-reference-gpu +docker pull llamastack/distribution-meta-reference-gpu ``` **Guides:** [Meta Reference GPU Guide](self_hosted_distro/meta-reference-gpu) diff --git a/docs/docs/distributions/llama_stack_ui.mdx b/docs/docs/distributions/llama_stack_ui.mdx new file mode 100644 index 000000000..7ba47ea4d --- /dev/null +++ b/docs/docs/distributions/llama_stack_ui.mdx @@ -0,0 +1,109 @@ +--- +title: Llama Stack UI +description: Web-based user interface for interacting with Llama Stack servers +sidebar_label: Llama Stack UI +sidebar_position: 8 +--- + +# Llama Stack UI + +The Llama Stack UI is a web-based interface for interacting with Llama Stack servers. Built with Next.js and React, it provides a visual way to work with agents, manage resources, and view logs. + +## Features + +- **Logs & Monitoring**: View chat completions, agent responses, and vector store activity +- **Vector Stores**: Create and manage vector databases for RAG (Retrieval-Augmented Generation) workflows +- **Prompt Management**: Create and manage reusable prompts + +## Prerequisites + +You need a running Llama Stack server. The UI is a client that connects to the Llama Stack backend. + +If you don't have a Llama Stack server running yet, see the [Starting Llama Stack Server](../getting_started/starting_llama_stack_server.mdx) guide. + +## Running the UI + +### Option 1: Using npx (Recommended for Quick Start) + +The fastest way to get started is using `npx`: + +```bash +npx llama-stack-ui +``` + +This will start the UI server on `http://localhost:8322` (default port). + +### Option 2: Using Docker + +Run the UI in a container: + +```bash +docker run -p 8322:8322 llamastack/ui +``` + +Access the UI at `http://localhost:8322`. 
+ +## Environment Variables + +The UI can be configured using the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `LLAMA_STACK_BACKEND_URL` | URL of your Llama Stack server | `http://localhost:8321` | +| `LLAMA_STACK_UI_PORT` | Port for the UI server | `8322` | + +If the Llama Stack server is running with authentication enabled, you can configure the UI to use it by setting the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `NEXTAUTH_URL` | NextAuth URL for authentication | `http://localhost:8322` | +| `GITHUB_CLIENT_ID` | GitHub OAuth client ID (optional, for authentication) | - | +| `GITHUB_CLIENT_SECRET` | GitHub OAuth client secret (optional, for authentication) | - | + +### Setting Environment Variables + +#### For npx: + +```bash +LLAMA_STACK_BACKEND_URL=http://localhost:8321 \ +LLAMA_STACK_UI_PORT=8080 \ +npx llama-stack-ui +``` + +#### For Docker: + +```bash +docker run -p 8080:8080 \ + -e LLAMA_STACK_BACKEND_URL=http://localhost:8321 \ + -e LLAMA_STACK_UI_PORT=8080 \ + llamastack/ui +``` + +## Using the UI + +### Managing Resources + +- **Vector Stores**: Create vector databases for RAG workflows, view stored documents and embeddings +- **Prompts**: Create and manage reusable prompt templates +- **Chat Completions**: View history of chat interactions +- **Responses**: Browse detailed agent responses and tool calls + +## Development + +If you want to run the UI from source for development: + +```bash +# From the project root +cd src/llama_stack_ui + +# Install dependencies +npm install + +# Set environment variables +export LLAMA_STACK_BACKEND_URL=http://localhost:8321 + +# Start the development server +npm run dev +``` + +The development server will start on `http://localhost:8322` with hot reloading enabled. diff --git a/docs/docs/distributions/remote_hosted_distro/index.mdx b/docs/docs/distributions/remote_hosted_distro/index.mdx index ef5a83d8a..7fa9d1bf6 100644 --- a/docs/docs/distributions/remote_hosted_distro/index.mdx +++ b/docs/docs/distributions/remote_hosted_distro/index.mdx @@ -2,10 +2,10 @@ Remote-Hosted distributions are available endpoints serving Llama Stack API that you can directly connect to. 
-| Distribution | Endpoint | Inference | Agents | Memory | Safety | Telemetry |
+| Distribution | Endpoint | Inference | Agents | Memory | Safety |
-|-------------|----------|-----------|---------|---------|---------|------------|
+|-------------|----------|-----------|---------|---------|---------|
-| Together | [https://llama-stack.together.ai](https://llama-stack.together.ai) | remote::together | meta-reference | remote::weaviate | meta-reference | meta-reference |
-| Fireworks | [https://llamastack-preview.fireworks.ai](https://llamastack-preview.fireworks.ai) | remote::fireworks | meta-reference | remote::weaviate | meta-reference | meta-reference |
+| Together | [https://llama-stack.together.ai](https://llama-stack.together.ai) | remote::together | meta-reference | remote::weaviate | meta-reference |
+| Fireworks | [https://llamastack-preview.fireworks.ai](https://llamastack-preview.fireworks.ai) | remote::fireworks | meta-reference | remote::weaviate | meta-reference |
 
 ## Connecting to Remote-Hosted Distributions
diff --git a/docs/docs/distributions/remote_hosted_distro/oci.md b/docs/docs/distributions/remote_hosted_distro/oci.md
new file mode 100644
index 000000000..b13cf5f73
--- /dev/null
+++ b/docs/docs/distributions/remote_hosted_distro/oci.md
@@ -0,0 +1,143 @@
+---
+orphan: true
+---
+
+# OCI Distribution
+
+The `llamastack/distribution-oci` distribution consists of the following provider configurations.
+
+| API | Provider(s) |
+|-----|-------------|
+| agents | `inline::meta-reference` |
+| datasetio | `remote::huggingface`, `inline::localfs` |
+| eval | `inline::meta-reference` |
+| files | `inline::localfs` |
+| inference | `remote::oci` |
+| safety | `inline::llama-guard` |
+| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
+
+
+### Environment Variables
+
+The following environment variables can be configured:
+
+- `OCI_AUTH_TYPE`: OCI authentication type (instance_principal or config_file) (default: `instance_principal`)
+- `OCI_REGION`: OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1) (default: ``)
+- `OCI_COMPARTMENT_OCID`: OCI compartment ID for the Generative AI service (default: ``)
+- `OCI_CONFIG_FILE_PATH`: OCI config file path (required if OCI_AUTH_TYPE is config_file) (default: `~/.oci/config`)
+- `OCI_CLI_PROFILE`: OCI CLI profile name to use from config file (default: `DEFAULT`)
+
+
+## Prerequisites
+### Oracle Cloud Infrastructure Setup
+
+Before using the OCI Generative AI distribution, ensure you have:
+
+1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/)
+2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy
+3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models
+4. **Authentication**: Configure authentication using either:
+   - **Instance Principal** (recommended for cloud-hosted deployments)
+   - **API Key** (for on-premises or development environments)
+
+### Authentication Methods
+
+#### Instance Principal Authentication (Recommended)
+Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments.
+
+Requirements:
+- Instance must be running in an Oracle Cloud Infrastructure compartment
+- Instance must have appropriate IAM policies to access Generative AI services
+
+#### API Key Authentication
+For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file.
+
+### Required IAM Policies
+
+Ensure your OCI user or instance has the following policy statements:
+
+```
+Allow group <group-name> to use generative-ai-inference-endpoints in compartment <compartment-name>
+Allow group <group-name> to manage generative-ai-inference-endpoints in compartment <compartment-name>
+```
+
+## Supported Services
+
+### Inference: OCI Generative AI
+Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
+
+- **Chat Completions**: Conversational AI with context awareness
+- **Text Generation**: Complete prompts and generate text content
+
+#### Available Models
+Commonly available OCI Generative AI models include models from Meta, Cohere, OpenAI, Grok, and more.
+
+### Safety: Llama Guard
+For content safety and moderation, this distribution uses Meta's LlamaGuard model through the OCI Generative AI service to provide:
+- Content filtering and moderation
+- Policy compliance checking
+- Harmful content detection
+
+### Vector Storage: Multiple Options
+The distribution supports several vector storage providers:
+- **FAISS**: Local in-memory vector search
+- **ChromaDB**: Distributed vector database
+- **PGVector**: PostgreSQL with vector extensions
+
+### Additional Services
+- **Dataset I/O**: Local filesystem and Hugging Face integration
+- **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
+- **Evaluation**: Meta reference evaluation framework
+
+## Running Llama Stack with OCI
+
+You can run the OCI distribution via Docker or a local virtual environment.
+
+### Via venv
+
+If you've set up your local development environment, you can run the distribution using your local virtual environment.
+
+```bash
+OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
+```
+
+### Configuration Examples
+
+#### Using Instance Principal (Recommended for Production)
+```bash
+export OCI_AUTH_TYPE=instance_principal
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..<your-compartment-id>
+```
+
+#### Using API Key Authentication (Development)
+```bash
+export OCI_AUTH_TYPE=config_file
+export OCI_CONFIG_FILE_PATH=~/.oci/config
+export OCI_CLI_PROFILE=DEFAULT
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
+```
+
+## Regional Endpoints
+
+OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
+
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Authentication Errors**: Verify your OCI credentials and IAM policies
+2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
+3. **Permission Denied**: Check compartment permissions and Generative AI service access
+4.
**Region Unavailable**: Verify the specified region supports Generative AI services + +### Getting Help + +For additional support: +- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm) +- [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues) diff --git a/docs/docs/distributions/remote_hosted_distro/watsonx.md b/docs/docs/distributions/remote_hosted_distro/watsonx.md index 5add678f3..2ec7fe965 100644 --- a/docs/docs/distributions/remote_hosted_distro/watsonx.md +++ b/docs/docs/distributions/remote_hosted_distro/watsonx.md @@ -21,7 +21,6 @@ The `llamastack/distribution-watsonx` distribution consists of the following pro | inference | `remote::watsonx`, `inline::sentence-transformers` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | | vector_io | `inline::faiss` | diff --git a/docs/docs/distributions/self_hosted_distro/dell-tgi.md b/docs/docs/distributions/self_hosted_distro/dell-tgi.md index 5fca297b0..a49bab4e6 100644 --- a/docs/docs/distributions/self_hosted_distro/dell-tgi.md +++ b/docs/docs/distributions/self_hosted_distro/dell-tgi.md @@ -13,9 +13,9 @@ self The `llamastack/distribution-tgi` distribution consists of the following provider configurations. -| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** | -|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- |---------------- | -| **Provider(s)** | remote::tgi | meta-reference | meta-reference, remote::pgvector, remote::chroma | meta-reference | meta-reference | +| **API** | **Inference** | **Agents** | **Memory** | **Safety** | +|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- | +| **Provider(s)** | remote::tgi | meta-reference | meta-reference, remote::pgvector, remote::chroma | meta-reference | The only difference vs. the `tgi` distribution is that it runs the Dell-TGI server for inference. 
diff --git a/docs/docs/distributions/self_hosted_distro/dell.md b/docs/docs/distributions/self_hosted_distro/dell.md
index 040eb4a12..e30df5164 100644
--- a/docs/docs/distributions/self_hosted_distro/dell.md
+++ b/docs/docs/distributions/self_hosted_distro/dell.md
@@ -22,7 +22,6 @@ The `llamastack/distribution-dell` distribution consists of the following provid
 | inference | `remote::tgi`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
-| telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime` |
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
index b7134b3e1..9c4095e88 100644
--- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -79,6 +79,33 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```
 
+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-meta-reference-gpu \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv
 
 Make sure you have the Llama Stack CLI available.
diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md
index 4a7d99ff5..c48a7d391 100644
--- a/docs/docs/distributions/self_hosted_distro/nvidia.md
+++ b/docs/docs/distributions/self_hosted_distro/nvidia.md
@@ -127,13 +127,39 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
   -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT
 ```
 
+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-nvidia \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv
 
 If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
diff --git a/docs/docs/distributions/self_hosted_distro/passthrough.md b/docs/docs/distributions/self_hosted_distro/passthrough.md
index 39f076be4..13e78a1ee 100644
--- a/docs/docs/distributions/self_hosted_distro/passthrough.md
+++ b/docs/docs/distributions/self_hosted_distro/passthrough.md
@@ -21,7 +21,6 @@ The `llamastack/distribution-passthrough` distribution consists of the following
 | inference | `remote::passthrough`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
-| telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::rag-runtime`, `remote::model-context-protocol` |
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
diff --git a/docs/docs/distributions/self_hosted_distro/starter.md b/docs/docs/distributions/self_hosted_distro/starter.md
index e04c5874b..37599a8dd 100644
--- a/docs/docs/distributions/self_hosted_distro/starter.md
+++ b/docs/docs/distributions/self_hosted_distro/starter.md
@@ -26,7 +26,6 @@ The starter distribution consists of the following provider configurations:
 | inference | `remote::openai`, `remote::fireworks`, `remote::together`, `remote::ollama`, `remote::anthropic`, `remote::gemini`, `remote::groq`, `remote::sambanova`, `remote::vllm`, `remote::tgi`, `remote::cerebras`, `remote::llama-openai-compat`, `remote::nvidia`, `remote::hf::serverless`, `remote::hf::endpoint`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
-| telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
 | vector_io | `inline::faiss`, `inline::sqlite-vec`, `inline::milvus`, `remote::chromadb`, `remote::pgvector` |
 
@@ -117,10 +116,6 @@ The following environment variables can be configured:
 - `BRAVE_SEARCH_API_KEY`: Brave Search API key
 - `TAVILY_SEARCH_API_KEY`: Tavily Search API key
 
-### Telemetry Configuration
-- `OTEL_SERVICE_NAME`: OpenTelemetry service name
-- `TELEMETRY_SINKS`: Telemetry sinks (default: `[]`)
-
 ## Enabling Providers
 
 You can enable specific providers by setting appropriate environment variables. For example,
@@ -164,7 +159,41 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```
 
-### Via venv
+The container will run the distribution with a SQLite store by default. This store is used for the following components:
+
+- Metadata store: store metadata about the models, providers, etc.
+- Inference store: a collection of responses from the inference provider
+- Agents store: store agent configurations (sessions, turns, etc.)
+- Agents Responses store: stores responses from the agents
+
+However, you can use PostgreSQL instead by running the `starter::run-with-postgres-store.yaml` configuration:
+
+```bash
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -e OPENAI_API_KEY=your_openai_key \
+  -e FIREWORKS_API_KEY=your_fireworks_key \
+  -e TOGETHER_API_KEY=your_together_key \
+  -e POSTGRES_HOST=your_postgres_host \
+  -e POSTGRES_PORT=your_postgres_port \
+  -e POSTGRES_DB=your_postgres_db \
+  -e POSTGRES_USER=your_postgres_user \
+  -e POSTGRES_PASSWORD=your_postgres_password \
+  llamastack/distribution-starter \
+  starter::run-with-postgres-store.yaml
+```
+
+Postgres environment variables:
+
+- `POSTGRES_HOST`: Postgres host (default: `localhost`)
+- `POSTGRES_PORT`: Postgres port (default: `5432`)
+- `POSTGRES_DB`: Postgres database name (default: `llamastack`)
+- `POSTGRES_USER`: Postgres username (default: `llamastack`)
+- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`)
+
+### Via venv
 
 Ensure you have configured the starter distribution using the environment variables explained above.
 
@@ -172,8 +201,11 @@ Ensure you have configured the starter distribution using the environment variab
 ```bash
 # Install dependencies for the starter distribution
 uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
 
-# Run the server
+# Run the server (with SQLite - default)
 uv run --with llama-stack llama stack run starter
+
+# Or run with PostgreSQL
+uv run --with llama-stack llama stack run starter::run-with-postgres-store.yaml
 ```
 
 ## Example Usage
@@ -229,7 +261,7 @@ The starter distribution uses SQLite for local storage of various components:
 2. **Flexible Configuration**: Easy to enable/disable providers based on your needs
 3. **No Local GPU Required**: Most providers are cloud-based, making it accessible to developers without high-end hardware
 4. **Easy Migration**: Start with hosted providers and gradually move to local ones as needed
-5. **Production Ready**: Includes safety, evaluation, and telemetry components
+5. **Production Ready**: Includes safety and evaluation components
 6. **Tool Integration**: Comes with web search, RAG, and model context protocol tools
 
 The starter distribution is ideal for developers who want to experiment with different AI providers, build prototypes quickly, or create applications that can work with multiple AI backends.
diff --git a/docs/docs/distributions/starting_llama_stack_server.mdx b/docs/docs/distributions/starting_llama_stack_server.mdx
index 20bcfa1e4..ed1964444 100644
--- a/docs/docs/distributions/starting_llama_stack_server.mdx
+++ b/docs/docs/distributions/starting_llama_stack_server.mdx
@@ -27,7 +27,7 @@ If you have built a container image and want to deploy it in a Kubernetes cluste
 
 Control log output via environment variables before starting the server.
 
-- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug;core=info`.
+- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug,core=info`.
 - Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`.
 - Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=` to apply globally.
 - `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout.
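+
+For example, a minimal invocation combining both variables (a sketch; it assumes the starter distribution used elsewhere in these docs):
+
+```bash
+# Debug logging for the server component, info for core, mirrored to a file
+LLAMA_STACK_LOGGING=server=debug,core=info \
+LLAMA_STACK_LOG_FILE=~/llama-stack.log \
+uv run --with llama-stack llama stack run starter
+```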
diff --git a/docs/docs/getting_started/detailed_tutorial.mdx b/docs/docs/getting_started/detailed_tutorial.mdx index c629e26f1..2816f67a2 100644 --- a/docs/docs/getting_started/detailed_tutorial.mdx +++ b/docs/docs/getting_started/detailed_tutorial.mdx @@ -144,7 +144,7 @@ source .venv/bin/activate ```bash uv venv client --python 3.12 source client/bin/activate -pip install llama-stack-client +uv pip install llama-stack-client ``` @@ -239,8 +239,13 @@ client = LlamaStackClient(base_url="http://localhost:8321") models = client.models.list() # Select the first LLM -llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "ollama") -model_id = llm.identifier +llm = next( + m for m in models + if m.custom_metadata + and m.custom_metadata.get("model_type") == "llm" + and m.custom_metadata.get("provider_id") == "ollama" +) +model_id = llm.id print("Model:", model_id) @@ -279,8 +284,13 @@ import uuid client = LlamaStackClient(base_url=f"http://localhost:8321") models = client.models.list() -llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "ollama") -model_id = llm.identifier +llm = next( + m for m in models + if m.custom_metadata + and m.custom_metadata.get("model_type") == "llm" + and m.custom_metadata.get("provider_id") == "ollama" +) +model_id = llm.id agent = Agent(client, model=model_id, instructions="You are a helpful assistant.") @@ -450,8 +460,11 @@ import uuid client = LlamaStackClient(base_url="http://localhost:8321") # Create a vector database instance -embed_lm = next(m for m in client.models.list() if m.model_type == "embedding") -embedding_model = embed_lm.identifier +embed_lm = next( + m for m in client.models.list() + if m.custom_metadata and m.custom_metadata.get("model_type") == "embedding" +) +embedding_model = embed_lm.id vector_db_id = f"v{uuid.uuid4().hex}" # The VectorDB API is deprecated; the server now returns its own authoritative ID. # We capture the correct ID from the response's .identifier attribute. @@ -489,9 +502,11 @@ client.tool_runtime.rag_tool.insert( llm = next( m for m in client.models.list() - if m.model_type == "llm" and m.provider_id == "ollama" + if m.custom_metadata + and m.custom_metadata.get("model_type") == "llm" + and m.custom_metadata.get("provider_id") == "ollama" ) -model = llm.identifier +model = llm.id # Create the RAG agent rag_agent = Agent( diff --git a/docs/docs/getting_started/quickstart.mdx b/docs/docs/getting_started/quickstart.mdx index ec929eb88..0761a6e9b 100644 --- a/docs/docs/getting_started/quickstart.mdx +++ b/docs/docs/getting_started/quickstart.mdx @@ -24,6 +24,9 @@ ollama run llama3.2:3b --keepalive 60m #### Step 2: Run the Llama Stack server +```python file=./demo_script.py title="demo_script.py" +``` + We will use `uv` to install dependencies and run the Llama Stack server. ```bash # Install dependencies for the starter distribution @@ -35,27 +38,6 @@ OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run star #### Step 3: Run the demo Now open up a new terminal and copy the following script into a file named `demo_script.py`. 
-```python
-import io, requests
-from openai import OpenAI
-
-url="https://www.paulgraham.com/greatwork.html"
-client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")
-
-vs = client.vector_stores.create()
-response = requests.get(url)
-pseudo_file = io.BytesIO(str(response.content).encode('utf-8'))
-uploaded_file = client.files.create(file=(url, pseudo_file, "text/html"), purpose="assistants")
-client.vector_stores.files.create(vector_store_id=vs.id, file_id=uploaded_file.id)
-
-resp = client.responses.create(
-    model="openai/gpt-4o",
-    input="How do you do great work? Use the existing knowledge_search tool.",
-    tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
-    include=["file_search_call.results"],
-)
-```
-
 We will use `uv` to run the script
 ```
 uv run --with llama-stack-client,fire,requests demo_script.py
diff --git a/docs/docs/index.mdx b/docs/docs/index.mdx
index 80b288872..8c17283f9 100644
--- a/docs/docs/index.mdx
+++ b/docs/docs/index.mdx
@@ -29,7 +29,7 @@ Llama Stack is now available! See the [release notes](https://github.com/llamast
 
 Llama Stack defines and standardizes the core building blocks needed to bring generative AI applications to market. It provides a unified set of APIs with implementations from leading service providers, enabling seamless transitions between development and production environments. More specifically, it provides:
 
-- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
+- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, and Evals.
 - **Plugin architecture** to support the rich ecosystem of implementations of the different APIs in different environments like local development, on-premises, cloud, and mobile.
 - **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment
 - **Multiple developer interfaces** like CLI and SDKs for Python, Node, iOS, and Android
diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx
index 06eb104af..1f7e0c788 100644
--- a/docs/docs/providers/agents/index.mdx
+++ b/docs/docs/providers/agents/index.mdx
@@ -1,7 +1,8 @@
 ---
-description: "Agents
+description: |
+  Agents
 
-  APIs for creating and interacting with agentic systems."
+  APIs for creating and interacting with agentic systems.
 sidebar_label: Agents
 title: Agents
 ---
@@ -12,6 +13,6 @@ title: Agents
 
 Agents
 
- APIs for creating and interacting with agentic systems.
+APIs for creating and interacting with agentic systems.
 
 This section contains documentation for all available providers for the **agents** API.
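+
+As a quick orientation, a minimal sketch of creating an agent (borrowed from the tutorial elsewhere in these docs; assumes a running stack at `localhost:8321`, a registered model ID, and the `llama-stack-client` SDK):
+
+```python
+from llama_stack_client import Agent, LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+# model is any LLM identifier returned by client.models.list()
+agent = Agent(client, model="<model-id>", instructions="You are a helpful assistant.")
+```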
diff --git a/docs/docs/providers/agents/inline_meta-reference.mdx b/docs/docs/providers/agents/inline_meta-reference.mdx index fac9b8406..99a67feb4 100644 --- a/docs/docs/providers/agents/inline_meta-reference.mdx +++ b/docs/docs/providers/agents/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of an agent system that can use tools, access ve | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `AgentPersistenceConfig` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 2c64b277f..23b7df14b 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,15 @@ --- -description: "The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +description: | + The Batches API enables efficient processing of multiple requests in a single operation, + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes." + Note: This API is currently under active development and may undergo changes. sidebar_label: Batches title: Batches --- @@ -18,14 +19,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/batches/inline_reference.mdx b/docs/docs/providers/batches/inline_reference.mdx index 45304fbb1..0a062c245 100644 --- a/docs/docs/providers/batches/inline_reference.mdx +++ b/docs/docs/providers/batches/inline_reference.mdx @@ -14,9 +14,9 @@ Reference implementation of batches API with KVStore persistence. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | Configuration for the key-value store backend. | -| `max_concurrent_batches` | `` | No | 1 | Maximum number of concurrent batches to process simultaneously. | -| `max_concurrent_requests_per_batch` | `` | No | 10 | Maximum number of concurrent requests to process per batch. 
| +| `kvstore` | `KVStoreReference` | No | | Configuration for the key-value store backend. | +| `max_concurrent_batches` | `int` | No | 1 | Maximum number of concurrent batches to process simultaneously. | +| `max_concurrent_requests_per_batch` | `int` | No | 10 | Maximum number of concurrent requests to process per batch. | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/inline_localfs.mdx b/docs/docs/providers/datasetio/inline_localfs.mdx index a9363376c..4314696c5 100644 --- a/docs/docs/providers/datasetio/inline_localfs.mdx +++ b/docs/docs/providers/datasetio/inline_localfs.mdx @@ -14,7 +14,7 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/remote_huggingface.mdx b/docs/docs/providers/datasetio/remote_huggingface.mdx index de3ffaaa6..ede8ed631 100644 --- a/docs/docs/providers/datasetio/remote_huggingface.mdx +++ b/docs/docs/providers/datasetio/remote_huggingface.mdx @@ -14,7 +14,7 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/remote_nvidia.mdx b/docs/docs/providers/datasetio/remote_nvidia.mdx index 35a7dacee..97c48d810 100644 --- a/docs/docs/providers/datasetio/remote_nvidia.mdx +++ b/docs/docs/providers/datasetio/remote_nvidia.mdx @@ -17,7 +17,7 @@ NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform | `api_key` | `str \| None` | No | | The NVIDIA API key. | | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | | `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. | -| `datasets_url` | `` | No | http://nemo.test | Base URL for the NeMo Dataset API | +| `datasets_url` | `str` | No | http://nemo.test | Base URL for the NeMo Dataset API | ## Sample Configuration diff --git a/docs/docs/providers/eval/index.mdx b/docs/docs/providers/eval/index.mdx index 94bafe15e..a6e35d611 100644 --- a/docs/docs/providers/eval/index.mdx +++ b/docs/docs/providers/eval/index.mdx @@ -1,7 +1,8 @@ --- -description: "Evaluations +description: | + Evaluations - Llama Stack Evaluation API for running evaluations on model and agent candidates." + Llama Stack Evaluation API for running evaluations on model and agent candidates. sidebar_label: Eval title: Eval --- @@ -12,6 +13,6 @@ title: Eval Evaluations - Llama Stack Evaluation API for running evaluations on model and agent candidates. +Llama Stack Evaluation API for running evaluations on model and agent candidates. This section contains documentation for all available providers for the **eval** API. 
diff --git a/docs/docs/providers/eval/inline_meta-reference.mdx b/docs/docs/providers/eval/inline_meta-reference.mdx index 2c86c18c9..f1e923ee8 100644 --- a/docs/docs/providers/eval/inline_meta-reference.mdx +++ b/docs/docs/providers/eval/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of evaluation tasks with support for multiple la | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/eval/remote_nvidia.mdx b/docs/docs/providers/eval/remote_nvidia.mdx index 36bb4726b..311496791 100644 --- a/docs/docs/providers/eval/remote_nvidia.mdx +++ b/docs/docs/providers/eval/remote_nvidia.mdx @@ -14,7 +14,7 @@ NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `evaluator_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service | +| `evaluator_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service | ## Sample Configuration diff --git a/docs/docs/providers/external/external-providers-guide.mdx b/docs/docs/providers/external/external-providers-guide.mdx index 748fd62c0..dc813c75b 100644 --- a/docs/docs/providers/external/external-providers-guide.mdx +++ b/docs/docs/providers/external/external-providers-guide.mdx @@ -80,7 +80,7 @@ container_image: custom-vector-store:latest # optional All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like: ```python -from llama_stack.providers.datatypes import ( +from llama_stack_api.providers.datatypes import ( ProviderSpec, Api, RemoteProviderSpec, diff --git a/docs/docs/providers/files/files.mdx b/docs/docs/providers/files/files.mdx new file mode 100644 index 000000000..095642be3 --- /dev/null +++ b/docs/docs/providers/files/files.mdx @@ -0,0 +1,290 @@ +--- +sidebar_label: Files +title: Files +--- + +## Overview + +The Files API provides file management capabilities for Llama Stack. It allows you to upload, store, retrieve, and manage files that can be used across various endpoints in your application. + +## Features + +- **File Upload**: Upload files with metadata and purpose classification +- **File Management**: List, retrieve, and delete files +- **Content Retrieval**: Access raw file content for processing +- **API Compatibility**: Full compatibility with OpenAI Files API endpoints +- **Flexible Storage**: Support for local filesystem and cloud storage backends + +## API Endpoints + +### Upload File + +**POST** `/v1/openai/v1/files` + +Upload a file that can be used across various endpoints. 
+
+**Request Body:**
+- `file`: The file object to be uploaded (multipart form data)
+- `purpose`: The intended purpose of the uploaded file
+
+**Supported Purposes:**
+- `assistants`: Files for use with assistants and vector stores
+- `batch`: Files for batch operations
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "bytes": 140,
+  "created_at": 1613779121,
+  "filename": "mydata.jsonl",
+  "purpose": "batch"
+}
+```
+
+**Example:**
+```python
+import requests
+
+with open("data.jsonl", "rb") as f:
+    files = {"file": f}
+    data = {"purpose": "batch"}
+    response = requests.post(
+        "http://localhost:8000/v1/openai/v1/files", files=files, data=data
+    )
+    file_info = response.json()
+```
+
+### List Files
+
+**GET** `/v1/openai/v1/files`
+
+Returns a list of files that belong to the user's organization.
+
+**Query Parameters:**
+- `after` (optional): A cursor for pagination
+- `limit` (optional): Limit on number of objects (1-10,000, default: 10,000)
+- `order` (optional): Sort order by created_at timestamp (`asc` or `desc`, default: `desc`)
+- `purpose` (optional): Filter files by purpose
+
+**Response:**
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "file-abc123",
+      "object": "file",
+      "bytes": 140,
+      "created_at": 1613779121,
+      "filename": "mydata.jsonl",
+      "purpose": "batch"
+    }
+  ],
+  "has_more": false
+}
+```
+
+**Example:**
+```python
+import requests
+
+# List all files
+response = requests.get("http://localhost:8000/v1/openai/v1/files")
+files = response.json()
+
+# List files with pagination
+response = requests.get(
+    "http://localhost:8000/v1/openai/v1/files",
+    params={"limit": 10, "after": "file-abc123"},
+)
+files = response.json()
+
+# Filter by purpose
+response = requests.get(
+    "http://localhost:8000/v1/openai/v1/files", params={"purpose": "batch"}
+)
+files = response.json()
+```
+
+### Retrieve File
+
+**GET** `/v1/openai/v1/files/{file_id}`
+
+Returns information about a specific file.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to retrieve
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "bytes": 140,
+  "created_at": 1613779121,
+  "filename": "mydata.jsonl",
+  "purpose": "batch"
+}
+```
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.get(f"http://localhost:8000/v1/openai/v1/files/{file_id}")
+file_info = response.json()
+```
+
+### Delete File
+
+**DELETE** `/v1/openai/v1/files/{file_id}`
+
+Delete a file.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to delete
+
+**Response:**
+```json
+{
+  "id": "file-abc123",
+  "object": "file",
+  "deleted": true
+}
+```
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.delete(f"http://localhost:8000/v1/openai/v1/files/{file_id}")
+result = response.json()
+```
+
+### Retrieve File Content
+
+**GET** `/v1/openai/v1/files/{file_id}/content`
+
+Returns the raw file content as a binary response.
+
+**Path Parameters:**
+- `file_id`: The ID of the file to retrieve content from
+
+**Response:**
+Binary file content with appropriate headers:
+- `Content-Type`: `application/octet-stream`
+- `Content-Disposition`: `attachment; filename="filename"`
+
+**Example:**
+```python
+import requests
+
+file_id = "file-abc123"
+response = requests.get(f"http://localhost:8000/v1/openai/v1/files/{file_id}/content")
+
+# Save content to file
+with open("downloaded_file.jsonl", "wb") as f:
+    f.write(response.content)
+
+# Or process content directly
+content = response.content
+```
+
+## Vector Store Integration
+
+The Files API integrates with Vector Stores to enable document processing and search. For detailed information about this integration, see [File Operations and Vector Store Integration](../concepts/file_operations_vector_stores.md).
+
+### Vector Store File Operations
+
+**List Vector Store Files:**
+- **GET** `/v1/openai/v1/vector_stores/{vector_store_id}/files`
+
+**Retrieve Vector Store File Content:**
+- **GET** `/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content`
+
+**Attach File to Vector Store:**
+- **POST** `/v1/openai/v1/vector_stores/{vector_store_id}/files`
+
+## Error Handling
+
+The Files API returns standard HTTP status codes and error responses:
+
+- `400 Bad Request`: Invalid request parameters
+- `404 Not Found`: File not found
+- `429 Too Many Requests`: Rate limit exceeded
+- `500 Internal Server Error`: Server error
+
+**Error Response Format:**
+```json
+{
+  "error": {
+    "message": "Error description",
+    "type": "invalid_request_error",
+    "code": "file_not_found"
+  }
+}
+```
+
+A short client-side handling sketch appears at the end of this page.
+
+## Rate Limits
+
+The Files API implements rate limiting to ensure fair usage:
+- File uploads: 100 files per minute
+- File retrievals: 1000 requests per minute
+- File deletions: 100 requests per minute
+
+## Best Practices
+
+1. **File Organization**: Use descriptive filenames and appropriate purpose classifications
+2. **Batch Operations**: For multiple files, consider using batch endpoints when available
+3. **Error Handling**: Always check response status codes and handle errors gracefully
+4. **Content Types**: Ensure files are uploaded with appropriate content types
+5. **Cleanup**: Regularly delete unused files to manage storage costs
+
+## Integration Examples
+
+### With Python Client
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8000")
+
+# Upload a file
+with open("data.jsonl", "rb") as f:
+    file_info = client.files.create(file=f, purpose="batch")
+
+# List files
+files = client.files.list(purpose="batch")
+
+# Retrieve file content
+content = client.files.content(file_info.id)
+```
+
+### With cURL
+
+```bash
+# Upload file
+curl -X POST http://localhost:8000/v1/openai/v1/files \
+  -F "file=@data.jsonl" \
+  -F "purpose=batch"
+
+# List files
+curl http://localhost:8000/v1/openai/v1/files
+
+# Download file content
+curl http://localhost:8000/v1/openai/v1/files/file-abc123/content \
+  -o downloaded_file.jsonl
+```
+
+## Provider Support
+
+The Files API supports multiple storage backends:
+
+- **Local Filesystem**: Store files on local disk (inline provider)
+- **S3**: Store files in AWS S3 or S3-compatible services (remote provider)
+- **Custom Backends**: Extensible architecture for custom storage providers
+
+See the [Files Providers](index.md) documentation for detailed configuration options.
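+
+## Client-Side Error Handling Sketch
+
+A hedged example of handling the error format documented above (a sketch using `requests`; status codes and payload shape are as listed under "Error Handling"):
+
+```python
+import requests
+
+response = requests.get("http://localhost:8000/v1/openai/v1/files/file-abc123")
+if response.status_code == 200:
+    file_info = response.json()
+else:
+    # Error payload: {"error": {"message": ..., "type": ..., "code": ...}}
+    error = response.json()["error"]
+    print(f"{response.status_code}: {error['message']} ({error['code']})")
+```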
diff --git a/docs/docs/providers/files/index.mdx b/docs/docs/providers/files/index.mdx
index 19e338035..0540c5c3e 100644
--- a/docs/docs/providers/files/index.mdx
+++ b/docs/docs/providers/files/index.mdx
@@ -1,7 +1,8 @@
 ---
-description: "Files
+description: |
+  Files
 
-  This API is used to upload documents that can be used with other Llama Stack APIs."
+  This API is used to upload documents that can be used with other Llama Stack APIs.
 sidebar_label: Files
 title: Files
 ---
@@ -12,6 +13,6 @@ title: Files
 
 Files
 
- This API is used to upload documents that can be used with other Llama Stack APIs.
+This API is used to upload documents that can be used with other Llama Stack APIs.
 
 This section contains documentation for all available providers for the **files** API.
diff --git a/docs/docs/providers/files/inline_localfs.mdx b/docs/docs/providers/files/inline_localfs.mdx
index bff0c4eb9..aa3a9232b 100644
--- a/docs/docs/providers/files/inline_localfs.mdx
+++ b/docs/docs/providers/files/inline_localfs.mdx
@@ -14,9 +14,9 @@ Local filesystem-based file storage provider for managing files and documents lo
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `storage_dir` | `` | No | | Directory to store uploaded files |
-| `metadata_store` | `` | No | | SQL store configuration for file metadata |
-| `ttl_secs` | `` | No | 31536000 | |
+| `storage_dir` | `str` | No | | Directory to store uploaded files |
+| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata |
+| `ttl_secs` | `int` | No | 31536000 | |
 
 ## Sample Configuration
diff --git a/docs/docs/providers/files/openai_file_operations_quick_reference.md b/docs/docs/providers/files/openai_file_operations_quick_reference.md
new file mode 100644
index 000000000..43e2318e2
--- /dev/null
+++ b/docs/docs/providers/files/openai_file_operations_quick_reference.md
@@ -0,0 +1,80 @@
+# File Operations Quick Reference
+
+## Overview
+
+As of release 0.2.14, Llama Stack provides comprehensive file operations and Vector Store API integration, following the [OpenAI Vector Store Files API specification](https://platform.openai.com/docs/api-reference/vector-stores-files).
+
+> **Note**: For detailed overview and implementation details, see [Overview](../openai_file_operations_support.md#overview) in the full documentation.
+
+## Supported Providers
+
+> **Note**: For complete provider details and features, see [Supported Providers](../openai_file_operations_support.md#supported-providers) in the full documentation.
+
+**Inline Providers**: FAISS, SQLite-vec, Milvus
+**Remote Providers**: ChromaDB, Qdrant, Weaviate, PGVector
+
+## Quick Start
+
+### 1. Upload File
+```python
+file_info = client.files.create(
+    file=open("document.pdf", "rb"), purpose="assistants"
+)
+```
+
+### 2. Create Vector Store
+```python
+vector_store = client.vector_stores.create(name="my_docs")
+```
+
+### 3. Attach File
+```python
+client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+```
+
+### 4. Search
+```python
+results = client.vector_stores.search(
+    vector_store_id=vector_store.id, query="What is the main topic?", max_num_results=5
+)
+```
+
+## File Processing & Search
+
+**Processing**: 800 tokens default chunk size, 400 token overlap
+**Formats**: PDF, DOCX, TXT, Code files, etc.
+**Search**: Vector similarity, Hybrid (SQLite-vec), Filtered with metadata + +## Configuration + +> **Note**: For detailed configuration examples and options, see [Configuration Examples](../openai_file_operations_support.md#configuration-examples) in the full documentation. + +**Basic Setup**: Configure vector_io and files providers in your run.yaml + +## Common Use Cases + +- **RAG Systems**: Document Q&A with file uploads +- **Knowledge Bases**: Searchable document collections +- **Content Analysis**: Document similarity and clustering +- **Research Tools**: Literature review and analysis + +## Performance Tips + +> **Note**: For detailed performance optimization strategies, see [Performance Considerations](../openai_file_operations_support.md#performance-considerations) in the full documentation. + +**Quick Tips**: Choose provider based on your needs (speed vs. storage vs. scalability) + +## Troubleshooting + +> **Note**: For comprehensive troubleshooting, see [Troubleshooting](../openai_file_operations_support.md#troubleshooting) in the full documentation. + +**Quick Fixes**: Check file format compatibility, optimize chunk sizes, monitor storage + +## Resources + +- [Full Documentation](openai_file_operations_support.md) +- [Integration Guide](../concepts/file_operations_vector_stores.md) +- [Files API](files_api.md) +- [Provider Details](../vector_io/index.md) diff --git a/docs/docs/providers/files/openai_file_operations_support.md b/docs/docs/providers/files/openai_file_operations_support.md new file mode 100644 index 000000000..058c994da --- /dev/null +++ b/docs/docs/providers/files/openai_file_operations_support.md @@ -0,0 +1,291 @@ +# File Operations Support in Vector Store Providers + +## Overview + +This document provides a comprehensive overview of file operations and Vector Store API support across all available vector store providers in Llama Stack. As of release 0.2.24, the following providers support full file operations integration. 
+ +## Supported Providers + +### ✅ Full File Operations Support + +The following providers support complete file operations integration, including file upload, automatic processing, and search: + +#### Inline Providers (Single Node) + +| Provider | File Operations | Key Features | +|----------|----------------|--------------| +| **FAISS** | ✅ Full Support | Fast in-memory search, GPU acceleration | +| **SQLite-vec** | ✅ Full Support | Hybrid search, disk-based storage | +| **Milvus** | ✅ Full Support | High-performance, scalable indexing | + +#### Remote Providers (Hosted) + +| Provider | File Operations | Key Features | +|----------|----------------|--------------| +| **ChromaDB** | ✅ Full Support | Metadata filtering, persistent storage | +| **Qdrant** | ✅ Full Support | Payload filtering, advanced search | +| **Weaviate** | ✅ Full Support | GraphQL interface, schema management | +| **Postgres (PGVector)** | ✅ Full Support | SQL integration, ACID compliance | + +### 🔄 Partial Support + +Some providers may support basic vector operations but lack full file operations integration: + +| Provider | Status | Notes | +|----------|--------|-------| +| **Meta Reference** | 🔄 Basic | Core vector operations only | + +## File Operations Features + +All supported providers offer the following file operations capabilities: + +### Core Functionality + +- **File Upload & Processing**: Automatic document ingestion and chunking +- **Vector Storage**: Embedding generation and storage +- **Search & Retrieval**: Semantic search with metadata filtering +- **File Management**: List, retrieve, and manage files in vector stores + +### Advanced Features + +- **Automatic Chunking**: Configurable chunk sizes and overlap +- **Metadata Preservation**: File attributes and chunk metadata +- **Status Tracking**: Monitor file processing progress +- **Error Handling**: Comprehensive error reporting and recovery + +## Implementation Details + +### File Processing Pipeline + +1. **Upload**: File uploaded via Files API +2. **Extraction**: Text content extracted from various formats +3. **Chunking**: Content split into optimal chunks (default: 800 tokens) +4. **Embedding**: Chunks converted to vector embeddings +5. **Storage**: Vectors stored with metadata in vector database +6. **Indexing**: Search index updated for fast retrieval + +### Supported File Formats + +- **Documents**: PDF, DOCX, DOC +- **Text**: TXT, MD, RST +- **Code**: Python, JavaScript, Java, C++, etc. 
+- **Data**: JSON, CSV, XML
+- **Web**: HTML files
+
+### Chunking Strategies
+
+- **Default**: 800 tokens with 400 token overlap
+- **Custom**: Configurable chunk sizes and overlap
+- **Static**: Fixed-size chunks with overlap
+
+## Provider-Specific Features
+
+### FAISS
+
+- **Storage**: In-memory with optional persistence
+- **Performance**: Optimized for speed and GPU acceleration
+- **Use Case**: High-performance, memory-constrained environments
+
+### SQLite-vec
+
+- **Storage**: Disk-based with SQLite backend
+- **Search**: Hybrid vector + keyword search
+- **Use Case**: Large document collections, frequent updates
+
+### Milvus
+
+- **Storage**: Scalable distributed storage
+- **Indexing**: Multiple index types (IVF, HNSW)
+- **Use Case**: Production deployments, large-scale applications
+
+### ChromaDB
+
+- **Storage**: Persistent storage with metadata
+- **Filtering**: Advanced metadata filtering
+- **Use Case**: Applications requiring rich metadata
+
+### Qdrant
+
+- **Storage**: High-performance vector database
+- **Filtering**: Payload-based filtering
+- **Use Case**: Real-time applications, complex queries
+
+### Weaviate
+
+- **Storage**: GraphQL-native vector database
+- **Schema**: Flexible schema management
+- **Use Case**: Applications requiring complex data relationships
+
+### Postgres (PGVector)
+
+- **Storage**: SQL database with vector extensions
+- **Integration**: ACID compliance, existing SQL workflows
+- **Use Case**: Applications requiring transactional guarantees
+
+## Configuration Examples
+
+### Basic Configuration
+
+```yaml
+vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ~/.llama/faiss_store.db
+```
+
+### With Files API Support
+
+```yaml
+vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ~/.llama/faiss_store.db
+
+files:
+  - provider_id: local-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ~/.llama/files
+      metadata_store:
+        type: sqlite
+        db_path: ~/.llama/files_metadata.db
+```
+
+## Usage Examples
+
+### Python Client
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8000")
+
+# Create vector store
+vector_store = client.vector_stores.create(name="documents")
+
+# Upload and process file
+with open("document.pdf", "rb") as f:
+    file_info = client.files.create(file=f, purpose="assistants")
+
+# Attach to vector store
+client.vector_stores.files.create(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+
+# Search
+results = client.vector_stores.search(
+    vector_store_id=vector_store.id, query="What is the main topic?", max_num_results=5
+)
+```
+
+### cURL Commands
+
+```bash
+# Upload file
+curl -X POST http://localhost:8000/v1/openai/v1/files \
+  -F "file=@document.pdf" \
+  -F "purpose=assistants"
+
+# Create vector store
+curl -X POST http://localhost:8000/v1/openai/v1/vector_stores \
+  -H "Content-Type: application/json" \
+  -d '{"name": "documents"}'
+
+# Attach file to vector store
+curl -X POST http://localhost:8000/v1/openai/v1/vector_stores/{store_id}/files \
+  -H "Content-Type: application/json" \
+  -d '{"file_id": "file-abc123"}'
+
+# Search vector store
+curl -X POST http://localhost:8000/v1/openai/v1/vector_stores/{store_id}/search \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is the main topic?", "max_num_results": 5}'
+```
+
+## Performance Considerations
+
+### Chunk Size Optimization
+
+- **Small chunks (400-600 tokens)**: Better precision, more results
+- **Large chunks (800-1200 tokens)**: Better context, fewer results
+- **Overlap (50%)**: Maintains context between chunks
+
+### Storage Efficiency
+
+- **FAISS**: Fastest, but memory-limited
+- **SQLite-vec**: Good balance of performance and storage
+- **Milvus**: Scalable, production-ready
+- **Remote providers**: Managed, but network-dependent
+
+### Search Performance
+
+- **Vector search**: Fastest for semantic queries
+- **Hybrid search**: Best accuracy (SQLite-vec only)
+- **Filtered search**: Fast with metadata constraints
+
+## Troubleshooting
+
+### Common Issues
+
+1. **File Processing Failures**
+   - Check file format compatibility
+   - Verify file size limits
+   - Review error messages in file status
+
+2. **Search Performance**
+   - Optimize chunk sizes for your use case
+   - Use filters to narrow search scope
+   - Monitor vector store metrics
+
+3. **Storage Issues**
+   - Check available disk space
+   - Verify database permissions
+   - Monitor memory usage (for in-memory providers)
+
+### Monitoring
+
+```python
+# Check file processing status
+file_status = client.vector_stores.files.retrieve(
+    vector_store_id=vector_store.id, file_id=file_info.id
+)
+
+if file_status.status == "failed":
+    print(f"Error: {file_status.last_error.message}")
+
+# Monitor vector store health
+health = client.vector_stores.health(vector_store_id=vector_store.id)
+print(f"Status: {health.status}")
+```
+
+## Best Practices
+
+1. **File Organization**: Use descriptive names and organize by purpose
+2. **Chunking Strategy**: Test different sizes for your specific use case
+3. **Metadata**: Add relevant attributes for better filtering
+4. **Monitoring**: Track processing status and search performance
+5. **Cleanup**: Regularly remove unused files to manage storage
+
+## Future Enhancements
+
+Planned improvements for file operations support:
+
+- **Batch Processing**: Process multiple files simultaneously
+- **Advanced Chunking**: More sophisticated chunking algorithms
+- **Custom Embeddings**: Support for custom embedding models
+- **Real-time Updates**: Live file processing and indexing
+- **Multi-format Support**: Enhanced file format support
+
+## Support and Resources
+
+- **Documentation**: [File Operations and Vector Store Integration](../../concepts/file_operations_vector_stores.mdx)
+- **API Reference**: [Files API](files_api.md)
+- **Provider Docs**: [Vector Store Providers](../vector_io/index.md)
+- **Examples**: [Getting Started](../getting_started/index.md)
+- **Community**: [GitHub Discussions](https://github.com/meta-llama/llama-stack/discussions)
diff --git a/docs/docs/providers/files/remote_openai.mdx b/docs/docs/providers/files/remote_openai.mdx
new file mode 100644
index 000000000..48fe2fd57
--- /dev/null
+++ b/docs/docs/providers/files/remote_openai.mdx
@@ -0,0 +1,27 @@
+---
+description: "OpenAI Files API provider for managing files through OpenAI's native file storage service."
+sidebar_label: Remote - Openai
+title: remote::openai
+---
+
+# remote::openai
+
+## Description
+
+OpenAI Files API provider for managing files through OpenAI's native file storage service.
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str` | No | | OpenAI API key for authentication | +| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata | + +## Sample Configuration + +```yaml +api_key: ${env.OPENAI_API_KEY} +metadata_store: + table_name: openai_files_metadata + backend: sql_default +``` diff --git a/docs/docs/providers/files/remote_s3.mdx b/docs/docs/providers/files/remote_s3.mdx index 65cd545c5..857ba1819 100644 --- a/docs/docs/providers/files/remote_s3.mdx +++ b/docs/docs/providers/files/remote_s3.mdx @@ -14,13 +14,13 @@ AWS S3-based file storage provider for scalable cloud file management with metad | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `bucket_name` | `` | No | | S3 bucket name to store files | -| `region` | `` | No | us-east-1 | AWS region where the bucket is located | +| `bucket_name` | `str` | No | | S3 bucket name to store files | +| `region` | `str` | No | us-east-1 | AWS region where the bucket is located | | `aws_access_key_id` | `str \| None` | No | | AWS access key ID (optional if using IAM roles) | | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) | | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) | -| `auto_create_bucket` | `` | No | False | Automatically create the S3 bucket if it doesn't exist | -| `metadata_store` | `` | No | | SQL store configuration for file metadata | +| `auto_create_bucket` | `bool` | No | False | Automatically create the S3 bucket if it doesn't exist | +| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata | ## Sample Configuration diff --git a/docs/docs/providers/index.mdx b/docs/docs/providers/index.mdx index 2ca2b2697..5c81a57ed 100644 --- a/docs/docs/providers/index.mdx +++ b/docs/docs/providers/index.mdx @@ -22,15 +22,25 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro ## Provider Categories - **[External Providers](external/index.mdx)** - Guide for building and using external providers +- **[OpenAI Compatibility](../api-openai/index.mdx)** - OpenAI API compatibility layer - **[Inference](inference/index.mdx)** - LLM and embedding model providers - **[Agents](agents/index.mdx)** - Agentic system providers - **[DatasetIO](datasetio/index.mdx)** - Dataset and data loader providers - **[Safety](safety/index.mdx)** - Content moderation and safety providers -- **[Telemetry](telemetry/index.mdx)** - Monitoring and observability providers - **[Vector IO](vector_io/index.mdx)** - Vector database providers - **[Tool Runtime](tool_runtime/index.mdx)** - Tool and protocol providers - **[Files](files/index.mdx)** - File system and storage providers -## Other information about Providers -- **[OpenAI Compatibility](./openai.mdx)** - OpenAI API compatibility layer +## API Documentation + +For comprehensive API documentation and reference: + +- **[API Reference](../api/index.mdx)** - Complete API documentation +- **[Experimental APIs](../api-experimental/index.mdx)** - APIs in development +- **[Deprecated APIs](../api-deprecated/index.mdx)** - Legacy APIs being phased out +- **[OpenAI Compatibility](../api-openai/index.mdx)** - OpenAI API compatibility guide + +## Additional Provider Information + +- **[OpenAI Implementation Guide](./openai.mdx)** - Code examples and 
implementation details for OpenAI APIs - **[OpenAI-Compatible Responses Limitations](./openai_responses_limitations.mdx)** - Known limitations of the Responses API in Llama Stack diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index c2bf69962..ad050e501 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,11 +1,13 @@ --- -description: "Inference +description: | + Inference - Llama Stack Inference API for generating completions, chat completions, and embeddings. + Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search." + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. sidebar_label: Inference title: Inference --- @@ -16,10 +18,11 @@ title: Inference Inference - Llama Stack Inference API for generating completions, chat completions, and embeddings. +Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. +This API provides the raw interface to the underlying models. Three kinds of models are supported: +- LLM models: these models generate "raw" and "chat" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models reorder the documents based on their relevance to a query. This section contains documentation for all available providers for the **inference** API. 
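+
+As a quick orientation, a minimal sketch of telling the model kinds apart (mirrors the `custom_metadata` filtering used in the tutorial elsewhere in these docs; assumes a running stack at `localhost:8321` and the `llama-stack-client` SDK):
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+for m in client.models.list():
+    # model_type is "llm", "embedding", or a rerank type, depending on the provider
+    print(m.id, (m.custom_metadata or {}).get("model_type"))
+```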
diff --git a/docs/docs/providers/inference/inline_meta-reference.mdx b/docs/docs/providers/inference/inline_meta-reference.mdx index 328586f9a..55b1606b0 100644 --- a/docs/docs/providers/inference/inline_meta-reference.mdx +++ b/docs/docs/providers/inference/inline_meta-reference.mdx @@ -16,12 +16,12 @@ Meta's reference implementation of inference with support for various model form |-------|------|----------|---------|-------------| | `model` | `str \| None` | No | | | | `torch_seed` | `int \| None` | No | | | -| `max_seq_len` | `` | No | 4096 | | -| `max_batch_size` | `` | No | 1 | | +| `max_seq_len` | `int` | No | 4096 | | +| `max_batch_size` | `int` | No | 1 | | | `model_parallel_size` | `int \| None` | No | | | -| `create_distributed_process_group` | `` | No | True | | +| `create_distributed_process_group` | `bool` | No | True | | | `checkpoint_dir` | `str \| None` | No | | | -| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | | +| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig \| None` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_anthropic.mdx b/docs/docs/providers/inference/remote_anthropic.mdx index 4acbbac50..14b431894 100644 --- a/docs/docs/providers/inference/remote_anthropic.mdx +++ b/docs/docs/providers/inference/remote_anthropic.mdx @@ -14,9 +14,9 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_azure.mdx b/docs/docs/providers/inference/remote_azure.mdx index b3041259e..0382b42d7 100644 --- a/docs/docs/providers/inference/remote_azure.mdx +++ b/docs/docs/providers/inference/remote_azure.mdx @@ -21,10 +21,10 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `api_base` | `` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1) | | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) | | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) | @@ -32,7 +32,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview ```yaml api_key: ${env.AZURE_API_KEY:=} -api_base: ${env.AZURE_API_BASE:=} +base_url: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} ``` diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index 683ec12f8..0b36ea01a 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -1,5 +1,5 @@ --- -description: "AWS Bedrock inference provider for accessing various AI models through AWS's managed service." +description: "AWS Bedrock inference provider using OpenAI compatible endpoint." sidebar_label: Remote - Bedrock title: remote::bedrock --- @@ -8,27 +8,20 @@ title: remote::bedrock ## Description -AWS Bedrock inference provider for accessing various AI models through AWS's managed service. +AWS Bedrock inference provider using OpenAI compatible endpoint. ## Configuration | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | -| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | -| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | -| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | -| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | -| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | -| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | -| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | -| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. | -| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). 
| +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `region_name` | `str` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint | ## Sample Configuration ```yaml -{} +api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} +region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} ``` diff --git a/docs/docs/providers/inference/remote_cerebras.mdx b/docs/docs/providers/inference/remote_cerebras.mdx index cda0be224..9fd390a29 100644 --- a/docs/docs/providers/inference/remote_cerebras.mdx +++ b/docs/docs/providers/inference/remote_cerebras.mdx @@ -14,14 +14,14 @@ Cerebras inference provider for running models on Cerebras Cloud platform. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `base_url` | `` | No | https://api.cerebras.ai | Base URL for the Cerebras API | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://api.cerebras.ai/v1 | Base URL for the Cerebras API | ## Sample Configuration ```yaml -base_url: https://api.cerebras.ai +base_url: https://api.cerebras.ai/v1 api_key: ${env.CEREBRAS_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx index f14fd0175..d50c52958 100644 --- a/docs/docs/providers/inference/remote_databricks.mdx +++ b/docs/docs/providers/inference/remote_databricks.mdx @@ -14,14 +14,14 @@ Databricks inference provider for running models on Databricks' unified analytic | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The Databricks API token | -| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_token` | `SecretStr \| None` | No | | The Databricks API token | +| `base_url` | `HttpUrl \| None` | No | | The URL for the Databricks model serving endpoint (should include /serving-endpoints path) | ## Sample Configuration ```yaml -url: ${env.DATABRICKS_HOST:=} +base_url: ${env.DATABRICKS_HOST:=} api_token: ${env.DATABRICKS_TOKEN:=} ``` diff --git a/docs/docs/providers/inference/remote_fireworks.mdx b/docs/docs/providers/inference/remote_fireworks.mdx index 71f16ccec..a67403a9b 100644 --- a/docs/docs/providers/inference/remote_fireworks.mdx +++ b/docs/docs/providers/inference/remote_fireworks.mdx @@ -14,14 +14,14 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | ## Sample Configuration ```yaml -url: https://api.fireworks.ai/inference/v1 +base_url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_gemini.mdx b/docs/docs/providers/inference/remote_gemini.mdx index 22b3c8cb7..75e6b9692 100644 --- a/docs/docs/providers/inference/remote_gemini.mdx +++ b/docs/docs/providers/inference/remote_gemini.mdx @@ -14,9 +14,9 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_groq.mdx b/docs/docs/providers/inference/remote_groq.mdx index aaf1516ca..17acd3140 100644 --- a/docs/docs/providers/inference/remote_groq.mdx +++ b/docs/docs/providers/inference/remote_groq.mdx @@ -14,14 +14,14 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.groq.com | The URL for the Groq AI server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://api.groq.com/openai/v1 | The URL for the Groq AI server | ## Sample Configuration ```yaml -url: https://api.groq.com +base_url: https://api.groq.com/openai/v1 api_key: ${env.GROQ_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_hf_endpoint.mdx b/docs/docs/providers/inference/remote_hf_endpoint.mdx index 771b24f8d..52b40c1f2 100644 --- a/docs/docs/providers/inference/remote_hf_endpoint.mdx +++ b/docs/docs/providers/inference/remote_hf_endpoint.mdx @@ -14,8 +14,8 @@ HuggingFace Inference Endpoints provider for dedicated model serving. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `endpoint_name` | `` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | +| `endpoint_name` | `str` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. | +| `api_token` | `SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_hf_serverless.mdx b/docs/docs/providers/inference/remote_hf_serverless.mdx index 1a89b8e3e..52280df82 100644 --- a/docs/docs/providers/inference/remote_hf_serverless.mdx +++ b/docs/docs/providers/inference/remote_hf_serverless.mdx @@ -14,8 +14,8 @@ HuggingFace Inference API serverless provider for on-demand model inference. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `huggingface_repo` | `` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | +| `huggingface_repo` | `str` | No | | The model ID of the model on the Hugging Face Hub (e.g. 
'meta-llama/Meta-Llama-3.1-70B-Instruct') | +| `api_token` | `SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_llama-openai-compat.mdx b/docs/docs/providers/inference/remote_llama-openai-compat.mdx index 9769c0793..69e90b2ac 100644 --- a/docs/docs/providers/inference/remote_llama-openai-compat.mdx +++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx @@ -14,14 +14,14 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `openai_compat_api_base` | `` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | ## Sample Configuration ```yaml -openai_compat_api_base: https://api.llama.com/compat/v1/ +base_url: https://api.llama.com/compat/v1/ api_key: ${env.LLAMA_API_KEY} ``` diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx index b4e04176c..a890bc57f 100644 --- a/docs/docs/providers/inference/remote_nvidia.mdx +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -14,17 +14,16 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | -| `timeout` | `` | No | 60 | Timeout for the HTTP requests | -| `append_api_version` | `` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://integrate.api.nvidia.com/v1 | A base url for accessing the NVIDIA NIM | +| `timeout` | `int` | No | 60 | Timeout for the HTTP requests | +| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. | ## Sample Configuration ```yaml -url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} +base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} api_key: ${env.NVIDIA_API_KEY:=} -append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} ``` diff --git a/docs/docs/providers/inference/remote_oci.mdx b/docs/docs/providers/inference/remote_oci.mdx new file mode 100644 index 000000000..d448755bf --- /dev/null +++ b/docs/docs/providers/inference/remote_oci.mdx @@ -0,0 +1,41 @@ +--- +description: | + Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models. + Provider documentation + https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm +sidebar_label: Remote - Oci +title: remote::oci +--- + +# remote::oci + +## Description + + +Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models. +Provider documentation +https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `oci_auth_type` | `str` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) | +| `oci_region` | `str` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) | +| `oci_compartment_id` | `str` | No | | OCI compartment ID for the Generative AI service | +| `oci_config_file_path` | `str` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) | +| `oci_config_profile` | `str` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) | + +## Sample Configuration + +```yaml +oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal} +oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config} +oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT} +oci_region: ${env.OCI_REGION:=us-ashburn-1} +oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=} +``` diff --git a/docs/docs/providers/inference/remote_ollama.mdx b/docs/docs/providers/inference/remote_ollama.mdx index e00e34e4a..f9be84add 100644 --- a/docs/docs/providers/inference/remote_ollama.mdx +++ b/docs/docs/providers/inference/remote_ollama.mdx @@ -14,12 +14,12 @@ Ollama inference provider for running local models through the Ollama runtime. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `url` | `` | No | http://localhost:11434 | | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `base_url` | `HttpUrl \| None` | No | http://localhost:11434/v1 | | ## Sample Configuration ```yaml -url: ${env.OLLAMA_URL:=http://localhost:11434} +base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} ``` diff --git a/docs/docs/providers/inference/remote_openai.mdx b/docs/docs/providers/inference/remote_openai.mdx index 28c8ab7bf..3ac3a21ad 100644 --- a/docs/docs/providers/inference/remote_openai.mdx +++ b/docs/docs/providers/inference/remote_openai.mdx @@ -14,10 +14,10 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `base_url` | `` | No | https://api.openai.com/v1 | Base URL for OpenAI API | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://api.openai.com/v1 | Base URL for OpenAI API | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx index 7a2931690..325ecc352 100644 --- a/docs/docs/providers/inference/remote_passthrough.mdx +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -14,14 +14,14 @@ Passthrough inference provider for connecting to any external inference service | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint | -| `url` | `` | No | | The URL for the passthrough endpoint | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | | The URL for the passthrough endpoint | ## Sample Configuration ```yaml -url: ${env.PASSTHROUGH_URL} +base_url: ${env.PASSTHROUGH_URL} api_key: ${env.PASSTHROUGH_API_KEY} ``` diff --git a/docs/docs/providers/inference/remote_runpod.mdx b/docs/docs/providers/inference/remote_runpod.mdx index 3cbbd0322..6cdcdd3b5 100644 --- a/docs/docs/providers/inference/remote_runpod.mdx +++ b/docs/docs/providers/inference/remote_runpod.mdx @@ -14,14 +14,14 @@ RunPod inference provider for running models on RunPod's cloud GPU platform. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | -| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_token` | `SecretStr \| None` | No | | The API token | +| `base_url` | `HttpUrl \| None` | No | | The URL for the Runpod model serving endpoint | ## Sample Configuration ```yaml -url: ${env.RUNPOD_URL:=} +base_url: ${env.RUNPOD_URL:=} api_token: ${env.RUNPOD_API_TOKEN} ``` diff --git a/docs/docs/providers/inference/remote_sambanova.mdx b/docs/docs/providers/inference/remote_sambanova.mdx index 0ac4600b7..bbefdb0f0 100644 --- a/docs/docs/providers/inference/remote_sambanova.mdx +++ b/docs/docs/providers/inference/remote_sambanova.mdx @@ -14,14 +14,14 @@ SambaNova inference provider for running models on SambaNova's dataflow architec | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | ## Sample Configuration ```yaml -url: https://api.sambanova.ai/v1 +base_url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_tgi.mdx b/docs/docs/providers/inference/remote_tgi.mdx index 67fe6d237..3790acdd4 100644 --- a/docs/docs/providers/inference/remote_tgi.mdx +++ b/docs/docs/providers/inference/remote_tgi.mdx @@ -14,12 +14,12 @@ Text Generation Inference (TGI) provider for HuggingFace model serving. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `url` | `` | No | | The URL for the TGI serving endpoint | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `base_url` | `HttpUrl \| None` | No | | The URL for the TGI serving endpoint (should include /v1 path) | ## Sample Configuration ```yaml -url: ${env.TGI_URL:=} +base_url: ${env.TGI_URL:=} ``` diff --git a/docs/docs/providers/inference/remote_together.mdx b/docs/docs/providers/inference/remote_together.mdx index c8e3bcdcf..dc025b5ac 100644 --- a/docs/docs/providers/inference/remote_together.mdx +++ b/docs/docs/providers/inference/remote_together.mdx @@ -14,14 +14,14 @@ Together AI inference provider for open-source models and collaborative AI devel | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.together.xyz/v1 | The URL for the Together AI server | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://api.together.xyz/v1 | The URL for the Together AI server | ## Sample Configuration ```yaml -url: https://api.together.xyz/v1 +base_url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} ``` diff --git a/docs/docs/providers/inference/remote_vertexai.mdx b/docs/docs/providers/inference/remote_vertexai.mdx index c182ed485..59b574561 100644 --- a/docs/docs/providers/inference/remote_vertexai.mdx +++ b/docs/docs/providers/inference/remote_vertexai.mdx @@ -53,10 +53,10 @@ Available Models: | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `project` | `` | No | | Google Cloud project ID for Vertex AI | -| `location` | `` | No | us-central1 | Google Cloud location for Vertex AI | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `project` | `str` | No | | Google Cloud project ID for Vertex AI | +| `location` | `str` | No | us-central1 | Google Cloud location for Vertex AI | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_vllm.mdx b/docs/docs/providers/inference/remote_vllm.mdx index f844bcee0..a52c24adb 100644 --- a/docs/docs/providers/inference/remote_vllm.mdx +++ b/docs/docs/providers/inference/remote_vllm.mdx @@ -14,17 +14,17 @@ Remote vLLM inference provider for connecting to vLLM servers. 
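All of the sample configurations in this PR rely on the `${env.NAME:=default}` substitution syntax (for example `${env.VLLM_URL:=}` just below). A rough sketch of the intended semantics, assuming `:=` means "use the environment variable if set, otherwise the default"; the real resolver lives inside llama-stack and may handle additional forms:

```python
# Hedged sketch of ${env.NAME:=default} resolution as used in the sample
# configs; the actual llama-stack resolver may support more syntax than this.
import os
import re

_ENV = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::=([^}]*))?\}")


def resolve(value: str) -> str:
    """Substitute each ${env.NAME:=default} with the env var or its default."""
    return _ENV.sub(lambda m: os.environ.get(m.group(1), m.group(2) or ""), value)


print(resolve("base_url: ${env.VLLM_URL:=http://localhost:8000/v1}"))
# -> "base_url: http://localhost:8000/v1" unless VLLM_URL is set
```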
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | -| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | -| `max_tokens` | `` | No | 4096 | Maximum number of tokens to generate. | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_token` | `SecretStr \| None` | No | | The API token | +| `base_url` | `HttpUrl \| None` | No | | The URL for the vLLM model serving endpoint | +| `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. | | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. | ## Sample Configuration ```yaml -url: ${env.VLLM_URL:=} +base_url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx index 2227aa1cc..47d543e3a 100644 --- a/docs/docs/providers/inference/remote_watsonx.mdx +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -14,17 +14,17 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `base_url` | `HttpUrl \| None` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | | `project_id` | `str \| None` | No | | The watsonx.ai project ID | -| `timeout` | `` | No | 60 | Timeout for the HTTP requests | +| `timeout` | `int` | No | 60 | Timeout for the HTTP requests | ## Sample Configuration ```yaml -url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} +base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=} project_id: ${env.WATSONX_PROJECT_ID:=} ``` diff --git a/docs/docs/providers/openai.mdx b/docs/docs/providers/openai.mdx index 84436e769..c3bb46ecf 100644 --- a/docs/docs/providers/openai.mdx +++ b/docs/docs/providers/openai.mdx @@ -1,9 +1,14 @@ --- -title: OpenAI Compatibility -description: OpenAI API Compatibility -sidebar_label: OpenAI Compatibility -sidebar_position: 1 +title: OpenAI Implementation Guide +description: Code examples and implementation details for OpenAI API compatibility +sidebar_label: OpenAI Implementation +sidebar_position: 2 --- + +# OpenAI Implementation Guide + +This guide provides code examples and implementation details for using OpenAI-compatible APIs with Llama Stack. For a comprehensive overview of OpenAI compatibility features, see our [OpenAI API Compatibility Guide](../api-openai/index.mdx). + ## OpenAI API Compatibility ### Server path @@ -195,3 +200,9 @@ Lines of code unfurl Logic whispers in the dark Art in hidden form ``` + +## Additional Resources + +- **[OpenAI API Compatibility Guide](../api-openai/index.mdx)** - Comprehensive overview of OpenAI compatibility features +- **[OpenAI Responses API Limitations](./openai_responses_limitations.mdx)** - Detailed limitations and known issues +- **[Provider Documentation](../index.mdx)** - Complete provider ecosystem overview diff --git a/docs/docs/providers/openai_responses_limitations.mdx b/docs/docs/providers/openai_responses_limitations.mdx index 9d9ccfbe2..19007438e 100644 --- a/docs/docs/providers/openai_responses_limitations.mdx +++ b/docs/docs/providers/openai_responses_limitations.mdx @@ -48,11 +48,9 @@ Both OpenAI and Llama Stack support a web-search built-in tool. The [OpenAI doc > The type of the web search tool. One of `web_search` or `web_search_2025_08_26`. -In contrast, the [Llama Stack documentation](https://llamastack.github.io/docs/api/create-a-new-open-ai-response) says that the allowed values for `type` for web search are `MOD1`, `MOD2` and `MOD3`. -Is that correct? If so, what are the meanings of each of them? It might make sense for the allowed values for OpenAI map to some values for Llama Stack so that code written to the OpenAI specification -also work with Llama Stack. +Llama Stack now supports both `web_search` and `web_search_2025_08_26` types, matching OpenAI's API. For backward compatibility, Llama Stack also supports `web_search_preview` and `web_search_preview_2025_03_11` types. -The OpenAI web search tool also has fields for `filters` and `user_location` which are not documented as options for Llama Stack. If feasible, it would be good to support these too. +The OpenAI web search tool also has fields for `filters` and `user_location`, which are not yet implemented in Llama Stack. If feasible, it would be good to support these too.
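Since the limitations page now states that Llama Stack accepts the same web-search tool types as OpenAI, a hedged example of what that unlocks: code written to the OpenAI Responses specification pointed at a Llama Stack server. The base URL, API key, and model name below are assumptions for illustration, not documented values:

```python
# Hedged example: the OpenAI-spec web_search tool type against a Llama Stack
# server; adjust base_url and model to match your deployment.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="ollama/llama3.3:70b",
    # "web_search_2025_08_26" and the legacy "web_search_preview" types
    # are documented above as accepted alternatives.
    tools=[{"type": "web_search"}],
    input="Summarize this week's Llama Stack releases.",
)
print(response.output_text)
```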
--- diff --git a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx index ac7644de7..0d4241b27 100644 --- a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx +++ b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx @@ -14,23 +14,23 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `device` | `` | No | cuda | | -| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | | -| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | | -| `chat_template` | `` | No | `<|user|>`
`{input}`
`<|assistant|>`
`{output}` | | -| `model_specific_config` | `` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | | -| `max_seq_length` | `` | No | 2048 | | -| `gradient_checkpointing` | `` | No | False | | -| `save_total_limit` | `` | No | 3 | | -| `logging_steps` | `` | No | 10 | | -| `warmup_ratio` | `` | No | 0.1 | | -| `weight_decay` | `` | No | 0.01 | | -| `dataloader_num_workers` | `` | No | 4 | | -| `dataloader_pin_memory` | `` | No | True | | -| `dpo_beta` | `` | No | 0.1 | | -| `use_reference_model` | `` | No | True | | -| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | | -| `dpo_output_dir` | `` | No | | | +| `device` | `str` | No | cuda | | +| `distributed_backend` | `Literal[fsdp, deepspeed] \| None` | No | | | +| `checkpoint_format` | `Literal[full_state, huggingface] \| None` | No | huggingface | | +| `chat_template` | `str` | No | `<|user|>`
`{input}`
`<|assistant|>`
`{output}` | | +| `model_specific_config` | `dict` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | | +| `max_seq_length` | `int` | No | 2048 | | +| `gradient_checkpointing` | `bool` | No | False | | +| `save_total_limit` | `int` | No | 3 | | +| `logging_steps` | `int` | No | 10 | | +| `warmup_ratio` | `float` | No | 0.1 | | +| `weight_decay` | `float` | No | 0.01 | | +| `dataloader_num_workers` | `int` | No | 4 | | +| `dataloader_pin_memory` | `bool` | No | True | | +| `dpo_beta` | `float` | No | 0.1 | | +| `use_reference_model` | `bool` | No | True | | +| `dpo_loss_type` | `Literal[sigmoid, hinge, ipo, kto_pair]` | No | sigmoid | | +| `dpo_output_dir` | `str` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx index f789392fc..3e2c15d3e 100644 --- a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx +++ b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx @@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `torch_seed` | `int \| None` | No | | | -| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | +| `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx index bd87797af..ac222d8a5 100644 --- a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx +++ b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx @@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `torch_seed` | `int \| None` | No | | | -| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | +| `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/remote_nvidia.mdx b/docs/docs/providers/post_training/remote_nvidia.mdx index 448ac4c75..d0208f82f 100644 --- a/docs/docs/providers/post_training/remote_nvidia.mdx +++ b/docs/docs/providers/post_training/remote_nvidia.mdx @@ -18,9 +18,9 @@ NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform. | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | | `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. 
| | `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API | -| `timeout` | `` | No | 300 | Timeout for the NVIDIA Post Training API | -| `max_retries` | `` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | -| `output_model_dir` | `` | No | test-example-model@v1 | Directory to save the output model | +| `timeout` | `int` | No | 300 | Timeout for the NVIDIA Post Training API | +| `max_retries` | `int` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | +| `output_model_dir` | `str` | No | test-example-model@v1 | Directory to save the output model | ## Sample Configuration diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx index 4e2de4f33..e7205f4ad 100644 --- a/docs/docs/providers/safety/index.mdx +++ b/docs/docs/providers/safety/index.mdx @@ -1,7 +1,8 @@ --- -description: "Safety +description: | + Safety - OpenAI-compatible Moderations API." + OpenAI-compatible Moderations API. sidebar_label: Safety title: Safety --- @@ -12,6 +13,6 @@ title: Safety Safety - OpenAI-compatible Moderations API. +OpenAI-compatible Moderations API. This section contains documentation for all available providers for the **safety** API. diff --git a/docs/docs/providers/safety/inline_llama-guard.mdx b/docs/docs/providers/safety/inline_llama-guard.mdx index 65866c9b2..d52e7289a 100644 --- a/docs/docs/providers/safety/inline_llama-guard.mdx +++ b/docs/docs/providers/safety/inline_llama-guard.mdx @@ -14,7 +14,7 @@ Llama Guard safety provider for content moderation and safety filtering using Me | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `excluded_categories` | `list[str` | No | [] | | +| `excluded_categories` | `list[str]` | No | [] | | ## Sample Configuration diff --git a/docs/docs/providers/safety/inline_prompt-guard.mdx b/docs/docs/providers/safety/inline_prompt-guard.mdx index c52e03e4b..dc57f8555 100644 --- a/docs/docs/providers/safety/inline_prompt-guard.mdx +++ b/docs/docs/providers/safety/inline_prompt-guard.mdx @@ -14,7 +14,7 @@ Prompt Guard safety provider for detecting and filtering unsafe prompts and cont | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `guard_type` | `` | No | injection | | +| `guard_type` | `str` | No | injection | | ## Sample Configuration diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx index 663a761f0..990bd7246 100644 --- a/docs/docs/providers/safety/remote_bedrock.mdx +++ b/docs/docs/providers/safety/remote_bedrock.mdx @@ -14,8 +14,8 @@ AWS Bedrock safety provider for content moderation using AWS's safety services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. 
Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | diff --git a/docs/docs/providers/safety/remote_nvidia.mdx b/docs/docs/providers/safety/remote_nvidia.mdx index 0f665e60a..ac1fd0b03 100644 --- a/docs/docs/providers/safety/remote_nvidia.mdx +++ b/docs/docs/providers/safety/remote_nvidia.mdx @@ -14,7 +14,7 @@ NVIDIA's safety provider for content moderation and safety filtering. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `guardrails_service_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | +| `guardrails_service_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | | `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store | ## Sample Configuration diff --git a/docs/docs/providers/safety/remote_sambanova.mdx b/docs/docs/providers/safety/remote_sambanova.mdx index da70fce6c..69712879c 100644 --- a/docs/docs/providers/safety/remote_sambanova.mdx +++ b/docs/docs/providers/safety/remote_sambanova.mdx @@ -14,8 +14,8 @@ SambaNova's safety provider for content moderation and safety filtering. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | +| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `api_key` | `SecretStr \| None` | No | | The SambaNova cloud API Key | ## Sample Configuration diff --git a/docs/docs/providers/telemetry/index.mdx b/docs/docs/providers/telemetry/index.mdx deleted file mode 100644 index 07190d625..000000000 --- a/docs/docs/providers/telemetry/index.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -sidebar_label: Telemetry -title: Telemetry ---- - -# Telemetry - -## Overview - -This section contains documentation for all available providers for the **telemetry** API. diff --git a/docs/docs/providers/telemetry/inline_meta-reference.mdx b/docs/docs/providers/telemetry/inline_meta-reference.mdx deleted file mode 100644 index d8b3157d1..000000000 --- a/docs/docs/providers/telemetry/inline_meta-reference.mdx +++ /dev/null @@ -1,27 +0,0 @@ ---- -description: "Meta's reference implementation of telemetry and observability using OpenTelemetry." -sidebar_label: Meta-Reference -title: inline::meta-reference ---- - -# inline::meta-reference - -## Description - -Meta's reference implementation of telemetry and observability using OpenTelemetry. - -## Configuration - -| Field | Type | Required | Default | Description | -|-------|------|----------|---------|-------------| -| `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. 
| -| `service_name` | `` | No | ​ | The service name to use for telemetry | -| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, console) | - -## Sample Configuration - -```yaml -service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" -sinks: ${env.TELEMETRY_SINKS:=} -otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} -``` diff --git a/docs/docs/providers/tool_runtime/remote_bing-search.mdx b/docs/docs/providers/tool_runtime/remote_bing-search.mdx index ec06bc20f..f97087d9e 100644 --- a/docs/docs/providers/tool_runtime/remote_bing-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_bing-search.mdx @@ -15,7 +15,7 @@ Bing Search tool for web search capabilities using Microsoft's search engine. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | | -| `top_k` | `` | No | 3 | | +| `top_k` | `int` | No | 3 | | ## Sample Configuration diff --git a/docs/docs/providers/tool_runtime/remote_brave-search.mdx b/docs/docs/providers/tool_runtime/remote_brave-search.mdx index 3aeed67d5..987ce0e41 100644 --- a/docs/docs/providers/tool_runtime/remote_brave-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_brave-search.mdx @@ -15,7 +15,7 @@ Brave Search tool for web search capabilities with privacy-focused results. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The Brave Search API Key | -| `max_results` | `` | No | 3 | The maximum number of results to return | +| `max_results` | `int` | No | 3 | The maximum number of results to return | ## Sample Configuration diff --git a/docs/docs/providers/tool_runtime/remote_tavily-search.mdx b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx index fdca31bbe..36ad63646 100644 --- a/docs/docs/providers/tool_runtime/remote_tavily-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx @@ -15,7 +15,7 @@ Tavily Search tool for AI-optimized web search with structured results. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The Tavily Search API Key | -| `max_results` | `` | No | 3 | The maximum number of results to return | +| `max_results` | `int` | No | 3 | The maximum number of results to return | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_chromadb.mdx b/docs/docs/providers/vector_io/inline_chromadb.mdx index 0be5cd5b3..d78a67b01 100644 --- a/docs/docs/providers/vector_io/inline_chromadb.mdx +++ b/docs/docs/providers/vector_io/inline_chromadb.mdx @@ -78,8 +78,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | | -| `persistence` | `` | No | | Config for KV store backend | +| `db_path` | `str` | No | | | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_faiss.mdx b/docs/docs/providers/vector_io/inline_faiss.mdx index 3a1fba055..c1eedf9db 100644 --- a/docs/docs/providers/vector_io/inline_faiss.mdx +++ b/docs/docs/providers/vector_io/inline_faiss.mdx @@ -95,7 +95,7 @@ more details about Faiss in general. 
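Several vector-io tables below now render the `persistence` type as `KVStoreReference` instead of an empty cell. For orientation, a sketch of what such a reference plausibly carries; the field names here are assumptions, and `llama_stack.core.storage.datatypes.KVStoreReference` (still spelled out in the pgvector and weaviate tables) remains the authoritative definition:

```python
# Illustrative only: an assumed shape for the KVStoreReference named in the
# vector-io tables; consult llama_stack.core.storage.datatypes for the real one.
from pydantic import BaseModel


class KVStoreReference(BaseModel):  # field names are assumptions
    backend: str  # which configured KV backend to use, e.g. "kv_default"
    namespace: str  # keyspace reserved for this provider, e.g. "vector_io::faiss"
```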
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_meta-reference.mdx b/docs/docs/providers/vector_io/inline_meta-reference.mdx index 17fd40cf5..9266b65b5 100644 --- a/docs/docs/providers/vector_io/inline_meta-reference.mdx +++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of a vector database. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_milvus.mdx b/docs/docs/providers/vector_io/inline_milvus.mdx index 6063edab1..e8408a74f 100644 --- a/docs/docs/providers/vector_io/inline_milvus.mdx +++ b/docs/docs/providers/vector_io/inline_milvus.mdx @@ -16,9 +16,9 @@ Please refer to the remote provider documentation. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | -| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | +| `db_path` | `str` | No | | | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | +| `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_qdrant.mdx b/docs/docs/providers/vector_io/inline_qdrant.mdx index 057d96761..8f6155732 100644 --- a/docs/docs/providers/vector_io/inline_qdrant.mdx +++ b/docs/docs/providers/vector_io/inline_qdrant.mdx @@ -97,8 +97,8 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `path` | `` | No | | | -| `persistence` | `` | No | | | +| `path` | `str` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index 98a372250..b63d9db72 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -72,14 +72,14 @@ description: | Example with hybrid search: ```python response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, ) # Using RRF ranker response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={ "mode": "hybrid", @@ -91,7 +91,7 @@ description: | # Using weighted ranker response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={ "mode": "hybrid", @@ -105,7 +105,7 @@ description: | Example with explicit vector search: ```python response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, ) @@ -114,7 +114,7 @@ description: | Example with keyword search: ```python response = await vector_io.query_chunks( - 
vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, ) @@ -153,7 +153,7 @@ description: | Example using RAGQueryConfig with different search modes: ```python - from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker + from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker # Vector search config = RAGQueryConfig(mode="vector", max_chunks=5) @@ -277,14 +277,14 @@ The SQLite-vec provider supports three search modes: Example with hybrid search: ```python response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, ) # Using RRF ranker response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={ "mode": "hybrid", @@ -296,7 +296,7 @@ response = await vector_io.query_chunks( # Using weighted ranker response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={ "mode": "hybrid", @@ -310,7 +310,7 @@ response = await vector_io.query_chunks( Example with explicit vector search: ```python response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, ) @@ -319,7 +319,7 @@ response = await vector_io.query_chunks( Example with keyword search: ```python response = await vector_io.query_chunks( - vector_db_id="my_db", + vector_store_id="my_db", query="your query here", params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, ) @@ -358,7 +358,7 @@ Two ranker types are supported: Example using RAGQueryConfig with different search modes: ```python -from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker +from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker # Vector search config = RAGQueryConfig(mode="vector", max_chunks=5) @@ -407,8 +407,8 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | Path to the SQLite database file | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | +| `db_path` | `str` | No | | Path to the SQLite database file | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx index 67cbd0021..a25ff1b28 100644 --- a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx @@ -16,8 +16,8 @@ Please refer to the sqlite-vec provider documentation. 
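The sqlite-vec examples above were updated to import from the flattened `llama_stack_api` package. To round them out, a hedged sketch configuring the two documented ranker types with that import path; the `impact_factor` and `alpha` values are illustrative choices, not documented defaults:

```python
# Uses the renamed import path from the diff above; parameter values are
# illustrative, not requirements.
from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

# Hybrid retrieval fused with reciprocal rank fusion
rrf_config = RAGQueryConfig(
    mode="hybrid", max_chunks=5, ranker=RRFRanker(impact_factor=60.0)
)

# Hybrid retrieval blending keyword and vector scores
weighted_config = RAGQueryConfig(
    mode="hybrid", max_chunks=5, ranker=WeightedRanker(alpha=0.5)
)
```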
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | Path to the SQLite database file | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | +| `db_path` | `str` | No | | Path to the SQLite database file | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_chromadb.mdx b/docs/docs/providers/vector_io/remote_chromadb.mdx index 2aee3eeca..970f4420f 100644 --- a/docs/docs/providers/vector_io/remote_chromadb.mdx +++ b/docs/docs/providers/vector_io/remote_chromadb.mdx @@ -78,7 +78,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `url` | `str \| None` | No | | | -| `persistence` | `` | No | | Config for KV store backend | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_milvus.mdx b/docs/docs/providers/vector_io/remote_milvus.mdx index bf9935d61..3e8ae71cf 100644 --- a/docs/docs/providers/vector_io/remote_milvus.mdx +++ b/docs/docs/providers/vector_io/remote_milvus.mdx @@ -405,10 +405,10 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `uri` | `` | No | | The URI of the Milvus server | +| `uri` | `str` | No | | The URI of the Milvus server | | `token` | `str \| None` | No | | The token of the Milvus server | -| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | -| `persistence` | `` | No | | Config for KV store backend | +| `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | :::note diff --git a/docs/docs/providers/vector_io/remote_pgvector.mdx b/docs/docs/providers/vector_io/remote_pgvector.mdx index cb70f35d1..cd69e2b2f 100644 --- a/docs/docs/providers/vector_io/remote_pgvector.mdx +++ b/docs/docs/providers/vector_io/remote_pgvector.mdx @@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de | `db` | `str \| None` | No | postgres | | | `user` | `str \| None` | No | postgres | | | `password` | `str \| None` | No | mysecretpassword | | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_qdrant.mdx b/docs/docs/providers/vector_io/remote_qdrant.mdx index dff9642b5..9b5117bcb 100644 --- a/docs/docs/providers/vector_io/remote_qdrant.mdx +++ b/docs/docs/providers/vector_io/remote_qdrant.mdx @@ -19,14 +19,14 @@ Please refer to the inline provider documentation. 
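The remote Milvus table above documents a `config` dict whose extra keys are passed through to the underlying Milvus client untouched. A hedged illustration of the resulting provider configuration, written as the equivalent Python mapping; the keys inside `config` are examples only, not a required or exhaustive set:

```python
# Hedged illustration of the Milvus passthrough `config` documented above;
# keys under "config" are forwarded as-is to the Milvus client.
milvus_vector_io_config = {
    "uri": "http://localhost:19530",  # Milvus server URI
    "consistency_level": "Strong",  # documented default
    "config": {  # example passthrough options, not an exhaustive set
        "secure": False,
        "db_name": "default",
    },
}
```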
| `location` | `str \| None` | No | | | | `url` | `str \| None` | No | | | | `port` | `int \| None` | No | 6333 | | -| `grpc_port` | `` | No | 6334 | | -| `prefer_grpc` | `` | No | False | | +| `grpc_port` | `int` | No | 6334 | | +| `prefer_grpc` | `bool` | No | False | | | `https` | `bool \| None` | No | | | | `api_key` | `str \| None` | No | | | | `prefix` | `str \| None` | No | | | | `timeout` | `int \| None` | No | | | | `host` | `str \| None` | No | | | -| `persistence` | `` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_weaviate.mdx b/docs/docs/providers/vector_io/remote_weaviate.mdx index b809bed2e..7a29d0d48 100644 --- a/docs/docs/providers/vector_io/remote_weaviate.mdx +++ b/docs/docs/providers/vector_io/remote_weaviate.mdx @@ -75,7 +75,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more |-------|------|----------|---------|-------------| | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance | | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/references/llama_stack_client_cli_reference.md b/docs/docs/references/llama_stack_client_cli_reference.md index a4321938a..fd87e7dbd 100644 --- a/docs/docs/references/llama_stack_client_cli_reference.md +++ b/docs/docs/references/llama_stack_client_cli_reference.md @@ -78,8 +78,6 @@ llama-stack-client providers list +-----------+----------------+-----------------+ | agents | meta-reference | meta-reference | +-----------+----------------+-----------------+ -| telemetry | meta-reference | meta-reference | -+-----------+----------------+-----------------+ | safety | meta-reference | meta-reference | +-----------+----------------+-----------------+ ``` diff --git a/docs/docs/references/python_sdk_reference/index.md b/docs/docs/references/python_sdk_reference/index.md index 686567458..532341a4d 100644 --- a/docs/docs/references/python_sdk_reference/index.md +++ b/docs/docs/references/python_sdk_reference/index.md @@ -360,32 +360,6 @@ Methods: - client.synthetic_data_generation.generate(\*\*params) -> SyntheticDataGenerationResponse -## Telemetry - -Types: - -```python -from llama_stack_client.types import ( - QuerySpansResponse, - SpanWithStatus, - Trace, - TelemetryGetSpanResponse, - TelemetryGetSpanTreeResponse, - TelemetryQuerySpansResponse, - TelemetryQueryTracesResponse, -) -``` - -Methods: - -- client.telemetry.get_span(span_id, \*, trace_id) -> TelemetryGetSpanResponse -- client.telemetry.get_span_tree(span_id, \*\*params) -> TelemetryGetSpanTreeResponse -- client.telemetry.get_trace(trace_id) -> Trace -- client.telemetry.log_event(\*\*params) -> None -- client.telemetry.query_spans(\*\*params) -> TelemetryQuerySpansResponse -- client.telemetry.query_traces(\*\*params) -> TelemetryQueryTracesResponse -- client.telemetry.save_spans_to_dataset(\*\*params) -> None - ## Datasetio Types: diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 70406474f..69045c36d 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -71,6 +71,11 @@ const config: Config = { docs: { sidebarPath: require.resolve("./sidebars.ts"), 
docItemComponent: "@theme/ApiItem", // Derived from docusaurus-theme-openapi + remarkPlugins: [ + [require('remark-code-import'), { + rootDir: require('path').join(__dirname, '..') // Repository root + }] + ], }, blog: false, theme: { diff --git a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb index 51604f6d1..899216d7a 100644 --- a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb +++ b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb @@ -37,7 +37,7 @@ "outputs": [], "source": [ "# NBVAL_SKIP\n", - "!pip install -U llama-stack\n", + "!pip install -U llama-stack llama-stack-client\n", "llama stack list-deps fireworks | xargs -L1 uv pip install\n" ] }, diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb index 94af24258..d51c0d39a 100644 --- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb +++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb @@ -44,7 +44,7 @@ "outputs": [], "source": [ "# NBVAL_SKIP\n", - "!pip install -U llama-stack" + "!pip install -U llama-stack llama-stack-client\n" ] }, { diff --git a/docs/notebooks/llamastack_agents_getting_started_examples.ipynb b/docs/notebooks/llamastack_agents_getting_started_examples.ipynb new file mode 100644 index 000000000..1ac1a2f92 --- /dev/null +++ b/docs/notebooks/llamastack_agents_getting_started_examples.ipynb @@ -0,0 +1,1036 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/notebooks/llamastack_agents_getting_started_examples.ipynb)\n", + "\n", + "# Llama Stack Agents - Getting Started Guide\n", + "\n", + "This notebook provides a comprehensive introduction to building AI agents with Llama Stack. The Agent SDK is built on top of an open source version of **OpenAI's Responses+ APIs**, providing a standardized interface for agent workflows.\n", + "\n", + "## What You'll Learn\n", + "\n", + "1. **Basic Agent Creation** - Simple Q&A agents with streaming\n", + "2. **Multi-Turn Conversations** - Maintaining context across conversations\n", + "3. **RAG Integration** - Adding knowledge bases to your agents \n", + "4. 
**MCP Tools** - Extending agents with Model Context Protocol tools\n", + "\n", + "## Prerequisites\n", + "\n", + "- Llama Stack server running: `llama stack run starter --port 8321`\n", + "- A model provider configured (Ollama, Fireworks, etc.)\n", + "- Python 3.10+\n", + "\n", + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Client initialized successfully!\n", + " Base URL: http://localhost:8321\n" + ] + } + ], + "source": [ + "# Import required libraries\n", + "import json\n", + "from typing import Any, Dict\n", + "\n", + "from llama_stack_client import LlamaStackClient, Agent\n", + "from llama_stack_client.types import UserMessage\n", + "\n", + "# Initialize client\n", + "client = LlamaStackClient(base_url=\"http://localhost:8321\")\n", + "\n", + "print(\"✅ Client initialized successfully!\")\n", + "print(f\" Base URL: http://localhost:8321\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Created agent successfully\n" + ] + } + ], + "source": [ + "# Create a basic agent using the Agent class\n", + "agent = Agent(\n", + " client=client,\n", + " model=\"ollama/llama3.3:70b\",\n", + " instructions=\"You are a helpful AI assistant that can answer questions and help with tasks.\",\n", + ")\n", + "\n", + "print(\"✅ Created agent successfully\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "# Part 1: Basic Agent Example\n", + "\n", + "Let's start with a simple agent that can answer questions. This demonstrates:\n", + "- Agent creation with basic configuration\n", + "- Session management\n", + "- Streaming responses" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/conversations \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Created session: conv_e6afd7aaa97b49ce8f4f96a801b07893d9cb784d72e53e3c\n" + ] + } + ], + "source": [ + "# Create agent session\n", + "basic_session_id = agent.create_session(session_name=\"basic_example_session\")\n", + "\n", + "print(f\"✅ Created session: {basic_session_id}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/responses \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "User: What is the capital of France? Please explain briefly.\n", + "\n", + "Assistant: The capital of France is Paris. It's the country's largest city, known for iconic landmarks like the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum, serving as the center of French politics, culture, and economy.The capital of France is Paris. It's the country's largest city, known for iconic landmarks like the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum, serving as the center of French politics, culture, and economy.\n", + "\n", + "✅ Response captured: 223 characters\n" + ] + } + ], + "source": [ + "# Send a message to the agent with streaming\n", + "query = \"What is the capital of France? 
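Before creating agents, it can save debugging time to confirm the server is actually reachable. A minimal sanity-check sketch, assuming `client.models.list()` is available in the installed `llama-stack-client` and returns an iterable of registered models:

```python
# Optional reachability check against the server started with
# `llama stack run starter --port 8321`.
try:
    models = list(client.models.list())
    print(f"✅ Server reachable; {len(models)} model(s) registered")
except Exception as exc:
    print(f"❌ Could not reach http://localhost:8321: {exc}")
```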
Please explain briefly.\"\n", + "\n", + "print(f\"User: {query}\\n\")\n", + "print(\"Assistant: \", end='')\n", + "\n", + "# Create a turn with streaming\n", + "response = agent.create_turn(\n", + " session_id=basic_session_id,\n", + " messages=[UserMessage(content=query, role=\"user\")],\n", + " stream=True,\n", + ")\n", + "\n", + "# Stream the response\n", + "output_text = \"\"\n", + "for chunk in response:\n", + " if chunk.event.event_type == \"turn_completed\":\n", + " output_text = chunk.event.final_text\n", + " print(output_text)\n", + " break\n", + " elif chunk.event.event_type == \"step_progress\":\n", + " # Print text deltas as they arrive\n", + " if hasattr(chunk.event.delta, 'text'):\n", + " print(chunk.event.delta.text, end='', flush=True)\n", + "\n", + "print(f\"\\n✅ Response captured: {len(output_text)} characters\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: DELETE http://localhost:8321/v1/conversations/conv_e6afd7aaa97b49ce8f4f96a801b07893d9cb784d72e53e3c \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Session cleaned up\n" + ] + } + ], + "source": [ + "# Clean up the session\n", + "client.conversations.delete(conversation_id=basic_session_id)\n", + "print(\"✅ Session cleaned up\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "# Part 2: Advanced Agent Features\n", + "\n", + "Now let's explore more advanced capabilities:\n", + "- Multi-turn conversations with context memory\n", + "- RAG (Retrieval-Augmented Generation) patterns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.1 Multi-Turn Conversation\n", + "\n", + "Demonstrate how agents can maintain context across multiple conversation turns." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/conversations \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Created conversation agent\n", + "✅ Created session: conv_936121c2e27b7d1f7d3f0b6a62adce867d79268f5f9ce265\n" + ] + } + ], + "source": [ + "# Create agent for multi-turn conversation\n", + "conv_agent = Agent(\n", + " client=client,\n", + " model=\"ollama/llama3.3:70b\",\n", + " instructions=\"You are a helpful assistant that remembers context from previous messages.\",\n", + ")\n", + "\n", + "print(\"✅ Created conversation agent\")\n", + "\n", + "conv_session_id = conv_agent.create_session(session_name=\"multi_turn_session\")\n", + "print(f\"✅ Created session: {conv_session_id}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/responses \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "============================================================\n", + "Turn 1\n", + "============================================================\n", + "User: My name is Alice and I'm learning about AI.\n", + "Assistant: Nice to meet you, Alice! It's great that you're interested in learning about AI. What aspects of AI would you like to explore? 
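The chunk-handling loop above reappears in every streaming cell that follows. A small helper keeps those cells shorter; this sketch assumes only the event shapes the notebook already relies on (`event_type`, `delta.text`, `final_text`):

```python
def stream_and_collect(response) -> str:
    """Print streamed text deltas and return the final turn text.

    Mirrors the loop above: assumes 'step_progress' deltas may carry .text
    and 'turn_completed' events carry .final_text, as in this notebook.
    """
    for chunk in response:
        event = chunk.event
        if event.event_type == "turn_completed":
            print()  # end the line of streamed deltas
            return event.final_text
        if event.event_type == "step_progress" and hasattr(event.delta, "text"):
            print(event.delta.text, end="", flush=True)
    return ""
```

Returning `final_text` instead of printing it a second time also avoids the doubled text visible in the recorded outputs above.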
Are you curious about machine learning, natural language processing, or something else? I'll be happy to help and provide information tailored to your interests.Nice to meet you, Alice! It's great that you're interested in learning about AI. What aspects of AI would you like to explore? Are you curious about machine learning, natural language processing, or something else? I'll be happy to help and provide information tailored to your interests.\n", + "\n", + "============================================================\n", + "Turn 2\n", + "============================================================\n", + "User: What are some good resources for beginners?\n", + "Assistant: " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/responses \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "As a beginner, it's essential to start with resources that provide a solid foundation in AI concepts. Here are some recommendations:\n", + "\n", + "1. **Online Courses**:\n", + "\t* Andrew Ng's Machine Learning course on Coursera: A popular and comprehensive introduction to machine learning.\n", + "\t* Stanford University's Natural Language Processing with Deep Learning Specialization on Coursera: Covers NLP fundamentals and deep learning techniques.\n", + "2. **Books**:\n", + "\t* \"Introduction to Artificial Intelligence\" by Philip C. Jackson Jr.: A gentle introduction to AI concepts, including machine learning and computer vision.\n", + "\t* \"Deep Learning\" by Ian Goodfellow, Yoshua Bengio, and Aaron Courville: A detailed book on deep learning techniques, although it may require some prior knowledge of linear algebra and calculus.\n", + "3. **Websites and Blogs**:\n", + "\t* Machine Learning Mastery: A website offering tutorials, examples, and explanations on various machine learning topics.\n", + "\t* KDnuggets: A popular blog covering AI, machine learning, and data science news, tutorials, and research papers.\n", + "4. **YouTube Channels**:\n", + "\t* 3Blue1Brown (Grant Sanderson): Engaging video explanations on AI, machine learning, and linear algebra concepts.\n", + "\t* Sentdex: Offers video tutorials on various AI topics, including machine learning, deep learning, and computer vision.\n", + "5. **Communities and Forums**:\n", + "\t* Kaggle: A platform for data science competitions and hosting datasets, where you can learn from others and participate in discussions.\n", + "\t* Reddit's r/MachineLearning and r/AI: Active communities discussing AI-related topics, sharing resources, and providing feedback on projects.\n", + "\n", + "Remember, learning about AI is a continuous process. Start with the basics, build projects, and gradually move on to more advanced topics. Practice and experimentation are key to gaining hands-on experience.\n", + "\n", + "What specific area of AI would you like to explore first, Alice?As a beginner, it's essential to start with resources that provide a solid foundation in AI concepts. Here are some recommendations:\n", + "\n", + "1. **Online Courses**:\n", + "\t* Andrew Ng's Machine Learning course on Coursera: A popular and comprehensive introduction to machine learning.\n", + "\t* Stanford University's Natural Language Processing with Deep Learning Specialization on Coursera: Covers NLP fundamentals and deep learning techniques.\n", + "2. **Books**:\n", + "\t* \"Introduction to Artificial Intelligence\" by Philip C. 
Jackson Jr.: A gentle introduction to AI concepts, including machine learning and computer vision.\n", + "\t* \"Deep Learning\" by Ian Goodfellow, Yoshua Bengio, and Aaron Courville: A detailed book on deep learning techniques, although it may require some prior knowledge of linear algebra and calculus.\n", + "3. **Websites and Blogs**:\n", + "\t* Machine Learning Mastery: A website offering tutorials, examples, and explanations on various machine learning topics.\n", + "\t* KDnuggets: A popular blog covering AI, machine learning, and data science news, tutorials, and research papers.\n", + "4. **YouTube Channels**:\n", + "\t* 3Blue1Brown (Grant Sanderson): Engaging video explanations on AI, machine learning, and linear algebra concepts.\n", + "\t* Sentdex: Offers video tutorials on various AI topics, including machine learning, deep learning, and computer vision.\n", + "5. **Communities and Forums**:\n", + "\t* Kaggle: A platform for data science competitions and hosting datasets, where you can learn from others and participate in discussions.\n", + "\t* Reddit's r/MachineLearning and r/AI: Active communities discussing AI-related topics, sharing resources, and providing feedback on projects.\n", + "\n", + "Remember, learning about AI is a continuous process. Start with the basics, build projects, and gradually move on to more advanced topics. Practice and experimentation are key to gaining hands-on experience.\n", + "\n", + "What specific area of AI would you like to explore first, Alice?\n", + "\n", + "============================================================\n", + "Turn 3\n", + "============================================================\n", + "User: Can you remind me what my name is?\n", + "Assistant: " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/responses \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your name is Alice! I remember that from our previous conversation when you introduced yourself as someone interested in learning about AI. How can I assist you further today?Your name is Alice! I remember that from our previous conversation when you introduced yourself as someone interested in learning about AI. 
How can I assist you further today?\n", + "\n", + "✅ Completed 3 conversational turns with context retention\n" + ] + } + ], + "source": [ + "# Conversation turns that build on each other\n", + "conversation_turns = [\n", + " \"My name is Alice and I'm learning about AI.\",\n", + " \"What are some good resources for beginners?\",\n", + " \"Can you remind me what my name is?\",\n", + "]\n", + "\n", + "for i, query in enumerate(conversation_turns, 1):\n", + " print(f\"\\n{'='*60}\")\n", + " print(f\"Turn {i}\")\n", + " print(f\"{'='*60}\")\n", + " print(f\"User: {query}\")\n", + "\n", + " response = conv_agent.create_turn(\n", + " session_id=conv_session_id,\n", + " messages=[UserMessage(content=query, role=\"user\")],\n", + " stream=True,\n", + " )\n", + "\n", + " print(\"Assistant: \", end='')\n", + " for chunk in response:\n", + " if chunk.event.event_type == \"turn_completed\":\n", + " output = chunk.event.final_text\n", + " print(output)\n", + " break\n", + " elif chunk.event.event_type == \"step_progress\":\n", + " if hasattr(chunk.event.delta, 'text'):\n", + " print(chunk.event.delta.text, end='', flush=True)\n", + "\n", + "print(f\"\\n✅ Completed {len(conversation_turns)} conversational turns with context retention\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: DELETE http://localhost:8321/v1/conversations/conv_936121c2e27b7d1f7d3f0b6a62adce867d79268f5f9ce265 \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Session cleaned up\n" + ] + } + ], + "source": [ + "# Cleanup\n", + "client.conversations.delete(conversation_id=conv_session_id)\n", + "print(\"✅ Session cleaned up\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.2 RAG (Retrieval-Augmented Generation) Pattern\n", + "\n", + "Demonstrate how to provide context to the agent for more accurate responses." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Knowledge base: 3 Paul Graham essay excerpts\n", + " - pg_essay_1: What I Worked On\n", + " - pg_essay_2: How to Start a Startup\n", + " - pg_essay_3: Maker's Schedule, Manager's Schedule\n" + ] + } + ], + "source": [ + "# Sample knowledge base: Paul Graham essay excerpts\n", + "# This is a common RAG example - using actual content from Paul Graham's essays\n", + "documents = [\n", + " {\n", + " \"doc_id\": \"pg_essay_1\",\n", + " \"content\": \"\"\"What I Worked On\n", + "\n", + " Before college the two main things I worked on, outside of school, were writing and programming.\n", + " I didn't write essays. I wrote what beginning writers were supposed to write then, and probably\n", + " still are: short stories. My stories were awful. They had hardly any plot, just characters with\n", + " strong feelings, which I imagined made them deep.\n", + "\n", + " The first programs I tried writing were on the IBM 1401 that our school district used for what\n", + " was then called 'data processing.' This was in 9th grade, so I was 13 or 14. 
The school district's\n", + " 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got\n", + " permission to use it.\"\"\",\n", + " \"metadata\": {\"essay\": \"What I Worked On\", \"author\": \"Paul Graham\", \"year\": 2021}\n", + " },\n", + " {\n", + " \"doc_id\": \"pg_essay_2\",\n", + " \"content\": \"\"\"How to Start a Startup\n", + "\n", + " You need three things to create a successful startup: to start with good people, to make something\n", + " customers actually want, and to spend as little money as possible. Most startups that fail do it\n", + " because they fail at one of these. A startup that does all three will probably succeed.\n", + "\n", + " And that's kind of exciting, when you think about it, because all three are doable. Hard, but doable.\n", + " And since a startup that succeeds ordinarily makes its founders rich, that implies getting rich is\n", + " doable too. Hard, but doable.\"\"\",\n", + " \"metadata\": {\"essay\": \"How to Start a Startup\", \"author\": \"Paul Graham\", \"year\": 2005}\n", + " },\n", + " {\n", + " \"doc_id\": \"pg_essay_3\",\n", + " \"content\": \"\"\"Maker's Schedule, Manager's Schedule\n", + "\n", + " One reason programmers dislike meetings so much is that they're on a different type of schedule\n", + " from other people. Meetings cost them more.\n", + "\n", + " There are two types of schedule, which I'll call the manager's schedule and the maker's schedule.\n", + " The manager's schedule is for bosses. It's embodied in the traditional appointment book, with each\n", + " day cut into one hour intervals. When you use time that way, it's merely a practical problem to\n", + " meet with someone. But there's another way of using time that's common among people who make things,\n", + " like programmers and writers. They generally prefer to use time in units of half a day at least.\"\"\",\n", + " \"metadata\": {\"essay\": \"Maker's Schedule, Manager's Schedule\", \"author\": \"Paul Graham\", \"year\": 2009}\n", + " },\n", + "]\n", + "\n", + "print(f\"Knowledge base: {len(documents)} Paul Graham essay excerpts\")\n", + "for doc in documents:\n", + " print(f\" - {doc['doc_id']}: {doc['metadata']['essay']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Created RAG agent\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/conversations \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Created session: conv_9ae94374c781501f2d712620dcc8e55961b5a226df229b1d\n" + ] + } + ], + "source": [ + "# Create RAG-enabled agent\n", + "rag_agent = Agent(\n", + " client=client,\n", + " model=\"ollama/llama3.3:70b\",\n", + " instructions=(\n", + " \"You are a helpful AI assistant with access to a knowledge base. \"\n", + " \"When answering questions, use the provided context from the knowledge base. 
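The query cell below simulates retrieval with a simple substring filter. A slightly more realistic, dependency-free ranking sketch that could be swapped in for it (a toy bag-of-words cosine scorer, not the vector search a production deployment would use):

```python
import math
import re
from collections import Counter


def _bow(text: str) -> Counter:
    """Lowercased bag-of-words vector."""
    return Counter(re.findall(r"[a-z']+", text.lower()))


def retrieve(query: str, docs: list[dict], k: int = 1) -> list[dict]:
    """Rank documents by cosine similarity of bag-of-words vectors."""
    q = _bow(query)

    def score(doc: dict) -> float:
        d = _bow(doc["content"])
        dot = sum(q[w] * d[w] for w in q)
        norm = math.sqrt(sum(v * v for v in q.values())) * math.sqrt(
            sum(v * v for v in d.values())
        )
        return dot / norm if norm else 0.0

    return sorted(docs, key=score, reverse=True)[:k]


relevant_docs = retrieve("What did Paul Graham work on before college?", documents)
print([d["doc_id"] for d in relevant_docs])  # pg_essay_1 should rank first
```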
\"\n", + " \"If the context doesn't contain relevant information, say so.\"\n", + " ),\n", + ")\n", + "\n", + "print(\"✅ Created RAG agent\")\n", + "\n", + "rag_session_id = rag_agent.create_session(session_name=\"rag_session\")\n", + "print(f\"✅ Created session: {rag_session_id}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/responses \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query: What did Paul Graham work on before college?\n", + "Retrieved 1 relevant document(s)\n", + "\n", + "Answer: Based on the provided context from \"What I Worked On\", before college, Paul Graham worked on two main things outside of school: \n", + "\n", + "1. Writing (specifically short stories)\n", + "2. Programming (initially on the IBM 1401)Based on the provided context from \"What I Worked On\", before college, Paul Graham worked on two main things outside of school: \n", + "\n", + "1. Writing (specifically short stories)\n", + "2. Programming (initially on the IBM 1401)\n", + "\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: DELETE http://localhost:8321/v1/conversations/conv_9ae94374c781501f2d712620dcc8e55961b5a226df229b1d \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Session cleaned up\n" + ] + } + ], + "source": [ + "# Query with context from Paul Graham essays\n", + "query = \"What did Paul Graham work on before college?\"\n", + "\n", + "# Simulate retrieval (in production, use vector search)\n", + "relevant_docs = [doc for doc in documents if \"before college\" in doc[\"content\"].lower()]\n", + "context = \"\\n\\n\".join([f\"From '{doc['metadata']['essay']}':\\n{doc['content']}\"\n", + " for doc in relevant_docs])\n", + "\n", + "# Create prompt with retrieved context\n", + "prompt_with_context = f\"\"\"Context from knowledge base:\n", + "{context}\n", + "\n", + "Question: {query}\n", + "\n", + "Please answer based on the provided context.\"\"\"\n", + "\n", + "print(f\"Query: {query}\")\n", + "print(f\"Retrieved {len(relevant_docs)} relevant document(s)\\n\")\n", + "print(\"Answer: \", end='')\n", + "\n", + "response = rag_agent.create_turn(\n", + " session_id=rag_session_id,\n", + " messages=[UserMessage(content=prompt_with_context, role=\"user\")],\n", + " stream=True,\n", + ")\n", + "\n", + "for chunk in response:\n", + " if chunk.event.event_type == \"turn_completed\":\n", + " output = chunk.event.final_text\n", + " print(output)\n", + " break\n", + " elif chunk.event.event_type == \"step_progress\":\n", + " if hasattr(chunk.event.delta, 'text'):\n", + " print(chunk.event.delta.text, end='', flush=True)\n", + "\n", + "print(\"\\n\")\n", + "client.conversations.delete(conversation_id=rag_session_id)\n", + "print(\"✅ Session cleaned up\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "# Part 3: MCP (Model Context Protocol) Tools\n", + "\n", + "MCP provides a standardized way for AI models to interact with external tools and data sources.\n", + "\n", + "We'll demonstrate:\n", + "- Defining MCP-compatible tools\n", + "- Agent tool selection\n", + "- Tool execution and response handling" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Created 3 MCP tools:\n", + " - get_weather: Get current weather information for a specified location\n", + " - execute_code: Execute Python code and return the result\n", + " - web_search: Search the web for information\n" + ] + } + ], + "source": [ + "def create_mcp_tools():\n", + " \"\"\"Create MCP-compatible tool definitions.\"\"\"\n", + " return [\n", + " {\n", + " \"tool_name\": \"get_weather\",\n", + " \"description\": \"Get current weather information for a specified location\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"location\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"City and state/country, e.g., 'San Francisco, CA'\"\n", + " },\n", + " \"unit\": {\n", + " \"type\": \"string\",\n", + " \"enum\": [\"celsius\", \"fahrenheit\"],\n", + " \"description\": \"Temperature unit\",\n", + " \"default\": \"fahrenheit\"\n", + " }\n", + " },\n", + " \"required\": [\"location\"]\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"execute_code\",\n", + " \"description\": \"Execute Python code and return the result\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"code\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"Python code to execute\"\n", + " }\n", + " },\n", + " \"required\": [\"code\"]\n", + " }\n", + " },\n", + " {\n", + " \"tool_name\": \"web_search\",\n", + " \"description\": \"Search the web for information\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"query\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"Search query\"\n", + " }\n", + " },\n", + " \"required\": [\"query\"]\n", + " }\n", + " },\n", + " ]\n", + "\n", + "tools = create_mcp_tools()\n", + "print(f\"Created {len(tools)} MCP tools:\")\n", + "for tool in tools:\n", + " print(f\" - {tool['tool_name']}: {tool['description']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MCP tool configuration ready\n", + " Server: http://localhost:3000/sse\n", + " Format: MCP server-based\n", + "\n", + " To use MCP tools:\n", + " 1. Set up your MCP server\n", + " 2. Update MCP_SERVER_URL and MCP_ACCESS_TOKEN above\n", + " 3. Pass mcp_tools to Agent(tools=mcp_tools)\n" + ] + } + ], + "source": [ + "# Example 2: MCP Server Configuration (0.3.0 format)\n", + "\n", + "# MCP server configuration\n", + "# Replace with your actual MCP server URL and credentials\n", + "MCP_SERVER_URL = \"https://api.example.com/mcp\" # Your MCP server endpoint\n", + "MCP_ACCESS_TOKEN = \"your-token-here\" # Your authentication token\n", + "\n", + "MCP_ACCESS_TOKEN = \"YOUR_ACCESS_TOKEN_HERE\"\n", + "## ran an MCP server locally, you can replace this field with your mcp server url\n", + "MCP_SERVER_URL = \"http://localhost:3000/sse\"\n", + "#MCP_SERVER_URL = \"https://mcp.deepwiki.com/sse\"\n", + "mcp_tools = [\n", + " {\n", + " \"type\": \"mcp\",\n", + " \"server_url\": MCP_SERVER_URL,\n", + " \"server_label\": \"weather\",\n", + " \"headers\": {\n", + " \"Authorization\": f\"Bearer {MCP_ACCESS_TOKEN}\",\n", + " },\n", + " }\n", + "]\n", + "\n", + "\n", + "print(\"MCP tool configuration ready\")\n", + "print(f\" Server: {MCP_SERVER_URL}\")\n", + "print(\" Format: MCP server-based\")\n", + "print(\"\\n To use MCP tools:\")\n", + "print(\" 1. Set up your MCP server\")\n", + "print(\" 2. 
Update MCP_SERVER_URL and MCP_ACCESS_TOKEN above\")\n", + "print(\" 3. Pass mcp_tools to Agent(tools=mcp_tools)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tool execution simulator ready\n" + ] + } + ], + "source": [ + "def simulate_tool_execution(tool_name: str, arguments: Dict[str, Any]) -> str:\n", + " \"\"\"Simulate tool execution (replace with real implementations).\"\"\"\n", + " if tool_name == \"get_weather\":\n", + " location = arguments.get(\"location\", \"Unknown\")\n", + " unit = arguments.get(\"unit\", \"fahrenheit\")\n", + " temp = \"72°F\" if unit == \"fahrenheit\" else \"22°C\"\n", + " return json.dumps({\n", + " \"location\": location,\n", + " \"temperature\": temp,\n", + " \"condition\": \"Partly cloudy\",\n", + " \"humidity\": \"65%\",\n", + " \"wind\": \"10 mph NW\"\n", + " })\n", + " elif tool_name == \"execute_code\":\n", + " code = arguments.get(\"code\", \"\")\n", + " return json.dumps({\n", + " \"status\": \"success\",\n", + " \"output\": f\"Code execution simulated for: {code[:50]}...\"\n", + " })\n", + " elif tool_name == \"web_search\":\n", + " query = arguments.get(\"query\", \"\")\n", + " return json.dumps({\n", + " \"status\": \"success\",\n", + " \"results\": [\n", + " {\"title\": f\"Result {i+1}\", \"url\": f\"https://example.com/{i+1}\",\n", + " \"snippet\": f\"Information about {query}\"}\n", + " for i in range(3)\n", + " ]\n", + " })\n", + " return json.dumps({\"error\": \"Unknown tool\"})\n", + "\n", + "print(\"Tool execution simulator ready\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created MCP agent\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/conversations \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Created session: conv_5613324aa4c3193b1434bf562fe1c75dc2e0563c681738b1\n" + ] + } + ], + "source": [ + "mcp_agent = Agent(\n", + " client=client,\n", + " model=\"ollama/llama3.3:70b\",\n", + " instructions=\"You are a helpful AI assistant that can answer questions and help with various tasks.\",\n", + " tools=mcp_tools # you can set this field to tools when experimenting with the tools created by create_mcp_tools above.\n", + ")\n", + "\n", + "print(\"Created MCP agent\")\n", + "\n", + "mcp_session_id = mcp_agent.create_session(session_name=\"mcp_tools_session\")\n", + "print(f\"✅ Created session: {mcp_session_id}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/responses \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "MCP TOOL EXAMPLE\n", + "======================================================================\n", + "\n", + " User: What's the weather like in New York City?\n", + "\n", + " Assistant: \n", + "\n", + " [Tool Execution Started]\n", + "\n", + "\n", + " [Tool Execution Started]\n", + "The current weather in New York City is mostly cloudy with a temperature of 49°F and a wind speed of 17 mph NE. Today, it will be partly sunny with a high of 55°F. 
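Before letting the model drive tool calls in the next cell, the plumbing can be exercised directly. The sketch below validates arguments against the `get_weather` schema from `create_mcp_tools` (a deliberately tiny subset of JSON Schema checking; real code might reach for the `jsonschema` package instead) and then dispatches straight to `simulate_tool_execution`:

```python
from typing import Any


def check_tool_args(tool: dict, arguments: dict[str, Any]) -> list[str]:
    """Return problems with `arguments` per the tool's parameter schema.

    Checks only required fields, unknown fields, and enum membership.
    """
    schema = tool["parameters"]
    props = schema.get("properties", {})
    problems = [
        f"missing required field: {name}"
        for name in schema.get("required", [])
        if name not in arguments
    ]
    for name, value in arguments.items():
        if name not in props:
            problems.append(f"unknown field: {name}")
        elif "enum" in props[name] and value not in props[name]["enum"]:
            problems.append(f"{name} must be one of {props[name]['enum']}, got {value!r}")
    return problems


weather_tool = next(t for t in tools if t["tool_name"] == "get_weather")

args = {"location": "New York, NY", "unit": "celsius"}
problems = check_tool_args(weather_tool, args)
if problems:
    print("invalid arguments:", problems)
else:
    # Direct dispatch, no model in the loop -- handy when debugging tool plumbing.
    print(json.loads(simulate_tool_execution("get_weather", args))["temperature"])  # 22°C
```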
Tonight, there's a chance of rain showers with a low of 53°F. The rest of the week will see a mix of rain, thunderstorms, and sunshine, with temperatures ranging from the mid-50s to the mid-60s. It's a good idea to check the forecast regularly for updates.The current weather in New York City is mostly cloudy with a temperature of 49°F and a wind speed of 17 mph NE. Today, it will be partly sunny with a high of 55°F. Tonight, there's a chance of rain showers with a low of 53°F. The rest of the week will see a mix of rain, thunderstorms, and sunshine, with temperatures ranging from the mid-50s to the mid-60s. It's a good idea to check the forecast regularly for updates.\n", + "\n", + "\n", + " Summary: Used 2 tool(s) to answer the query\n" + ] + } + ], + "source": [ + "# Example: Weather query that should trigger tool usage\n", + "query = \"What's the weather like in New York City?\"\n", + "\n", + "print(f\"{'='*70}\")\n", + "print(f\"MCP TOOL EXAMPLE\")\n", + "print(f\"{'='*70}\")\n", + "print(f\"\\n User: {query}\")\n", + "\n", + "response = mcp_agent.create_turn(\n", + " session_id=mcp_session_id,\n", + " messages=[UserMessage(content=query, role=\"user\")],\n", + " stream=True,\n", + ")\n", + "\n", + "print(\"\\n Assistant: \", end='')\n", + "tool_calls_made = []\n", + "\n", + "for chunk in response:\n", + " event_type = chunk.event.event_type\n", + "\n", + " if event_type == \"step_started\":\n", + " if chunk.event.step_type == \"tool_execution\":\n", + " print(f\"\\n\\n [Tool Execution Started]\")\n", + "\n", + " elif event_type == \"step_progress\":\n", + " # Check for tool call deltas\n", + " if hasattr(chunk.event.delta, 'delta_type'):\n", + " if chunk.event.delta.delta_type == \"tool_call_issued\":\n", + " tool_calls_made.append(chunk.event.delta)\n", + " result = simulate_tool_execution(\n", + " chunk.event.delta.tool_name,\n", + " json.loads(chunk.event.delta.arguments)\n", + " )\n", + " if hasattr(chunk.event.delta, 'text'):\n", + " print(chunk.event.delta.text, end='', flush=True)\n", + "\n", + " elif event_type == \"turn_completed\":\n", + " output = chunk.event.final_text\n", + " if output:\n", + " print(output)\n", + "\n", + "print()\n", + "if tool_calls_made:\n", + " print(f\"\\n Summary: Used {len(tool_calls_made)} tool(s) to answer the query\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: DELETE http://localhost:8321/v1/conversations/conv_5613324aa4c3193b1434bf562fe1c75dc2e0563c681738b1 \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Session cleaned up\n" + ] + } + ], + "source": [ + "# Cleanup\n", + "client.conversations.delete(conversation_id=mcp_session_id)\n", + "print(\"✅ Session cleaned up\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "# Summary\n", + "\n", + "This notebook demonstrated three levels of Llama Stack agent capabilities:\n", + "\n", + "## 1. Basic Agent\n", + "- ✅ Simple agent creation\n", + "- ✅ Session management \n", + "- ✅ Streaming responses\n", + "\n", + "## 2. Advanced Features\n", + "- ✅ Multi-turn conversations\n", + "- ✅ RAG (Retrieval-Augmented Generation) pattern\n", + "- ✅ Custom knowledge base integration\n", + "\n", + "## 3. 
MCP Tools Integration\n", + "- ✅ MCP-compatible tool definitions\n", + "- ✅ Automatic tool selection by the agent\n", + "- ✅ Tool execution and response handling\n", + "- ✅ Real-time streaming with tool calls\n", + "\n", + "\n", + "## Resources\n", + "\n", + "- [Llama Stack Documentation](https://llama-stack.readthedocs.io/)\n", + "- [Llama Stack GitHub](https://github.com/meta-llama/llama-stack)\n", + "- [MCP Protocol Specification](https://modelcontextprotocol.io/)\n", + "- [Ollama Documentation](https://ollama.ai/)" + ] + } + ], + "metadata": { + "fileHeader": "", + "fileUid": "92b7454e-a941-41f0-bd02-6d5e728f20f1", + "isAdHoc": false, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb index 0ce9c6f5f..7bcafd3a1 100644 --- a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb +++ b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb @@ -74,6 +74,7 @@ "source": [ "```bash\n", "uv sync --extra dev\n", + "uv pip install -U llama-stack-client\n", "uv pip install -e .\n", "source .venv/bin/activate\n", "```" diff --git a/docs/openapi_generator/README.md b/docs/openapi_generator/README.md deleted file mode 100644 index 85021d911..000000000 --- a/docs/openapi_generator/README.md +++ /dev/null @@ -1 +0,0 @@ -The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack.core/server/endpoints.py` using the `generate.py` utility. diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py deleted file mode 100644 index b489833b3..000000000 --- a/docs/openapi_generator/generate.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described found in the -# LICENSE file in the root directory of this source tree. 
- -from datetime import datetime -from pathlib import Path -import sys -import fire -import ruamel.yaml as yaml - -from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402 -from llama_stack.core.stack import LlamaStack # noqa: E402 - -from .pyopenapi.options import Options # noqa: E402 -from .pyopenapi.specification import Info, Server # noqa: E402 -from .pyopenapi.utility import Specification, validate_api # noqa: E402 - - -def str_presenter(dumper, data): - if data.startswith(f"/{LLAMA_STACK_API_V1}") or data.startswith( - "#/components/schemas/" - ): - style = None - else: - style = ">" if "\n" in data or len(data) > 40 else None - return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style) - - -def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: bool = False, combined_spec: bool = False): - """Generate OpenAPI spec with optional stability filtering.""" - - if combined_spec: - # Special case for combined stable + experimental APIs - title_suffix = " - Stable & Experimental APIs" - filename_prefix = "stainless-" - description_suffix = "\n\n**🔗 COMBINED**: This specification includes both stable production-ready APIs and experimental pre-release APIs. Use stable APIs for production deployments and experimental APIs for testing new features." - # Use the special "stainless" filter to include stable + experimental APIs - stability_filter = "stainless" - elif stability_filter: - title_suffix = { - "stable": " - Stable APIs" if not main_spec else "", - "experimental": " - Experimental APIs", - "deprecated": " - Deprecated APIs" - }.get(stability_filter, f" - {stability_filter.title()} APIs") - - # Use main spec filename for stable when main_spec=True - if main_spec and stability_filter == "stable": - filename_prefix = "" - else: - filename_prefix = f"{stability_filter}-" - - description_suffix = { - "stable": "\n\n**✅ STABLE**: Production-ready APIs with backward compatibility guarantees.", - "experimental": "\n\n**🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before becoming stable.", - "deprecated": "\n\n**⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for migration reference only." 
- }.get(stability_filter, "") - else: - title_suffix = "" - filename_prefix = "" - description_suffix = "" - - spec = Specification( - LlamaStack, - Options( - server=Server(url="http://any-hosted-llama-stack.com"), - info=Info( - title=f"Llama Stack Specification{title_suffix}", - version=LLAMA_STACK_API_V1, - description=f"""This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are tailored to - best leverage Llama Models.{description_suffix}""", - ), - include_standard_error_responses=True, - stability_filter=stability_filter, # Pass the filter to the generator - ), - ) - - yaml_filename = f"{filename_prefix}llama-stack-spec.yaml" - html_filename = f"{filename_prefix}llama-stack-spec.html" - - with open(output_dir / yaml_filename, "w", encoding="utf-8") as fp: - y = yaml.YAML() - y.default_flow_style = False - y.block_seq_indent = 2 - y.map_indent = 2 - y.sequence_indent = 4 - y.sequence_dash_offset = 2 - y.width = 80 - y.allow_unicode = True - y.representer.add_representer(str, str_presenter) - - y.dump( - spec.get_json(), - fp, - ) - - with open(output_dir / html_filename, "w") as fp: - spec.write_html(fp, pretty_print=True) - - print(f"Generated {yaml_filename} and {html_filename}") - -def main(output_dir: str): - output_dir = Path(output_dir) - if not output_dir.exists(): - raise ValueError(f"Directory {output_dir} does not exist") - - # Validate API protocols before generating spec - return_type_errors = validate_api() - if return_type_errors: - print("\nAPI Method Return Type Validation Errors:\n") - for error in return_type_errors: - print(error, file=sys.stderr) - sys.exit(1) - - now = str(datetime.now()) - print(f"Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at {now}") - print("") - - # Generate main spec as stable APIs (llama-stack-spec.yaml) - print("Generating main specification (stable APIs)...") - generate_spec(output_dir, "stable", main_spec=True) - - print("Generating other stability-filtered specifications...") - generate_spec(output_dir, "experimental") - generate_spec(output_dir, "deprecated") - - print("Generating combined stable + experimental specification...") - generate_spec(output_dir, combined_spec=True) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/docs/openapi_generator/pyopenapi/README.md b/docs/openapi_generator/pyopenapi/README.md deleted file mode 100644 index 1b5fbce19..000000000 --- a/docs/openapi_generator/pyopenapi/README.md +++ /dev/null @@ -1 +0,0 @@ -This is forked from https://github.com/hunyadi/pyopenapi diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py deleted file mode 100644 index 30fc9038d..000000000 --- a/docs/openapi_generator/pyopenapi/generator.py +++ /dev/null @@ -1,1175 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import hashlib -import inspect -import ipaddress -import os -import types -import typing -from dataclasses import make_dataclass -from pathlib import Path -from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union - -from fastapi import UploadFile - -from llama_stack.apis.datatypes import Error -from llama_stack.strong_typing.core import JsonType -from llama_stack.strong_typing.docstring import Docstring, parse_type -from llama_stack.strong_typing.inspection import ( - is_generic_list, - is_type_optional, - is_type_union, - is_unwrapped_body_param, - unwrap_generic_list, - unwrap_optional_type, - unwrap_union_types, -) -from llama_stack.strong_typing.name import python_type_to_name -from llama_stack.strong_typing.schema import ( - get_schema_identifier, - JsonSchemaGenerator, - register_schema, - Schema, - SchemaOptions, -) -from llama_stack.strong_typing.serialization import json_dump_string, object_to_json -from pydantic import BaseModel - -from .operations import ( - EndpointOperation, - get_endpoint_events, - get_endpoint_operations, - HTTPMethod, -) -from .options import * -from .specification import ( - Components, - Document, - Example, - ExampleRef, - ExtraBodyParameter, - MediaType, - Operation, - Parameter, - ParameterLocation, - PathItem, - RequestBody, - Response, - ResponseRef, - SchemaOrRef, - SchemaRef, - Tag, - TagGroup, -) - -register_schema( - ipaddress.IPv4Address, - schema={ - "type": "string", - "format": "ipv4", - "title": "IPv4 address", - "description": "IPv4 address, according to dotted-quad ABNF syntax as defined in RFC 2673, section 3.2.", - }, - examples=["192.0.2.0", "198.51.100.1", "203.0.113.255"], -) - -register_schema( - ipaddress.IPv6Address, - schema={ - "type": "string", - "format": "ipv6", - "title": "IPv6 address", - "description": "IPv6 address, as defined in RFC 2373, section 2.2.", - }, - examples=[ - "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", - "1080:0:0:0:8:800:200C:417A", - "1080::8:800:200C:417A", - "FF01::101", - "::1", - ], -) - - -def http_status_to_string(status_code: HTTPStatusCode) -> str: - "Converts an HTTP status code to a string." - - if isinstance(status_code, HTTPStatus): - return str(status_code.value) - elif isinstance(status_code, int): - return str(status_code) - elif isinstance(status_code, str): - return status_code - else: - raise TypeError("expected: HTTP status code") - - -class SchemaBuilder: - schema_generator: JsonSchemaGenerator - schemas: Dict[str, Schema] - - def __init__(self, schema_generator: JsonSchemaGenerator) -> None: - self.schema_generator = schema_generator - self.schemas = {} - - def classdef_to_schema(self, typ: type) -> Schema: - """ - Converts a type to a JSON schema. - For nested types found in the type hierarchy, adds the type to the schema registry in the OpenAPI specification section `components`. - """ - - type_schema, type_definitions = self.schema_generator.classdef_to_schema(typ) - - # append schema to list of known schemas, to be used in OpenAPI's Components Object section - for ref, schema in type_definitions.items(): - self._add_ref(ref, schema) - - return type_schema - - def classdef_to_named_schema(self, name: str, typ: type) -> Schema: - schema = self.classdef_to_schema(typ) - self._add_ref(name, schema) - return schema - - def classdef_to_ref(self, typ: type) -> SchemaOrRef: - """ - Converts a type to a JSON schema, and if possible, returns a schema reference. 
- For composite types (such as classes), adds the type to the schema registry in the OpenAPI specification section `components`. - """ - - type_schema = self.classdef_to_schema(typ) - if typ is str or typ is int or typ is float: - # represent simple types as themselves - return type_schema - - type_name = get_schema_identifier(typ) - if type_name is not None: - return self._build_ref(type_name, type_schema) - - try: - type_name = python_type_to_name(typ) - return self._build_ref(type_name, type_schema) - except TypeError: - pass - - return type_schema - - def _build_ref(self, type_name: str, type_schema: Schema) -> SchemaRef: - self._add_ref(type_name, type_schema) - return SchemaRef(type_name) - - def _add_ref(self, type_name: str, type_schema: Schema) -> None: - if type_name not in self.schemas: - self.schemas[type_name] = type_schema - - -class ContentBuilder: - schema_builder: SchemaBuilder - schema_transformer: Optional[Callable[[SchemaOrRef], SchemaOrRef]] - sample_transformer: Optional[Callable[[JsonType], JsonType]] - - def __init__( - self, - schema_builder: SchemaBuilder, - schema_transformer: Optional[Callable[[SchemaOrRef], SchemaOrRef]] = None, - sample_transformer: Optional[Callable[[JsonType], JsonType]] = None, - ) -> None: - self.schema_builder = schema_builder - self.schema_transformer = schema_transformer - self.sample_transformer = sample_transformer - - def build_content( - self, payload_type: type, examples: Optional[List[Any]] = None - ) -> Dict[str, MediaType]: - "Creates the content subtree for a request or response." - - def is_iterator_type(t): - return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t) - - def get_media_type(t): - if is_generic_list(t): - return "application/jsonl" - elif is_iterator_type(t): - return "text/event-stream" - else: - return "application/json" - - if typing.get_origin(payload_type) in (typing.Union, types.UnionType): - media_types = [] - item_types = [] - for x in typing.get_args(payload_type): - media_types.append(get_media_type(x)) - item_types.append(x) - - if len(set(media_types)) == 1: - # all types have the same media type - return {media_types[0]: self.build_media_type(payload_type, examples)} - else: - # different types have different media types - return { - media_type: self.build_media_type(item_type, examples) - for media_type, item_type in zip(media_types, item_types) - } - - if is_generic_list(payload_type): - media_type = "application/jsonl" - item_type = unwrap_generic_list(payload_type) - else: - media_type = "application/json" - item_type = payload_type - - return {media_type: self.build_media_type(item_type, examples)} - - def build_media_type( - self, item_type: type, examples: Optional[List[Any]] = None - ) -> MediaType: - schema = self.schema_builder.classdef_to_ref(item_type) - if self.schema_transformer: - schema_transformer: Callable[[SchemaOrRef], SchemaOrRef] = ( - self.schema_transformer - ) - schema = schema_transformer(schema) - - if not examples: - return MediaType(schema=schema) - - if len(examples) == 1: - return MediaType(schema=schema, example=self._build_example(examples[0])) - - return MediaType( - schema=schema, - examples=self._build_examples(examples), - ) - - def _build_examples( - self, examples: List[Any] - ) -> Dict[str, Union[Example, ExampleRef]]: - "Creates a set of several examples for a media type." 
- - if self.sample_transformer: - sample_transformer: Callable[[JsonType], JsonType] = self.sample_transformer # type: ignore - else: - sample_transformer = lambda sample: sample - - results: Dict[str, Union[Example, ExampleRef]] = {} - for example in examples: - value = sample_transformer(object_to_json(example)) - - hash_string = ( - hashlib.sha256(json_dump_string(value).encode("utf-8")) - .digest() - .hex()[:16] - ) - name = f"ex-{hash_string}" - - results[name] = Example(value=value) - - return results - - def _build_example(self, example: Any) -> Any: - "Creates a single example for a media type." - - if self.sample_transformer: - sample_transformer: Callable[[JsonType], JsonType] = self.sample_transformer # type: ignore - else: - sample_transformer = lambda sample: sample - - return sample_transformer(object_to_json(example)) - - -@dataclass -class ResponseOptions: - """ - Configuration options for building a response for an operation. - - :param type_descriptions: Maps each response type to a textual description (if available). - :param examples: A list of response examples. - :param status_catalog: Maps each response type to an HTTP status code. - :param default_status_code: HTTP status code assigned to responses that have no mapping. - """ - - type_descriptions: Dict[type, str] - examples: Optional[List[Any]] - status_catalog: Dict[type, HTTPStatusCode] - default_status_code: HTTPStatusCode - - -@dataclass -class StatusResponse: - status_code: str - types: List[type] = dataclasses.field(default_factory=list) - examples: List[Any] = dataclasses.field(default_factory=list) - - -def create_docstring_for_request( - request_name: str, fields: List[Tuple[str, type, Any]], doc_params: Dict[str, str] -) -> str: - """Creates a ReST-style docstring for a dynamically generated request dataclass.""" - lines = ["\n"] # Short description - - # Add parameter documentation in ReST format - for name, type_ in fields: - desc = doc_params.get(name, "") - lines.append(f":param {name}: {desc}") - - return "\n".join(lines) - - -class ResponseBuilder: - content_builder: ContentBuilder - - def __init__(self, content_builder: ContentBuilder) -> None: - self.content_builder = content_builder - - def _get_status_responses( - self, options: ResponseOptions - ) -> Dict[str, StatusResponse]: - status_responses: Dict[str, StatusResponse] = {} - - for response_type in options.type_descriptions.keys(): - status_code = http_status_to_string( - options.status_catalog.get(response_type, options.default_status_code) - ) - - # look up response for status code - if status_code not in status_responses: - status_responses[status_code] = StatusResponse(status_code) - status_response = status_responses[status_code] - - # append response types that are assigned the given status code - status_response.types.append(response_type) - - # append examples that have the matching response type - if options.examples: - status_response.examples.extend( - example - for example in options.examples - if isinstance(example, response_type) - ) - - return dict(sorted(status_responses.items())) - - def build_response( - self, options: ResponseOptions - ) -> Dict[str, Union[Response, ResponseRef]]: - """ - Groups responses that have the same status code. 
- """ - - responses: Dict[str, Union[Response, ResponseRef]] = {} - status_responses = self._get_status_responses(options) - for status_code, status_response in status_responses.items(): - response_types = tuple(status_response.types) - if len(response_types) > 1: - composite_response_type: type = Union[response_types] # type: ignore - else: - (response_type,) = response_types - composite_response_type = response_type - - description = " **OR** ".join( - filter( - None, - ( - options.type_descriptions[response_type] - for response_type in response_types - ), - ) - ) - - responses[status_code] = self._build_response( - response_type=composite_response_type, - description=description, - examples=status_response.examples or None, - ) - - return responses - - def _build_response( - self, - response_type: type, - description: str, - examples: Optional[List[Any]] = None, - ) -> Response: - "Creates a response subtree." - - if response_type is not None: - return Response( - description=description, - content=self.content_builder.build_content(response_type, examples), - ) - else: - return Response(description=description) - - -def schema_error_wrapper(schema: SchemaOrRef) -> Schema: - "Wraps an error output schema into a top-level error schema." - - return { - "type": "object", - "properties": { - "error": schema, # type: ignore - }, - "additionalProperties": False, - "required": [ - "error", - ], - } - - -def sample_error_wrapper(error: JsonType) -> JsonType: - "Wraps an error output sample into a top-level error sample." - - return {"error": error} - - -class Generator: - endpoint: type - options: Options - schema_builder: SchemaBuilder - responses: Dict[str, Response] - - def __init__(self, endpoint: type, options: Options) -> None: - self.endpoint = endpoint - self.options = options - schema_generator = JsonSchemaGenerator( - SchemaOptions( - definitions_path="#/components/schemas/", - use_examples=self.options.use_examples, - property_description_fun=options.property_description_fun, - ) - ) - self.schema_builder = SchemaBuilder(schema_generator) - self.responses = {} - - # Create standard error responses - self._create_standard_error_responses() - - def _create_standard_error_responses(self) -> None: - """ - Creates standard error responses that can be reused across operations. - These will be added to the components.responses section of the OpenAPI document. - """ - # Get the Error schema - error_schema = self.schema_builder.classdef_to_ref(Error) - - # Create standard error responses - self.responses["BadRequest400"] = Response( - description="The request was invalid or malformed", - content={ - "application/json": MediaType( - schema=error_schema, - example={ - "status": 400, - "title": "Bad Request", - "detail": "The request was invalid or malformed", - }, - ) - }, - ) - - self.responses["TooManyRequests429"] = Response( - description="The client has sent too many requests in a given amount of time", - content={ - "application/json": MediaType( - schema=error_schema, - example={ - "status": 429, - "title": "Too Many Requests", - "detail": "You have exceeded the rate limit. Please try again later.", - }, - ) - }, - ) - - self.responses["InternalServerError500"] = Response( - description="The server encountered an unexpected error", - content={ - "application/json": MediaType( - schema=error_schema, - example={ - "status": 500, - "title": "Internal Server Error", - "detail": "An unexpected error occurred. 
Our team has been notified.", - }, - ) - }, - ) - - # Add a default error response for any unhandled error cases - self.responses["DefaultError"] = Response( - description="An unexpected error occurred", - content={ - "application/json": MediaType( - schema=error_schema, - example={ - "status": 0, - "title": "Error", - "detail": "An unexpected error occurred", - }, - ) - }, - ) - - def _build_type_tag(self, ref: str, schema: Schema) -> Tag: - # Don't include schema definition in the tag description because for one, - # it is not very valuable and for another, it causes string formatting - # discrepancies via the Stainless Studio. - # - # definition = f'' - title = typing.cast(str, schema.get("title")) - description = typing.cast(str, schema.get("description")) - return Tag( - name=ref, - description="\n\n".join(s for s in (title, description) if s is not None), - ) - - def _build_extra_tag_groups( - self, extra_types: Dict[str, Dict[str, type]] - ) -> Dict[str, List[Tag]]: - """ - Creates a dictionary of tag group captions as keys, and tag lists as values. - - :param extra_types: A dictionary of type categories and list of types in that category. - """ - - extra_tags: Dict[str, List[Tag]] = {} - - for category_name, category_items in extra_types.items(): - tag_list: List[Tag] = [] - - for name, extra_type in category_items.items(): - schema = self.schema_builder.classdef_to_schema(extra_type) - tag_list.append(self._build_type_tag(name, schema)) - - if tag_list: - extra_tags[category_name] = tag_list - - return extra_tags - - def _get_api_group_for_operation(self, op) -> str | None: - """ - Determine the API group for an operation based on its route path. - - Args: - op: The endpoint operation - - Returns: - The API group name derived from the route, or None if unable to determine - """ - if not hasattr(op, 'webmethod') or not op.webmethod or not hasattr(op.webmethod, 'route'): - return None - - route = op.webmethod.route - if not route or not route.startswith('/'): - return None - - # Extract API group from route path - # Examples: /v1/agents/list -> agents-api - # /v1/responses -> responses-api - # /v1/models -> models-api - path_parts = route.strip('/').split('/') - - if len(path_parts) < 2: - return None - - # Skip version prefix (v1, v1alpha, v1beta, etc.) - if path_parts[0].startswith('v1'): - if len(path_parts) < 2: - return None - api_segment = path_parts[1] - else: - api_segment = path_parts[0] - - # Convert to supplementary file naming convention - # agents -> agents-api, responses -> responses-api, etc. - return f"{api_segment}-api" - - def _load_supplemental_content(self, api_group: str | None) -> str: - """ - Load supplemental content for an API group based on stability level. - - Follows this resolution order: - 1. docs/supplementary/{stability}/{api_group}.md - 2. docs/supplementary/shared/{api_group}.md (fallback) - 3. 
Empty string if no files found - - Args: - api_group: The API group name (e.g., "agents-responses-api"), or None if no mapping exists - - Returns: - The supplemental content as markdown string, or empty string if not found - """ - if not api_group: - return "" - - base_path = Path(__file__).parent.parent.parent / "supplementary" - - # Try stability-specific content first if stability filter is set - if self.options.stability_filter: - stability_path = base_path / self.options.stability_filter / f"{api_group}.md" - if stability_path.exists(): - try: - return stability_path.read_text(encoding="utf-8") - except Exception as e: - print(f"Warning: Could not read stability-specific supplemental content from {stability_path}: {e}") - - # Fall back to shared content - shared_path = base_path / "shared" / f"{api_group}.md" - if shared_path.exists(): - try: - return shared_path.read_text(encoding="utf-8") - except Exception as e: - print(f"Warning: Could not read shared supplemental content from {shared_path}: {e}") - - # No supplemental content found - return "" - - def _build_operation(self, op: EndpointOperation) -> Operation: - if op.defining_class.__name__ in [ - "SyntheticDataGeneration", - "PostTraining", - ]: - op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)" - print(op.defining_class.__name__) - - # TODO (xiyan): temporary fix for datasetio inner impl + datasets api - # if op.defining_class.__name__ in ["DatasetIO"]: - # op.defining_class.__name__ = "Datasets" - - doc_string = parse_type(op.func_ref) - doc_params = dict( - (param.name, param.description) for param in doc_string.params.values() - ) - - # parameters passed in URL component path - path_parameters = [ - Parameter( - name=param_name, - in_=ParameterLocation.Path, - description=doc_params.get(param_name), - required=True, - schema=self.schema_builder.classdef_to_ref(param_type), - ) - for param_name, param_type in op.path_params - ] - - # parameters passed in URL component query string - query_parameters = [] - for param_name, param_type in op.query_params: - if is_type_optional(param_type): - inner_type: type = unwrap_optional_type(param_type) - required = False - else: - inner_type = param_type - required = True - - query_parameter = Parameter( - name=param_name, - in_=ParameterLocation.Query, - description=doc_params.get(param_name), - required=required, - schema=self.schema_builder.classdef_to_ref(inner_type), - ) - query_parameters.append(query_parameter) - - # parameters passed anywhere - parameters = path_parameters + query_parameters - - # Build extra body parameters documentation - extra_body_parameters = [] - for param_name, param_type, description in op.extra_body_params: - if is_type_optional(param_type): - inner_type: type = unwrap_optional_type(param_type) - required = False - else: - inner_type = param_type - required = True - - # Use description from ExtraBodyField if available, otherwise from docstring - param_description = description or doc_params.get(param_name) - - extra_body_param = ExtraBodyParameter( - name=param_name, - schema=self.schema_builder.classdef_to_ref(inner_type), - description=param_description, - required=required, - ) - extra_body_parameters.append(extra_body_param) - - webmethod = getattr(op.func_ref, "__webmethod__", None) - raw_bytes_request_body = False - if webmethod: - raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False) - - # data passed in request body as raw bytes cannot have request parameters - if raw_bytes_request_body and 
op.request_params: - raise ValueError( - "Cannot have both raw bytes request body and request parameters" - ) - - # data passed in request body as raw bytes - if raw_bytes_request_body: - requestBody = RequestBody( - content={ - "application/octet-stream": { - "schema": { - "type": "string", - "format": "binary", - } - } - }, - required=True, - ) - # data passed in request body as multipart/form-data - elif op.multipart_params: - builder = ContentBuilder(self.schema_builder) - - # Create schema properties for multipart form fields - properties = {} - required_fields = [] - - for name, param_type in op.multipart_params: - if get_origin(param_type) is Annotated: - base_type = get_args(param_type)[0] - else: - base_type = param_type - - # Check if the type is optional - is_optional = is_type_optional(base_type) - if is_optional: - base_type = unwrap_optional_type(base_type) - - if base_type is UploadFile: - # File upload - properties[name] = {"type": "string", "format": "binary"} - else: - # All other types - generate schema reference - # This includes enums, BaseModels, and simple types - properties[name] = self.schema_builder.classdef_to_ref(base_type) - - if not is_optional: - required_fields.append(name) - - multipart_schema = { - "type": "object", - "properties": properties, - "required": required_fields, - } - - requestBody = RequestBody( - content={"multipart/form-data": {"schema": multipart_schema}}, - required=True, - ) - # data passed in payload as JSON and mapped to request parameters - elif op.request_params: - builder = ContentBuilder(self.schema_builder) - first = next(iter(op.request_params)) - request_name, request_type = first - - # Special case: if there's a single parameter with Body(embed=False) that's a BaseModel, - # unwrap it to show the flat structure in the OpenAPI spec - # Example: openai_chat_completion() - if (len(op.request_params) == 1 and is_unwrapped_body_param(request_type)): - pass - else: - op_name = "".join(word.capitalize() for word in op.name.split("_")) - request_name = f"{op_name}Request" - fields = [ - ( - name, - type_, - ) - for name, type_ in op.request_params - ] - request_type = make_dataclass( - request_name, - fields, - namespace={ - "__doc__": create_docstring_for_request( - request_name, fields, doc_params - ) - }, - ) - - requestBody = RequestBody( - content={ - "application/json": builder.build_media_type( - request_type, op.request_examples - ) - }, - description=doc_params.get(request_name), - required=True, - ) - else: - requestBody = None - - # success response types - if doc_string.returns is None and is_type_union(op.response_type): - # split union of return types into a list of response types - success_type_docstring: Dict[type, Docstring] = { - typing.cast(type, item): parse_type(item) - for item in unwrap_union_types(op.response_type) - } - success_type_descriptions = { - item: doc_string.short_description - for item, doc_string in success_type_docstring.items() - } - else: - # use return type as a single response type - success_type_descriptions = { - op.response_type: ( - doc_string.returns.description if doc_string.returns else "OK" - ) - } - - response_examples = op.response_examples or [] - success_examples = [ - example - for example in response_examples - if not isinstance(example, Exception) - ] - - content_builder = ContentBuilder(self.schema_builder) - response_builder = ResponseBuilder(content_builder) - response_options = ResponseOptions( - success_type_descriptions, - success_examples if self.options.use_examples else 
None, - self.options.success_responses, - "200", - ) - responses = response_builder.build_response(response_options) - - # failure response types - if doc_string.raises: - exception_types: Dict[type, str] = { - item.raise_type: item.description for item in doc_string.raises.values() - } - exception_examples = [ - example - for example in response_examples - if isinstance(example, Exception) - ] - - if self.options.error_wrapper: - schema_transformer = schema_error_wrapper - sample_transformer = sample_error_wrapper - else: - schema_transformer = None - sample_transformer = None - - content_builder = ContentBuilder( - self.schema_builder, - schema_transformer=schema_transformer, - sample_transformer=sample_transformer, - ) - response_builder = ResponseBuilder(content_builder) - response_options = ResponseOptions( - exception_types, - exception_examples if self.options.use_examples else None, - self.options.error_responses, - "500", - ) - responses.update(response_builder.build_response(response_options)) - - assert len(responses.keys()) > 0, f"No responses found for {op.name}" - - # Add standard error response references - if self.options.include_standard_error_responses: - if "400" not in responses: - responses["400"] = ResponseRef("BadRequest400") - if "429" not in responses: - responses["429"] = ResponseRef("TooManyRequests429") - if "500" not in responses: - responses["500"] = ResponseRef("InternalServerError500") - if "default" not in responses: - responses["default"] = ResponseRef("DefaultError") - - if op.event_type is not None: - builder = ContentBuilder(self.schema_builder) - callbacks = { - f"{op.func_name}_callback": { - "{$request.query.callback}": PathItem( - post=Operation( - requestBody=RequestBody( - content=builder.build_content(op.event_type) - ), - responses={"200": Response(description="OK")}, - ) - ) - } - } - - else: - callbacks = None - - # Build base description from docstring - base_description = "\n".join( - filter(None, [doc_string.short_description, doc_string.long_description]) - ) - - # Individual endpoints get clean descriptions only - description = base_description - - return Operation( - tags=[ - getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__) - ], - summary=doc_string.short_description, - description=description, - parameters=parameters, - requestBody=requestBody, - responses=responses, - callbacks=callbacks, - deprecated=getattr(op.webmethod, "deprecated", False) - or "DEPRECATED" in op.func_name, - security=[] if op.public else None, - extraBodyParameters=extra_body_parameters if extra_body_parameters else None, - ) - - def _get_api_stability_priority(self, api_level: str) -> int: - """ - Return sorting priority for API stability levels. 
- Lower numbers = higher priority (appear first) - - :param api_level: The API level (e.g., "v1", "v1beta", "v1alpha") - :return: Priority number for sorting - """ - stability_order = { - "v1": 0, # Stable - highest priority - "v1beta": 1, # Beta - medium priority - "v1alpha": 2, # Alpha - lowest priority - } - return stability_order.get(api_level, 999) # Unknown levels go last - - def generate(self) -> Document: - paths: Dict[str, PathItem] = {} - endpoint_classes: Set[type] = set() - - # Collect all operations and filter by stability if specified - operations = list( - get_endpoint_operations( - self.endpoint, use_examples=self.options.use_examples - ) - ) - - # Filter operations by stability level if requested - if self.options.stability_filter: - filtered_operations = [] - for op in operations: - deprecated = ( - getattr(op.webmethod, "deprecated", False) - or "DEPRECATED" in op.func_name - ) - stability_level = op.webmethod.level - - if self.options.stability_filter == "stable": - # Include v1 non-deprecated endpoints - if stability_level == "v1" and not deprecated: - filtered_operations.append(op) - elif self.options.stability_filter == "experimental": - # Include v1alpha and v1beta endpoints (deprecated or not) - if stability_level in ["v1alpha", "v1beta"]: - filtered_operations.append(op) - elif self.options.stability_filter == "deprecated": - # Include only deprecated endpoints - if deprecated: - filtered_operations.append(op) - elif self.options.stability_filter == "stainless": - # Include both stable (v1 non-deprecated) and experimental (v1alpha, v1beta) endpoints - if (stability_level == "v1" and not deprecated) or stability_level in ["v1alpha", "v1beta"]: - filtered_operations.append(op) - - operations = filtered_operations - print( - f"Filtered to {len(operations)} operations for stability level: {self.options.stability_filter}" - ) - - # Sort operations by multiple criteria for consistent ordering: - # 1. Stability level with deprecation handling (global priority): - # - Active stable (v1) comes first - # - Beta (v1beta) comes next - # - Alpha (v1alpha) comes next - # - Deprecated stable (v1 deprecated) comes last - # 2. Route path (group related endpoints within same stability level) - # 3. HTTP method (GET, POST, PUT, DELETE, PATCH) - # 4. 
Operation name (alphabetical) - def sort_key(op): - http_method_order = { - HTTPMethod.GET: 0, - HTTPMethod.POST: 1, - HTTPMethod.PUT: 2, - HTTPMethod.DELETE: 3, - HTTPMethod.PATCH: 4, - } - - # Enhanced stability priority for migration pattern support - deprecated = getattr(op.webmethod, "deprecated", False) - stability_priority = self._get_api_stability_priority(op.webmethod.level) - - # Deprecated versions should appear after everything else - # This ensures deprecated stable endpoints come last globally - if deprecated: - stability_priority += 10 # Push deprecated endpoints to the end - - return ( - stability_priority, # Global stability handling comes first - op.get_route( - op.webmethod - ), # Group by route path within stability level - http_method_order.get(op.http_method, 999), - op.func_name, - ) - - operations.sort(key=sort_key) - - # Debug output for migration pattern tracking - migration_routes = {} - for op in operations: - route_key = (op.get_route(op.webmethod), op.http_method) - if route_key not in migration_routes: - migration_routes[route_key] = [] - migration_routes[route_key].append( - (op.webmethod.level, getattr(op.webmethod, "deprecated", False)) - ) - - for route_key, versions in migration_routes.items(): - if len(versions) > 1: - print(f"Migration pattern detected for {route_key[1]} {route_key[0]}:") - for level, deprecated in versions: - status = "DEPRECATED" if deprecated else "ACTIVE" - print(f" - {level} ({status})") - - for op in operations: - endpoint_classes.add(op.defining_class) - - operation = self._build_operation(op) - - if op.http_method is HTTPMethod.GET: - pathItem = PathItem(get=operation) - elif op.http_method is HTTPMethod.PUT: - pathItem = PathItem(put=operation) - elif op.http_method is HTTPMethod.POST: - pathItem = PathItem(post=operation) - elif op.http_method is HTTPMethod.DELETE: - pathItem = PathItem(delete=operation) - elif op.http_method is HTTPMethod.PATCH: - pathItem = PathItem(patch=operation) - else: - raise NotImplementedError(f"unknown HTTP method: {op.http_method}") - - route = op.get_route(op.webmethod) - route = route.replace(":path", "") - print(f"route: {route}") - if route in paths: - paths[route].update(pathItem) - else: - paths[route] = pathItem - - operation_tags: List[Tag] = [] - for cls in endpoint_classes: - doc_string = parse_type(cls) - if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__: - continue - - # Add supplemental content to tag pages - api_group = f"{cls.__name__.lower()}-api" - supplemental_content = self._load_supplemental_content(api_group) - - tag_description = doc_string.long_description or "" - if supplemental_content: - if tag_description: - tag_description = f"{tag_description}\n\n{supplemental_content}" - else: - tag_description = supplemental_content - - operation_tags.append( - Tag( - name=cls.__name__, - description=tag_description, - displayName=doc_string.short_description, - ) - ) - - # types that are emitted by events - event_tags: List[Tag] = [] - events = get_endpoint_events(self.endpoint) - for ref, event_type in events.items(): - event_schema = self.schema_builder.classdef_to_named_schema(ref, event_type) - event_tags.append(self._build_type_tag(ref, event_schema)) - - # types that are explicitly declared - extra_tag_groups: Dict[str, List[Tag]] = {} - if self.options.extra_types is not None: - if isinstance(self.options.extra_types, list): - extra_tag_groups = self._build_extra_tag_groups( - {"AdditionalTypes": self.options.extra_types} - ) - elif 
isinstance(self.options.extra_types, dict): - extra_tag_groups = self._build_extra_tag_groups( - self.options.extra_types - ) - else: - raise TypeError( - f"type mismatch for collection of extra types: {type(self.options.extra_types)}" - ) - - # list all operations and types - tags: List[Tag] = [] - tags.extend(operation_tags) - tags.extend(event_tags) - for extra_tag_group in extra_tag_groups.values(): - tags.extend(extra_tag_group) - - tags = sorted(tags, key=lambda t: t.name) - - tag_groups = [] - if operation_tags: - tag_groups.append( - TagGroup( - name=self.options.map("Operations"), - tags=sorted(tag.name for tag in operation_tags), - ) - ) - if event_tags: - tag_groups.append( - TagGroup( - name=self.options.map("Events"), - tags=sorted(tag.name for tag in event_tags), - ) - ) - for caption, extra_tag_group in extra_tag_groups.items(): - tag_groups.append( - TagGroup( - name=caption, - tags=sorted(tag.name for tag in extra_tag_group), - ) - ) - - if self.options.default_security_scheme: - securitySchemes = {"Default": self.options.default_security_scheme} - else: - securitySchemes = None - - return Document( - openapi=".".join(str(item) for item in self.options.version), - info=self.options.info, - jsonSchemaDialect=( - "https://json-schema.org/draft/2020-12/schema" - if self.options.version >= (3, 1, 0) - else None - ), - servers=[self.options.server], - paths=paths, - components=Components( - schemas=self.schema_builder.schemas, - responses=self.responses, - securitySchemes=securitySchemes, - ), - security=[{"Default": []}], - tags=tags, - tagGroups=tag_groups, - ) diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py deleted file mode 100644 index 2970d7e53..000000000 --- a/docs/openapi_generator/pyopenapi/operations.py +++ /dev/null @@ -1,463 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import collections.abc -import enum -import inspect -import typing -from dataclasses import dataclass -from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union - -from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA - -from termcolor import colored - -from llama_stack.strong_typing.inspection import get_signature - -from typing import get_origin, get_args - -from fastapi import UploadFile -from fastapi.params import File, Form -from typing import Annotated - -from llama_stack.schema_utils import ExtraBodyField - - -def split_prefix( - s: str, sep: str, prefix: Union[str, Iterable[str]] -) -> Tuple[Optional[str], str]: - """ - Recognizes a prefix at the beginning of a string. - - :param s: The string to check. - :param sep: A separator between (one of) the prefix(es) and the rest of the string. - :param prefix: A string or a set of strings to identify as a prefix. - :return: A tuple of the recognized prefix (if any) and the rest of the string excluding the separator (or the entire string). 
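For illustration, the intended behavior reduces to the following (hypothetical calls, not part of the deleted file):

    split_prefix("get_object", "_", "get")          # -> ("get", "object")
    split_prefix("object", "_", ("get", "delete"))  # -> (None, "object")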
- """ - - if isinstance(prefix, str): - if s.startswith(prefix + sep): - return prefix, s[len(prefix) + len(sep) :] - else: - return None, s - - for p in prefix: - if s.startswith(p + sep): - return p, s[len(p) + len(sep) :] - - return None, s - - -def _get_annotation_type(annotation: Union[type, str], callable: Callable) -> type: - "Maps a stringized reference to a type, as if using `from __future__ import annotations`." - - if isinstance(annotation, str): - return eval(annotation, callable.__globals__) - else: - return annotation - - -class HTTPMethod(enum.Enum): - "HTTP method used to invoke an endpoint operation." - - GET = "GET" - POST = "POST" - PUT = "PUT" - DELETE = "DELETE" - PATCH = "PATCH" - - -OperationParameter = Tuple[str, type] - - -class ValidationError(TypeError): - pass - - -@dataclass -class EndpointOperation: - """ - Type information and metadata associated with an endpoint operation. - - "param defining_class: The most specific class that defines the endpoint operation. - :param name: The short name of the endpoint operation. - :param func_name: The name of the function to invoke when the operation is triggered. - :param func_ref: The callable to invoke when the operation is triggered. - :param route: A custom route string assigned to the operation. - :param path_params: Parameters of the operation signature that are passed in the path component of the URL string. - :param query_params: Parameters of the operation signature that are passed in the query string as `key=value` pairs. - :param request_params: The parameter that corresponds to the data transmitted in the request body. - :param multipart_params: Parameters that indicate multipart/form-data request body. - :param extra_body_params: Parameters that arrive via extra_body and are documented but not in SDK. - :param event_type: The Python type of the data that is transmitted out-of-band (e.g. via websockets) while the operation is in progress. - :param response_type: The Python type of the data that is transmitted in the response body. - :param http_method: The HTTP method used to invoke the endpoint such as POST, GET or PUT. - :param public: True if the operation can be invoked without prior authentication. - :param request_examples: Sample requests that the operation might take. - :param response_examples: Sample responses that the operation might produce. - """ - - defining_class: type - name: str - func_name: str - func_ref: Callable[..., Any] - route: Optional[str] - path_params: List[OperationParameter] - query_params: List[OperationParameter] - request_params: Optional[OperationParameter] - multipart_params: List[OperationParameter] - extra_body_params: List[tuple[str, type, str | None]] - event_type: Optional[type] - response_type: type - http_method: HTTPMethod - public: bool - request_examples: Optional[List[Any]] = None - response_examples: Optional[List[Any]] = None - - def get_route(self, webmethod) -> str: - api_level = webmethod.level - - if self.route is not None: - return "/".join(["", api_level, self.route.lstrip("/")]) - - route_parts = ["", api_level, self.name] - for param_name, _ in self.path_params: - route_parts.append("{" + param_name + "}") - return "/".join(route_parts) - - -class _FormatParameterExtractor: - "A visitor to exract parameters in a format string." 
- - keys: List[str] - - def __init__(self) -> None: - self.keys = [] - - def __getitem__(self, key: str) -> None: - self.keys.append(key) - return None - - -def _get_route_parameters(route: str) -> List[str]: - extractor = _FormatParameterExtractor() - # Replace all occurrences of ":path" with empty string - route = route.replace(":path", "") - route.format_map(extractor) - return extractor.keys - - -def _get_endpoint_functions( - endpoint: type, prefixes: List[str] -) -> Iterator[Tuple[str, str, str, Callable]]: - if not inspect.isclass(endpoint): - raise ValueError(f"object is not a class type: {endpoint}") - - functions = inspect.getmembers(endpoint, inspect.isfunction) - for func_name, func_ref in functions: - webmethods = [] - - # Check for multiple webmethods (stacked decorators) - if hasattr(func_ref, "__webmethods__"): - webmethods = func_ref.__webmethods__ - - if not webmethods: - continue - - for webmethod in webmethods: - print(f"Processing {colored(func_name, 'white')}...") - operation_name = func_name - - if webmethod.method == "GET": - prefix = "get" - elif webmethod.method == "DELETE": - prefix = "delete" - elif webmethod.method == "POST": - prefix = "post" - elif operation_name.startswith("get_") or operation_name.endswith("/get"): - prefix = "get" - elif ( - operation_name.startswith("delete_") - or operation_name.startswith("remove_") - or operation_name.endswith("/delete") - or operation_name.endswith("/remove") - ): - prefix = "delete" - else: - # by default everything else is a POST - prefix = "post" - - yield prefix, operation_name, func_name, func_ref - - -def _get_defining_class(member_fn: str, derived_cls: type) -> type: - "Find the class in which a member function is first defined in a class inheritance hierarchy." - - # This import must be dynamic here - from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime - - # iterate in reverse member resolution order to find most specific class first - for cls in reversed(inspect.getmro(derived_cls)): - for name, _ in inspect.getmembers(cls, inspect.isfunction): - if name == member_fn: - # HACK ALERT - if cls == RAGToolRuntime: - return ToolRuntime - return cls - - raise ValidationError( - f"cannot find defining class for {member_fn} in {derived_cls}" - ) - - -def get_endpoint_operations( - endpoint: type, use_examples: bool = True -) -> List[EndpointOperation]: - """ - Extracts a list of member functions in a class eligible for HTTP interface binding. - - These member functions are expected to have a signature like - ``` - async def get_object(self, uuid: str, version: int) -> Object: - ... - ``` - where the prefix `get_` translates to an HTTP GET, `object` corresponds to the name of the endpoint operation, - `uuid` and `version` are mapped to route path elements in "/object/{uuid}/{version}", and `Object` becomes - the response payload type, transmitted as an object serialized to JSON. - - If the member function has a composite class type in the argument list, it becomes the request payload type, - and the caller is expected to provide the data as serialized JSON in an HTTP POST request. - - :param endpoint: A class with member functions that can be mapped to an HTTP endpoint. - :param use_examples: Whether to return examples associated with member functions. 
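A sketch of the resulting mapping, assuming a hypothetical `Objects` protocol and illustrative `@webmethod` arguments:

    class Objects(Protocol):
        @webmethod(route="/object/{uuid}", method="GET", level="v1")
        async def get_object(self, uuid: str) -> Object: ...

    (op,) = get_endpoint_operations(Objects)
    # op.http_method is HTTPMethod.GET, op.path_params == [("uuid", str)],
    # op.response_type is Object, and op.get_route(op.webmethod) == "/v1/object/{uuid}"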
- """ - - result = [] - - for prefix, operation_name, func_name, func_ref in _get_endpoint_functions( - endpoint, - [ - "create", - "delete", - "do", - "get", - "post", - "put", - "remove", - "set", - "update", - ], - ): - # Get all webmethods for this function - webmethods = getattr(func_ref, "__webmethods__", []) - - # Create one EndpointOperation for each webmethod - for webmethod in webmethods: - route = webmethod.route - route_params = _get_route_parameters(route) if route is not None else None - public = webmethod.public - request_examples = webmethod.request_examples - response_examples = webmethod.response_examples - - # inspect function signature for path and query parameters, and request/response payload type - signature = get_signature(func_ref) - - path_params = [] - query_params = [] - request_params = [] - multipart_params = [] - extra_body_params = [] - - for param_name, parameter in signature.parameters.items(): - param_type = _get_annotation_type(parameter.annotation, func_ref) - - # omit "self" for instance methods - if param_name == "self" and param_type is inspect.Parameter.empty: - continue - - # check if all parameters have explicit type - if parameter.annotation is inspect.Parameter.empty: - raise ValidationError( - f"parameter '{param_name}' in function '{func_name}' has no type annotation" - ) - - # Check if this is an extra_body parameter - is_extra_body, extra_body_desc = _is_extra_body_param(param_type) - if is_extra_body: - # Store in a separate list for documentation - extra_body_params.append((param_name, param_type, extra_body_desc)) - continue # Skip adding to request_params - - is_multipart = _is_multipart_param(param_type) - - if prefix in ["get", "delete"]: - if route_params is not None and param_name in route_params: - path_params.append((param_name, param_type)) - else: - query_params.append((param_name, param_type)) - else: - if route_params is not None and param_name in route_params: - path_params.append((param_name, param_type)) - elif is_multipart: - multipart_params.append((param_name, param_type)) - else: - request_params.append((param_name, param_type)) - - # check if function has explicit return type - if signature.return_annotation is inspect.Signature.empty: - raise ValidationError( - f"function '{func_name}' has no return type annotation" - ) - - return_type = _get_annotation_type(signature.return_annotation, func_ref) - - # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType] - # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request - if typing.get_origin(return_type) is collections.abc.Generator: - event_type, send_type, response_type = typing.get_args(return_type) - if send_type is not type(None): - raise ValidationError( - f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type" - ) - else: - event_type = None - - def process_type(t): - if typing.get_origin(t) is collections.abc.AsyncIterator: - # NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List - # or the item type. 
I am choosing it to be the latter - args = typing.get_args(t) - return args[0] - elif typing.get_origin(t) is typing.Union: - types = [process_type(a) for a in typing.get_args(t)] - return typing._UnionGenericAlias(typing.Union, tuple(types)) - else: - return t - - response_type = process_type(return_type) - - if prefix in ["delete", "remove"]: - http_method = HTTPMethod.DELETE - elif prefix == "post": - http_method = HTTPMethod.POST - elif prefix == "get": - http_method = HTTPMethod.GET - elif prefix == "set": - http_method = HTTPMethod.PUT - elif prefix == "update": - http_method = HTTPMethod.PATCH - else: - raise ValidationError(f"unknown prefix {prefix}") - - # Create an EndpointOperation for this specific webmethod - operation = EndpointOperation( - defining_class=_get_defining_class(func_name, endpoint), - name=operation_name, - func_name=func_name, - func_ref=func_ref, - route=route, - path_params=path_params, - query_params=query_params, - request_params=request_params, - multipart_params=multipart_params, - extra_body_params=extra_body_params, - event_type=event_type, - response_type=response_type, - http_method=http_method, - public=public, - request_examples=request_examples if use_examples else None, - response_examples=response_examples if use_examples else None, - ) - - # Store the specific webmethod with this operation - operation.webmethod = webmethod - result.append(operation) - - if not result: - raise ValidationError(f"no eligible endpoint operations in type {endpoint}") - - return result - - -def get_endpoint_events(endpoint: type) -> Dict[str, type]: - results = {} - - for decl in typing.get_type_hints(endpoint).values(): - # check if signature is Callable[...] - origin = typing.get_origin(decl) - if origin is None or not issubclass(origin, Callable): # type: ignore - continue - - # check if signature is Callable[[...], Any] - args = typing.get_args(decl) - if len(args) != 2: - continue - params_type, return_type = args - if not isinstance(params_type, list): - continue - - # check if signature is Callable[[...], None] - if not issubclass(return_type, type(None)): - continue - - # check if signature is Callable[[EventType], None] - if len(params_type) != 1: - continue - - param_type = params_type[0] - results[param_type.__name__] = param_type - - return results - - -def _is_multipart_param(param_type: type) -> bool: - """ - Check if a parameter type indicates multipart form data. - - Returns True if the type is: - - UploadFile - - Annotated[UploadFile, File()] - - Annotated[str, Form()] - - Annotated[Any, File()] - - Annotated[Any, Form()] - """ - if param_type is UploadFile: - return True - - # Check for Annotated types - origin = get_origin(param_type) - if origin is None: - return False - - if origin is Annotated: - args = get_args(param_type) - if len(args) < 2: - return False - - # Check the annotations for File() or Form() - for annotation in args[1:]: - if isinstance(annotation, (File, Form)): - return True - return False - - -def _is_extra_body_param(param_type: type) -> tuple[bool, str | None]: - """ - Check if parameter is marked as coming from extra_body. 
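For example, a parameter declared as below (a hypothetical annotation; the `ExtraBodyField` constructor argument is assumed to be the description) is detected here and routed to `extra_body_params` instead of the request-body schema:

    guardrails: Annotated[list[str] | None, ExtraBodyField("Guardrails to apply")] = None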
- - Returns: - (is_extra_body, description): Tuple of boolean and optional description - """ - origin = get_origin(param_type) - if origin is Annotated: - args = get_args(param_type) - for annotation in args[1:]: - if isinstance(annotation, ExtraBodyField): - return True, annotation.description - # Also check by type name for cases where import matters - if type(annotation).__name__ == 'ExtraBodyField': - return True, getattr(annotation, 'description', None) - return False, None diff --git a/docs/openapi_generator/pyopenapi/options.py b/docs/openapi_generator/pyopenapi/options.py deleted file mode 100644 index 53855b5b6..000000000 --- a/docs/openapi_generator/pyopenapi/options.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import dataclasses -from dataclasses import dataclass -from http import HTTPStatus -from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Union - -from .specification import ( - Info, - SecurityScheme, - SecuritySchemeAPI, - SecuritySchemeHTTP, - SecuritySchemeOpenIDConnect, - Server, -) - -HTTPStatusCode = Union[HTTPStatus, int, str] - - -@dataclass -class Options: - """ - :param server: Base URL for the API endpoint. - :param info: Meta-information for the endpoint specification. - :param version: OpenAPI specification version as a tuple of major, minor, revision. - :param default_security_scheme: Security scheme to apply to endpoints, unless overridden on a per-endpoint basis. - :param extra_types: Extra types in addition to those found in operation signatures. Use a dictionary to group related types. - :param use_examples: Whether to emit examples for operations. - :param success_responses: Associates operation response types with HTTP status codes. - :param error_responses: Associates error response types with HTTP status codes. - :param error_wrapper: True if errors are encapsulated in an error object wrapper. - :param property_description_fun: Custom transformation function to apply to class property documentation strings. - :param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types. - :param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, 503) in all operations. - """ - - server: Server - info: Info - version: Tuple[int, int, int] = (3, 1, 0) - default_security_scheme: Optional[SecurityScheme] = None - extra_types: Union[List[type], Dict[str, List[type]], None] = None - use_examples: bool = True - success_responses: Dict[type, HTTPStatusCode] = dataclasses.field( - default_factory=dict - ) - error_responses: Dict[type, HTTPStatusCode] = dataclasses.field( - default_factory=dict - ) - error_wrapper: bool = False - property_description_fun: Optional[Callable[[type, str, str], str]] = None - captions: Optional[Dict[str, str]] = None - include_standard_error_responses: bool = True - stability_filter: Optional[str] = None - - default_captions: ClassVar[Dict[str, str]] = { - "Operations": "Operations", - "Types": "Types", - "Events": "Events", - "AdditionalTypes": "Additional types", - } - - def map(self, id: str) -> str: - "Maps a language-neutral placeholder string to language-dependent text." 
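The lookup below falls back from user-supplied captions to the class defaults; a small sketch (other constructor arguments abbreviated):

    options = Options(server=Server(url="http://localhost"),
                      info=Info(title="API", version="1"),
                      captions={"Operations": "Endpoints"})
    options.map("Operations")       # -> "Endpoints" (user override)
    options.map("AdditionalTypes")  # -> "Additional types" (class default)
    options.map("Unknown")          # raises KeyError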
- - if self.captions is not None: - caption = self.captions.get(id) - if caption is not None: - return caption - - caption = self.__class__.default_captions.get(id) - if caption is not None: - return caption - - raise KeyError(f"no caption found for ID: {id}") diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py deleted file mode 100644 index 90bf54316..000000000 --- a/docs/openapi_generator/pyopenapi/specification.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import dataclasses -import enum -from dataclasses import dataclass -from typing import Any, ClassVar, Dict, List, Optional, Union - -from llama_stack.strong_typing.schema import JsonType, Schema, StrictJsonType - -URL = str - - -@dataclass -class Ref: - ref_type: ClassVar[str] - id: str - - def to_json(self) -> StrictJsonType: - return {"$ref": f"#/components/{self.ref_type}/{self.id}"} - - -@dataclass -class SchemaRef(Ref): - ref_type: ClassVar[str] = "schemas" - - -SchemaOrRef = Union[Schema, SchemaRef] - - -@dataclass -class ResponseRef(Ref): - ref_type: ClassVar[str] = "responses" - - -@dataclass -class ParameterRef(Ref): - ref_type: ClassVar[str] = "parameters" - - -@dataclass -class ExampleRef(Ref): - ref_type: ClassVar[str] = "examples" - - -@dataclass -class Contact: - name: Optional[str] = None - url: Optional[URL] = None - email: Optional[str] = None - - -@dataclass -class License: - name: str - url: Optional[URL] = None - - -@dataclass -class Info: - title: str - version: str - description: Optional[str] = None - termsOfService: Optional[str] = None - contact: Optional[Contact] = None - license: Optional[License] = None - - -@dataclass -class MediaType: - schema: Optional[SchemaOrRef] = None - example: Optional[Any] = None - examples: Optional[Dict[str, Union["Example", ExampleRef]]] = None - - -@dataclass -class RequestBody: - content: Dict[str, MediaType | Dict[str, Any]] - description: Optional[str] = None - required: Optional[bool] = None - - -@dataclass -class Response: - description: str - content: Optional[Dict[str, MediaType]] = None - - -class ParameterLocation(enum.Enum): - Query = "query" - Header = "header" - Path = "path" - Cookie = "cookie" - - -@dataclass -class Parameter: - name: str - in_: ParameterLocation - description: Optional[str] = None - required: Optional[bool] = None - schema: Optional[SchemaOrRef] = None - example: Optional[Any] = None - - -@dataclass -class ExtraBodyParameter: - """Represents a parameter that arrives via extra_body in the request.""" - name: str - schema: SchemaOrRef - description: Optional[str] = None - required: Optional[bool] = None - - -@dataclass -class Operation: - responses: Dict[str, Union[Response, ResponseRef]] - tags: Optional[List[str]] = None - summary: Optional[str] = None - description: Optional[str] = None - operationId: Optional[str] = None - parameters: Optional[List[Parameter]] = None - requestBody: Optional[RequestBody] = None - callbacks: Optional[Dict[str, "Callback"]] = None - security: Optional[List["SecurityRequirement"]] = None - deprecated: Optional[bool] = None - extraBodyParameters: Optional[List[ExtraBodyParameter]] = None - - -@dataclass -class PathItem: - summary: Optional[str] = None - description: Optional[str] = None - get: Optional[Operation] = None - put: Optional[Operation] = None - 
post: Optional[Operation] = None - delete: Optional[Operation] = None - options: Optional[Operation] = None - head: Optional[Operation] = None - patch: Optional[Operation] = None - trace: Optional[Operation] = None - - def update(self, other: "PathItem") -> None: - "Merges another instance of this class into this object." - - for field in dataclasses.fields(self.__class__): - value = getattr(other, field.name) - if value is not None: - setattr(self, field.name, value) - - -# maps run-time expressions such as "$request.body#/url" to path items -Callback = Dict[str, PathItem] - - -@dataclass -class Example: - summary: Optional[str] = None - description: Optional[str] = None - value: Optional[Any] = None - externalValue: Optional[URL] = None - - -@dataclass -class Server: - url: URL - description: Optional[str] = None - - -class SecuritySchemeType(enum.Enum): - ApiKey = "apiKey" - HTTP = "http" - OAuth2 = "oauth2" - OpenIDConnect = "openIdConnect" - - -@dataclass -class SecurityScheme: - type: SecuritySchemeType - description: str - - -@dataclass(init=False) -class SecuritySchemeAPI(SecurityScheme): - name: str - in_: ParameterLocation - - def __init__(self, description: str, name: str, in_: ParameterLocation) -> None: - super().__init__(SecuritySchemeType.ApiKey, description) - self.name = name - self.in_ = in_ - - -@dataclass(init=False) -class SecuritySchemeHTTP(SecurityScheme): - scheme: str - bearerFormat: Optional[str] = None - - def __init__( - self, description: str, scheme: str, bearerFormat: Optional[str] = None - ) -> None: - super().__init__(SecuritySchemeType.HTTP, description) - self.scheme = scheme - self.bearerFormat = bearerFormat - - -@dataclass(init=False) -class SecuritySchemeOpenIDConnect(SecurityScheme): - openIdConnectUrl: str - - def __init__(self, description: str, openIdConnectUrl: str) -> None: - super().__init__(SecuritySchemeType.OpenIDConnect, description) - self.openIdConnectUrl = openIdConnectUrl - - -@dataclass -class Components: - schemas: Optional[Dict[str, Schema]] = None - responses: Optional[Dict[str, Response]] = None - parameters: Optional[Dict[str, Parameter]] = None - examples: Optional[Dict[str, Example]] = None - requestBodies: Optional[Dict[str, RequestBody]] = None - securitySchemes: Optional[Dict[str, SecurityScheme]] = None - callbacks: Optional[Dict[str, Callback]] = None - - -SecurityScope = str -SecurityRequirement = Dict[str, List[SecurityScope]] - - -@dataclass -class Tag: - name: str - description: Optional[str] = None - displayName: Optional[str] = None - - -@dataclass -class TagGroup: - """ - A ReDoc extension to provide information about groups of tags. - - Exposed via the vendor-specific property "x-tagGroups" of the top-level object. - """ - - name: str - tags: List[str] - - -@dataclass -class Document: - """ - This class is a Python dataclass adaptation of the OpenAPI Specification. 
-
-    For details, see
-    """
-
-    openapi: str
-    info: Info
-    servers: List[Server]
-    paths: Dict[str, PathItem]
-    jsonSchemaDialect: Optional[str] = None
-    components: Optional[Components] = None
-    security: Optional[List[SecurityRequirement]] = None
-    tags: Optional[List[Tag]] = None
-    tagGroups: Optional[List[TagGroup]] = None
diff --git a/docs/openapi_generator/pyopenapi/template.html b/docs/openapi_generator/pyopenapi/template.html
deleted file mode 100644
index 5848f364e..000000000
--- a/docs/openapi_generator/pyopenapi/template.html
+++ /dev/null
@@ -1,41 +0,0 @@
[41 deleted lines: a standalone HTML page titled "OpenAPI specification"; the markup was lost in extraction and only the page title survives]
diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py
deleted file mode 100644
index c1425b250..000000000
--- a/docs/openapi_generator/pyopenapi/utility.py
+++ /dev/null
@@ -1,288 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-import typing
-import inspect
-from pathlib import Path
-from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args
-
-from pydantic import BaseModel
-from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
-from llama_stack.strong_typing.inspection import is_unwrapped_body_param
-from llama_stack.core.resolver import api_protocol_map
-
-from .generator import Generator
-from .options import Options
-from .specification import Document
-
-THIS_DIR = Path(__file__).parent
-
-
-class Specification:
-    document: Document
-
-    def __init__(self, endpoint: type, options: Options):
-        generator = Generator(endpoint, options)
-        self.document = generator.generate()
-
-    def get_json(self) -> StrictJsonType:
-        """
-        Returns the OpenAPI specification as a Python data type (e.g. `dict` for an object, `list` for an array).
-
-        The result can be serialized to a JSON string with `json.dump` or `json.dumps`.
-        """
-
-        json_doc = typing.cast(StrictJsonType, object_to_json(self.document))
-
-        if isinstance(json_doc, dict):
-            # rename vendor-specific properties
-            tag_groups = json_doc.pop("tagGroups", None)
-            if tag_groups:
-                json_doc["x-tagGroups"] = tag_groups
-            tags = json_doc.get("tags")
-            if tags and isinstance(tags, list):
-                for tag in tags:
-                    if not isinstance(tag, dict):
-                        continue
-
-                    display_name = tag.pop("displayName", None)
-                    if display_name:
-                        tag["x-displayName"] = display_name
-
-            # Handle operations to rename extraBodyParameters -> x-llama-stack-extra-body-params
-            paths = json_doc.get("paths", {})
-            for path_item in paths.values():
-                if isinstance(path_item, dict):
-                    for method in ["get", "post", "put", "delete", "patch"]:
-                        operation = path_item.get(method)
-                        if operation and isinstance(operation, dict):
-                            extra_body_params = operation.pop("extraBodyParameters", None)
-                            if extra_body_params:
-                                operation["x-llama-stack-extra-body-params"] = extra_body_params
-
-        return json_doc
-
-    def get_json_string(self, pretty_print: bool = False) -> str:
-        """
-        Returns the OpenAPI specification as a JSON string.
-
-        :param pretty_print: Whether to use line indents to beautify the output.
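Typical use of the class above, as a sketch (`MyAPI` stands in for an endpoint protocol with `@webmethod`-decorated members):

    spec = Specification(MyAPI, Options(server=Server(url="http://localhost"),
                                        info=Info(title="Llama Stack API", version="1")))
    compact = spec.get_json_string()                   # minified: separators=(",", ":")
    pretty = spec.get_json_string(pretty_print=True)   # indented with 4 spaces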
- """ - - json_doc = self.get_json() - if pretty_print: - return json.dumps( - json_doc, check_circular=False, ensure_ascii=False, indent=4 - ) - else: - return json.dumps( - json_doc, - check_circular=False, - ensure_ascii=False, - separators=(",", ":"), - ) - - def write_json(self, f: TextIO, pretty_print: bool = False) -> None: - """ - Writes the OpenAPI specification to a file as a JSON string. - - :param pretty_print: Whether to use line indents to beautify the output. - """ - - json_doc = self.get_json() - if pretty_print: - json.dump( - json_doc, - f, - check_circular=False, - ensure_ascii=False, - indent=4, - ) - else: - json.dump( - json_doc, - f, - check_circular=False, - ensure_ascii=False, - separators=(",", ":"), - ) - - def write_html(self, f: TextIO, pretty_print: bool = False) -> None: - """ - Creates a stand-alone HTML page for the OpenAPI specification with ReDoc. - - :param pretty_print: Whether to use line indents to beautify the JSON string in the HTML file. - """ - - path = THIS_DIR / "template.html" - with path.open(encoding="utf-8", errors="strict") as html_template_file: - html_template = html_template_file.read() - - html = html_template.replace( - "{ /* OPENAPI_SPECIFICATION */ }", - self.get_json_string(pretty_print=pretty_print), - ) - - f.write(html) - -def is_optional_type(type_: Any) -> bool: - """Check if a type is Optional.""" - origin = get_origin(type_) - args = get_args(type_) - return origin is Optional or (origin is Union and type(None) in args) - - -def _validate_api_method_return_type(method) -> str | None: - hints = get_type_hints(method) - - if 'return' not in hints: - return "has no return type annotation" - - return_type = hints['return'] - if is_optional_type(return_type): - return "returns Optional type where a return value is mandatory" - - -def _validate_api_method_doesnt_return_list(method) -> str | None: - hints = get_type_hints(method) - - if 'return' not in hints: - return "has no return type annotation" - - return_type = hints['return'] - if get_origin(return_type) is list: - return "returns a list where a PaginatedResponse or List*Response object is expected" - - -def _validate_api_delete_method_returns_none(method) -> str | None: - hints = get_type_hints(method) - - if 'return' not in hints: - return "has no return type annotation" - - return_type = hints['return'] - - # Allow OpenAI endpoints to return response objects since they follow OpenAI specification - method_name = getattr(method, '__name__', '') - if method_name.__contains__('openai_'): - return None - - if return_type is not None and return_type is not type(None): - return "does not return None where None is mandatory" - - -def _validate_list_parameters_contain_data(method) -> str | None: - hints = get_type_hints(method) - - if 'return' not in hints: - return "has no return type annotation" - - return_type = hints['return'] - if not inspect.isclass(return_type): - return - - if not return_type.__name__.startswith('List'): - return - - if 'data' not in return_type.model_fields: - return "does not have a mandatory data attribute containing the list of objects" - - -def _validate_has_ellipsis(method) -> str | None: - source = inspect.getsource(method) - if "..." not in source and not "NotImplementedError" in source: - return "does not contain ellipsis (...) 
in its implementation" - -def _validate_has_return_in_docstring(method) -> str | None: - source = inspect.getsource(method) - return_type = method.__annotations__.get('return') - if return_type is not None and return_type != type(None) and ":returns:" not in source: - return "does not have a ':returns:' in its docstring" - -def _validate_has_params_in_docstring(method) -> str | None: - source = inspect.getsource(method) - sig = inspect.signature(method) - - params_list = [p for p in sig.parameters.values() if p.name != "self"] - if len(params_list) == 1: - param = params_list[0] - param_type = param.annotation - if is_unwrapped_body_param(param_type): - return - - # Only check if the method has more than one parameter - if len(sig.parameters) > 1 and ":param" not in source: - return "does not have a ':param' in its docstring" - -def _validate_has_no_return_none_in_docstring(method) -> str | None: - source = inspect.getsource(method) - return_type = method.__annotations__.get('return') - if return_type is None and ":returns: None" in source: - return "has a ':returns: None' in its docstring which is redundant for None-returning functions" - -def _validate_docstring_lines_end_with_dot(method) -> str | None: - docstring = inspect.getdoc(method) - if docstring is None: - return None - - lines = docstring.split('\n') - for line in lines: - line = line.strip() - if line and not any(line.endswith(char) for char in '.:{}[]()",'): - return f"docstring line '{line}' does not end with a valid character: . : {{ }} [ ] ( ) , \"" - -_VALIDATORS = { - "GET": [ - _validate_api_method_return_type, - _validate_list_parameters_contain_data, - _validate_api_method_doesnt_return_list, - _validate_has_ellipsis, - _validate_has_return_in_docstring, - _validate_has_params_in_docstring, - _validate_docstring_lines_end_with_dot, - ], - "DELETE": [ - _validate_api_delete_method_returns_none, - _validate_has_ellipsis, - _validate_has_return_in_docstring, - _validate_has_params_in_docstring, - _validate_has_no_return_none_in_docstring - ], - "POST": [ - _validate_has_ellipsis, - _validate_has_return_in_docstring, - _validate_has_params_in_docstring, - _validate_has_no_return_none_in_docstring, - _validate_docstring_lines_end_with_dot, - ], -} - - -def _get_methods_by_type(protocol, method_type: str): - members = inspect.getmembers(protocol, predicate=inspect.isfunction) - return { - method_name: method - for method_name, method in members - if (webmethod := getattr(method, '__webmethod__', None)) - if webmethod and webmethod.method == method_type - } - - -def validate_api() -> List[str]: - """Validate the API protocols.""" - errors = [] - protocols = api_protocol_map() - - for target, validators in _VALIDATORS.items(): - for protocol_name, protocol in protocols.items(): - for validator in validators: - for method_name, method in _get_methods_by_type(protocol, target).items(): - err = validator(method) - if err: - errors.append(f"Method {protocol_name}.{method_name} {err}") - - return errors diff --git a/docs/openapi_generator/run_openapi_generator.sh b/docs/openapi_generator/run_openapi_generator.sh deleted file mode 100755 index 6cffd42b0..000000000 --- a/docs/openapi_generator/run_openapi_generator.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -PYTHONPATH=${PYTHONPATH:-} -THIS_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" - -set -euo pipefail - -missing_packages=() - -check_package() { - if ! pip show "$1" &>/dev/null; then - missing_packages+=("$1") - fi -} - -if [ ${#missing_packages[@]} -ne 0 ]; then - echo "Error: The following package(s) are not installed:" - printf " - %s\n" "${missing_packages[@]}" - echo "Please install them using:" - echo "pip install ${missing_packages[*]}" - exit 1 -fi - -stack_dir=$(dirname $(dirname $THIS_DIR)) -PYTHONPATH=$PYTHONPATH:$stack_dir \ - python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/static - -cp $stack_dir/docs/static/stainless-llama-stack-spec.yaml $stack_dir/client-sdks/stainless/openapi.yml diff --git a/docs/package-lock.json b/docs/package-lock.json index aa133c935..2a548914c 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -17,7 +17,12 @@ "docusaurus-theme-openapi-docs": "4.3.7", "prism-react-renderer": "^2.3.0", "react": "^19.0.0", - "react-dom": "^19.0.0" + "react-dom": "^19.0.0", + "remark-code-import": "^1.2.0" + }, + "devDependencies": { + "raw-loader": "^4.0.2", + "react-markdown": "^10.1.0" } }, "node_modules/@algolia/abtesting": { @@ -8557,6 +8562,16 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/docusaurus-theme-openapi-docs/node_modules/hast-util-whitespace": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-2.0.1.tgz", + "integrity": "sha512-nAxA0v8+vXSBDt3AnRUNjyRIQ0rD+ntpbAp4LnPkumc5M9yUbSMa4XDU9Q6etY4f1Wp4bNgvc1yjiZtsTTrSng==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/docusaurus-theme-openapi-docs/node_modules/hastscript": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-7.2.0.tgz", @@ -8732,6 +8747,41 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/docusaurus-theme-openapi-docs/node_modules/mdast-util-to-hast": { + "version": "12.3.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-12.3.0.tgz", + "integrity": "sha512-pits93r8PhnIoU4Vy9bjW39M2jJ6/tdHyja9rrot9uujkN7UTU9SDnE6WNJz/IGyQk3XHX6yNNtrBH6cQzm8Hw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/mdast": "^3.0.0", + "mdast-util-definitions": "^5.0.0", + "micromark-util-sanitize-uri": "^1.1.0", + "trim-lines": "^3.0.0", + "unist-util-generated": "^2.0.0", + "unist-util-position": "^4.0.0", + "unist-util-visit": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/docusaurus-theme-openapi-docs/node_modules/mdast-util-to-hast/node_modules/unist-util-visit": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", + "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-is": "^5.0.0", + "unist-util-visit-parents": "^5.1.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/docusaurus-theme-openapi-docs/node_modules/mdast-util-to-markdown": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-1.5.0.tgz", @@ -9309,6 +9359,58 @@ "integrity": 
"sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==", "license": "MIT" }, + "node_modules/docusaurus-theme-openapi-docs/node_modules/react-markdown": { + "version": "8.0.7", + "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-8.0.7.tgz", + "integrity": "sha512-bvWbzG4MtOU62XqBx3Xx+zB2raaFFsq4mYiAzfjXJMEz2sixgeAfraA3tvzULF02ZdOMUOKTBFFaZJDDrq+BJQ==", + "license": "MIT", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/prop-types": "^15.0.0", + "@types/unist": "^2.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-whitespace": "^2.0.0", + "prop-types": "^15.0.0", + "property-information": "^6.0.0", + "react-is": "^18.0.0", + "remark-parse": "^10.0.0", + "remark-rehype": "^10.0.0", + "space-separated-tokens": "^2.0.0", + "style-to-object": "^0.4.0", + "unified": "^10.0.0", + "unist-util-visit": "^4.0.0", + "vfile": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + }, + "peerDependencies": { + "@types/react": ">=16", + "react": ">=16" + } + }, + "node_modules/docusaurus-theme-openapi-docs/node_modules/react-markdown/node_modules/react-is": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", + "license": "MIT" + }, + "node_modules/docusaurus-theme-openapi-docs/node_modules/react-markdown/node_modules/unist-util-visit": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", + "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-is": "^5.0.0", + "unist-util-visit-parents": "^5.1.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/docusaurus-theme-openapi-docs/node_modules/react-redux": { "version": "7.2.9", "resolved": "https://registry.npmjs.org/react-redux/-/react-redux-7.2.9.tgz", @@ -9365,6 +9467,37 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/docusaurus-theme-openapi-docs/node_modules/remark-parse": { + "version": "10.0.2", + "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-10.0.2.tgz", + "integrity": "sha512-3ydxgHa/ZQzG8LvC7jTXccARYDcRld3VfcgIIFs7bI6vbRSxJJmzgLEIIoYKyrfhaY+ujuWaf/PJiMZXoiCXgw==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^3.0.0", + "mdast-util-from-markdown": "^1.0.0", + "unified": "^10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/docusaurus-theme-openapi-docs/node_modules/remark-rehype": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-10.1.0.tgz", + "integrity": "sha512-EFmR5zppdBp0WQeDVZ/b66CWJipB2q2VLNFMabzDSGR66Z2fQii83G5gTBbgGEnEEA0QRussvrFHxk1HWGJskw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/mdast": "^3.0.0", + "mdast-util-to-hast": "^12.1.0", + "unified": "^10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/docusaurus-theme-openapi-docs/node_modules/unified": { "version": "10.1.2", "resolved": "https://registry.npmjs.org/unified/-/unified-10.1.2.tgz", @@ -10579,12 +10712,6 @@ "integrity": 
"sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==", "license": "Unlicense" }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", - "license": "ISC" - }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -10688,21 +10815,20 @@ "license": "ISC" }, "node_modules/glob": { - "version": "7.2.3", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", - "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", - "deprecated": "Glob versions prior to v9 are no longer supported", + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", + "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", "license": "ISC", "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.1.1", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" }, - "engines": { - "node": "*" + "bin": { + "glob": "dist/esm/bin.mjs" }, "funding": { "url": "https://github.com/sponsors/isaacs" @@ -10726,26 +10852,19 @@ "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==", "license": "BSD-2-Clause" }, - "node_modules/glob/node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, "node_modules/glob/node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", "license": "ISC", "dependencies": { - "brace-expansion": "^1.1.7" + "brace-expansion": "^2.0.1" }, "engines": { - "node": "*" + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/global-dirs": { @@ -11287,6 +11406,17 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/html-url-attributes": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz", + "integrity": "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==", + "dev": true, + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/html-void-elements": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz", @@ -11648,17 +11778,6 @@ "node": ">=12" } }, - "node_modules/inflight": { - "version": "1.0.6", - 
"resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", - "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", - "license": "ISC", - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" - } - }, "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", @@ -15426,15 +15545,6 @@ "node": ">= 0.8" } }, - "node_modules/once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", - "license": "ISC", - "dependencies": { - "wrappy": "1" - } - }, "node_modules/onetime": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", @@ -15811,15 +15921,6 @@ "node": "^12.20.0 || ^14.13.1 || >=16.0.0" } }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/path-is-inside": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz", @@ -17704,6 +17805,80 @@ "node": ">=0.10.0" } }, + "node_modules/raw-loader": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/raw-loader/-/raw-loader-4.0.2.tgz", + "integrity": "sha512-ZnScIV3ag9A4wPX/ZayxL/jZH+euYb6FcUinPcgiQW0+UBtEv0O6Q3lGd3cqJ+GHH+rksEv3Pj99oxJ3u3VIKA==", + "dev": true, + "license": "MIT", + "dependencies": { + "loader-utils": "^2.0.0", + "schema-utils": "^3.0.0" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependencies": { + "webpack": "^4.0.0 || ^5.0.0" + } + }, + "node_modules/raw-loader/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/raw-loader/node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "ajv": "^6.9.1" + } + }, + "node_modules/raw-loader/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true, + "license": "MIT" + }, + "node_modules/raw-loader/node_modules/schema-utils": { + "version": "3.3.0", + "resolved": 
"https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz", + "integrity": "sha512-pN/yOAvcC+5rQ5nERGuwrjLlYvLTbCibnZ1I7B1LaiAz9BRBlE9GMgE/eqV30P7aJQUf7Ddimy/RsbYO/GrVGg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/json-schema": "^7.0.8", + "ajv": "^6.12.5", + "ajv-keywords": "^3.5.2" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, "node_modules/rc": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", @@ -17868,675 +18043,31 @@ "license": "MIT" }, "node_modules/react-markdown": { - "version": "8.0.7", - "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-8.0.7.tgz", - "integrity": "sha512-bvWbzG4MtOU62XqBx3Xx+zB2raaFFsq4mYiAzfjXJMEz2sixgeAfraA3tvzULF02ZdOMUOKTBFFaZJDDrq+BJQ==", + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-10.1.0.tgz", + "integrity": "sha512-qKxVopLT/TyA6BX3Ue5NwabOsAzm0Q7kAPwq6L+wWDwisYs7R8vZ0nRXqq6rkueboxpkjvLGU9fWifiX/ZZFxQ==", + "dev": true, "license": "MIT", "dependencies": { - "@types/hast": "^2.0.0", - "@types/prop-types": "^15.0.0", - "@types/unist": "^2.0.0", - "comma-separated-tokens": "^2.0.0", - "hast-util-whitespace": "^2.0.0", - "prop-types": "^15.0.0", - "property-information": "^6.0.0", - "react-is": "^18.0.0", - "remark-parse": "^10.0.0", - "remark-rehype": "^10.0.0", - "space-separated-tokens": "^2.0.0", - "style-to-object": "^0.4.0", - "unified": "^10.0.0", - "unist-util-visit": "^4.0.0", - "vfile": "^5.0.0" + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "hast-util-to-jsx-runtime": "^2.0.0", + "html-url-attributes": "^3.0.0", + "mdast-util-to-hast": "^13.0.0", + "remark-parse": "^11.0.0", + "remark-rehype": "^11.0.0", + "unified": "^11.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" }, "funding": { "type": "opencollective", "url": "https://opencollective.com/unified" }, "peerDependencies": { - "@types/react": ">=16", - "react": ">=16" - } - }, - "node_modules/react-markdown/node_modules/@types/hast": { - "version": "2.3.10", - "resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.10.tgz", - "integrity": "sha512-McWspRw8xx8J9HurkVBfYj0xKoE25tOFlHGdx4MJ5xORQrMGZNqJhVQWaIbm6Oyla5kYOXtDiopzKRJzEOkwJw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2" - } - }, - "node_modules/react-markdown/node_modules/@types/mdast": { - "version": "3.0.15", - "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-3.0.15.tgz", - "integrity": "sha512-LnwD+mUEfxWMa1QpDraczIn6k0Ee3SMicuYSSzS6ZYl2gKS09EClnJYGd8Du6rfc5r/GZEk5o1mRb8TaTj03sQ==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2" - } - }, - "node_modules/react-markdown/node_modules/@types/unist": { - "version": "2.0.11", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", - "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", - "license": "MIT" - }, - "node_modules/react-markdown/node_modules/hast-util-whitespace": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-2.0.1.tgz", - "integrity": "sha512-nAxA0v8+vXSBDt3AnRUNjyRIQ0rD+ntpbAp4LnPkumc5M9yUbSMa4XDU9Q6etY4f1Wp4bNgvc1yjiZtsTTrSng==", - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - 
"node_modules/react-markdown/node_modules/mdast-util-from-markdown": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-1.3.1.tgz", - "integrity": "sha512-4xTO/M8c82qBcnQc1tgpNtubGUW/Y1tBQ1B0i5CtSoelOLKFYlElIr3bvgREYYO5iRqbMY1YuqZng0GVOI8Qww==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^3.0.0", - "@types/unist": "^2.0.0", - "decode-named-character-reference": "^1.0.0", - "mdast-util-to-string": "^3.1.0", - "micromark": "^3.0.0", - "micromark-util-decode-numeric-character-reference": "^1.0.0", - "micromark-util-decode-string": "^1.0.0", - "micromark-util-normalize-identifier": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0", - "unist-util-stringify-position": "^3.0.0", - "uvu": "^0.5.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/mdast-util-to-hast": { - "version": "12.3.0", - "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-12.3.0.tgz", - "integrity": "sha512-pits93r8PhnIoU4Vy9bjW39M2jJ6/tdHyja9rrot9uujkN7UTU9SDnE6WNJz/IGyQk3XHX6yNNtrBH6cQzm8Hw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^2.0.0", - "@types/mdast": "^3.0.0", - "mdast-util-definitions": "^5.0.0", - "micromark-util-sanitize-uri": "^1.1.0", - "trim-lines": "^3.0.0", - "unist-util-generated": "^2.0.0", - "unist-util-position": "^4.0.0", - "unist-util-visit": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/mdast-util-to-string": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-3.2.0.tgz", - "integrity": "sha512-V4Zn/ncyN1QNSqSBxTrMOLpjr+IKdHl2v3KVLoWmDPscP4r9GcCi71gjgvUV1SFSKh92AjAG4peFuBl2/YgCJg==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/micromark": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/micromark/-/micromark-3.2.0.tgz", - "integrity": "sha512-uD66tJj54JLYq0De10AhWycZWGQNUvDI55xPgk2sQM5kn1JYlhbCMTtEeT27+vAhW2FBQxLlOmS3pmA7/2z4aA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "@types/debug": "^4.0.0", - "debug": "^4.0.0", - "decode-named-character-reference": "^1.0.0", - "micromark-core-commonmark": "^1.0.1", - "micromark-factory-space": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-chunked": "^1.0.0", - "micromark-util-combine-extensions": "^1.0.0", - "micromark-util-decode-numeric-character-reference": "^1.0.0", - "micromark-util-encode": "^1.0.0", - "micromark-util-normalize-identifier": "^1.0.0", - "micromark-util-resolve-all": "^1.0.0", - "micromark-util-sanitize-uri": "^1.0.0", - "micromark-util-subtokenize": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.1", - "uvu": "^0.5.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-core-commonmark": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-1.1.0.tgz", - "integrity": 
"sha512-BgHO1aRbolh2hcrzL2d1La37V0Aoz73ymF8rAcKnohLy93titmv62E0gP8Hrx9PKcKrqCZ1BbLGbP3bEhoXYlw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "decode-named-character-reference": "^1.0.0", - "micromark-factory-destination": "^1.0.0", - "micromark-factory-label": "^1.0.0", - "micromark-factory-space": "^1.0.0", - "micromark-factory-title": "^1.0.0", - "micromark-factory-whitespace": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-chunked": "^1.0.0", - "micromark-util-classify-character": "^1.0.0", - "micromark-util-html-tag-name": "^1.0.0", - "micromark-util-normalize-identifier": "^1.0.0", - "micromark-util-resolve-all": "^1.0.0", - "micromark-util-subtokenize": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.1", - "uvu": "^0.5.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-factory-destination": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-1.1.0.tgz", - "integrity": "sha512-XaNDROBgx9SgSChd69pjiGKbV+nfHGDPVYFs5dOoDd7ZnMAE+Cuu91BCpsY8RT2NP9vo/B8pds2VQNCLiu0zhg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-factory-label": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-1.1.0.tgz", - "integrity": "sha512-OLtyez4vZo/1NjxGhcpDSbHQ+m0IIGnT8BoPamh+7jVlzLJBH98zzuCoUeMxvM6WsNeh8wx8cKvqLiPHEACn0w==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0", - "uvu": "^0.5.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-factory-title": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-1.1.0.tgz", - "integrity": "sha512-J7n9R3vMmgjDOCY8NPw55jiyaQnH5kBdV2/UXCtZIpnHH3P6nHUKaH7XXEYuWwx/xUJcawa8plLBEjMPU24HzQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-factory-space": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-factory-whitespace": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-1.1.0.tgz", - "integrity": "sha512-v2WlmiymVSp5oMg+1Q0N1Lxmt6pMhIHD457whWM7/GUlEks1hI9xj5w3zbc4uuMKXGisksZk8DzP2UyGbGqNsQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": 
"https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-factory-space": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-chunked": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-1.1.0.tgz", - "integrity": "sha512-Ye01HXpkZPNcV6FiyoW2fGZDUw4Yc7vT0E9Sad83+bEDiCJ1uXu0S3mr8WLpsz3HaG3x2q0HM6CTuPdcZcluFQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-classify-character": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-1.1.0.tgz", - "integrity": "sha512-SL0wLxtKSnklKSUplok1WQFoGhUdWYKggKUiqhX+Swala+BtptGCu5iPRc+xvzJ4PXE/hwM3FNXsfEVgoZsWbw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-combine-extensions": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-1.1.0.tgz", - "integrity": "sha512-Q20sp4mfNf9yEqDL50WwuWZHUrCO4fEyeDCnMGmG5Pr0Cz15Uo7KBs6jq+dq0EgX4DPwwrh9m0X+zPV1ypFvUA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-chunked": "^1.0.0", - "micromark-util-types": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-decode-numeric-character-reference": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-1.1.0.tgz", - "integrity": "sha512-m9V0ExGv0jB1OT21mrWcuf4QhP46pH1KkfWy9ZEezqHKAxkj4mPCy3nIH1rkbdMlChLHX531eOrymlwyZIf2iw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-decode-string": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-1.1.0.tgz", - "integrity": "sha512-YphLGCK8gM1tG1bd54azwyrQRjCFcmgj2S2GoJDNnh4vYtnL38JS8M4gpxzOPNyHdNEpheyWXCTnnTDY3N+NVQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "decode-named-character-reference": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-decode-numeric-character-reference": "^1.0.0", - 
"micromark-util-symbol": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-encode": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-1.1.0.tgz", - "integrity": "sha512-EuEzTWSTAj9PA5GOAs992GzNh2dGQO52UvAbtSOMvXTxv3Criqb6IOzJUBCmEqrrXSblJIJBbFFv6zPxpreiJw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/react-markdown/node_modules/micromark-util-html-tag-name": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-1.2.0.tgz", - "integrity": "sha512-VTQzcuQgFUD7yYztuQFKXT49KghjtETQ+Wv/zUjGSGBioZnkA4P1XXZPT1FHeJA6RwRXSF47yvJ1tsJdoxwO+Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/react-markdown/node_modules/micromark-util-normalize-identifier": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-1.1.0.tgz", - "integrity": "sha512-N+w5vhqrBihhjdpM8+5Xsxy71QWqGn7HYNUvch71iV2PM7+E3uWGox1Qp90loa1ephtCxG2ftRV/Conitc6P2Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-resolve-all": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-1.1.0.tgz", - "integrity": "sha512-b/G6BTMSg+bX+xVCshPTPyAu2tmA0E4X98NSR7eIbeC6ycCqCeE7wjfDIgzEbkzdEVJXRtOG4FbEm/uGbCRouA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-types": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-sanitize-uri": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-1.2.0.tgz", - "integrity": "sha512-QO4GXv0XZfWey4pYFndLUKEAktKkG5kZTdUNaTAkzbuJxn2tNBOr+QtxR2XpWaMhbImT2dPzyLrPXLlPhph34A==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-encode": "^1.0.0", - "micromark-util-symbol": "^1.0.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-subtokenize": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-1.1.0.tgz", - "integrity": "sha512-kUQHyzRoxvZO2PuLzMt2P/dwVsTiivCK8icYTeR+3WgbuPqfHgPPy7nFKbeqRivBvn/3N3GBiNC+JRTMSxEC7A==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - 
"micromark-util-chunked": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0", - "uvu": "^0.5.0" - } - }, - "node_modules/react-markdown/node_modules/micromark-util-types": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-1.1.0.tgz", - "integrity": "sha512-ukRBgie8TIAcacscVHSiddHjO4k/q3pnedmzMQ4iwDcK0FtFCohKOlFbaOL/mPgfnPsL3C1ZyxJa4sbWrBl3jg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/react-markdown/node_modules/property-information": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.5.0.tgz", - "integrity": "sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/react-markdown/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "license": "MIT" - }, - "node_modules/react-markdown/node_modules/remark-parse": { - "version": "10.0.2", - "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-10.0.2.tgz", - "integrity": "sha512-3ydxgHa/ZQzG8LvC7jTXccARYDcRld3VfcgIIFs7bI6vbRSxJJmzgLEIIoYKyrfhaY+ujuWaf/PJiMZXoiCXgw==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^3.0.0", - "mdast-util-from-markdown": "^1.0.0", - "unified": "^10.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/remark-rehype": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-10.1.0.tgz", - "integrity": "sha512-EFmR5zppdBp0WQeDVZ/b66CWJipB2q2VLNFMabzDSGR66Z2fQii83G5gTBbgGEnEEA0QRussvrFHxk1HWGJskw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^2.0.0", - "@types/mdast": "^3.0.0", - "mdast-util-to-hast": "^12.1.0", - "unified": "^10.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/unified": { - "version": "10.1.2", - "resolved": "https://registry.npmjs.org/unified/-/unified-10.1.2.tgz", - "integrity": "sha512-pUSWAi/RAnVy1Pif2kAoeWNBa3JVrx0MId2LASj8G+7AiHWoKZNTomq6LG326T68U7/e263X6fTdcXIy7XnF7Q==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "bail": "^2.0.0", - "extend": "^3.0.0", - "is-buffer": "^2.0.0", - "is-plain-obj": "^4.0.0", - "trough": "^2.0.0", - "vfile": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/unist-util-is": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-5.2.1.tgz", - "integrity": "sha512-u9njyyfEh43npf1M+yGKDGVPbY/JWEemg5nH05ncKPfi+kBbKBJoTdsogMu33uhytuLlv9y0O7GH7fEdwLdLQw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/unist-util-position": { - "version": "4.0.4", - "resolved": 
"https://registry.npmjs.org/unist-util-position/-/unist-util-position-4.0.4.tgz", - "integrity": "sha512-kUBE91efOWfIVBo8xzh/uZQ7p9ffYRtUbMRZBNFYwf0RK8koUMx6dGUfwylLOKmaT2cs4wSW96QoYUSXAyEtpg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/unist-util-stringify-position": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-3.0.3.tgz", - "integrity": "sha512-k5GzIBZ/QatR8N5X2y+drfpWG8IDBzdnVj6OInRNWm1oXrzydiaAT2OQiA8DPRRZyAKb9b6I2a6PxYklZD0gKg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/unist-util-visit": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", - "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0", - "unist-util-visit-parents": "^5.1.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/unist-util-visit-parents": { - "version": "5.1.3", - "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-5.1.3.tgz", - "integrity": "sha512-x6+y8g7wWMyQhL1iZfhIPhDAs7Xwbn9nRosDXl7qoPTSCy0yNxnKc+hWokFifWQIDGi154rdUqKvbCa4+1kLhg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/vfile": { - "version": "5.3.7", - "resolved": "https://registry.npmjs.org/vfile/-/vfile-5.3.7.tgz", - "integrity": "sha512-r7qlzkgErKjobAmyNIkkSpizsFPYiUPuJb5pNW1RB4JcYVZhs4lIbVqk8XPk033CV/1z8ss5pkax8SuhGpcG8g==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "is-buffer": "^2.0.0", - "unist-util-stringify-position": "^3.0.0", - "vfile-message": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/react-markdown/node_modules/vfile-message": { - "version": "3.1.4", - "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-3.1.4.tgz", - "integrity": "sha512-fa0Z6P8HUrQN4BZaX05SIVXic+7kE3b05PWAtPuYP9QLHsLKYR7/AlLW3NtOrpXRLeawpDLMsVkmk5DG0NXgWw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-stringify-position": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" + "@types/react": ">=18", + "react": ">=18" } }, "node_modules/react-modal": { @@ -18856,6 +18387,68 @@ "node": ">= 0.10" } }, + "node_modules/remark-code-import": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/remark-code-import/-/remark-code-import-1.2.0.tgz", + "integrity": "sha512-fgwLruqlZbVOIhCJFjY+JDwPZhA4/eK3InJzN8Ox8UDdtudpG212JwtRj6la+lAzJU7JmSEyewZSukVZdknt3Q==", + "license": "MIT", + "dependencies": { + "strip-indent": "^4.0.0", + "to-gatsby-remark-plugin": "^0.1.0", + "unist-util-visit": "^4.1.0" + }, + "engines": { + "node": ">= 12" + } + }, + 
"node_modules/remark-code-import/node_modules/@types/unist": { + "version": "2.0.11", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", + "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", + "license": "MIT" + }, + "node_modules/remark-code-import/node_modules/unist-util-is": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-5.2.1.tgz", + "integrity": "sha512-u9njyyfEh43npf1M+yGKDGVPbY/JWEemg5nH05ncKPfi+kBbKBJoTdsogMu33uhytuLlv9y0O7GH7fEdwLdLQw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-code-import/node_modules/unist-util-visit": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", + "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-is": "^5.0.0", + "unist-util-visit-parents": "^5.1.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-code-import/node_modules/unist-util-visit-parents": { + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-5.1.3.tgz", + "integrity": "sha512-x6+y8g7wWMyQhL1iZfhIPhDAs7Xwbn9nRosDXl7qoPTSCy0yNxnKc+hWokFifWQIDGi154rdUqKvbCa4+1kLhg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-is": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/remark-directive": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/remark-directive/-/remark-directive-3.0.1.tgz", @@ -20298,6 +19891,18 @@ "node": ">=6" } }, + "node_modules/strip-indent": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-4.1.1.tgz", + "integrity": "sha512-SlyRoSkdh1dYP0PzclLE7r0M9sgbFKKMFXpFRUMNuKhQSbC6VQIGzq3E0qsfvGJaUFJPGv6Ws1NZ/haTAjfbMA==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/strip-json-comments": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", @@ -20390,41 +19995,6 @@ "node": ">= 6" } }, - "node_modules/sucrase/node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/sucrase/node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - 
"funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -20664,6 +20234,15 @@ "node": "^18.0.0 || >=20.0.0" } }, + "node_modules/to-gatsby-remark-plugin": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/to-gatsby-remark-plugin/-/to-gatsby-remark-plugin-0.1.0.tgz", + "integrity": "sha512-blmhJ/gIrytWnWLgPSRCkhCPeki6UBK2daa3k9mGahN7GjwHu8KrS7F70MvwlsG7IE794JLgwAdCbi4hU4faFQ==", + "license": "MIT", + "dependencies": { + "to-vfile": "^6.1.0" + } + }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", @@ -20676,6 +20255,69 @@ "node": ">=8.0" } }, + "node_modules/to-vfile": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/to-vfile/-/to-vfile-6.1.0.tgz", + "integrity": "sha512-BxX8EkCxOAZe+D/ToHdDsJcVI4HqQfmw0tCkp31zf3dNP/XWIAjU4CmeuSwsSoOzOTqHPOL0KUzyZqJplkD0Qw==", + "license": "MIT", + "dependencies": { + "is-buffer": "^2.0.0", + "vfile": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/to-vfile/node_modules/@types/unist": { + "version": "2.0.11", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", + "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", + "license": "MIT" + }, + "node_modules/to-vfile/node_modules/unist-util-stringify-position": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-2.0.3.tgz", + "integrity": "sha512-3faScn5I+hy9VleOq/qNbAd6pAx7iH5jYBMS9I1HgQVijz/4mv5Bvw5iw1sC/90CODiKo81G/ps8AJrISn687g==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.2" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/to-vfile/node_modules/vfile": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-4.2.1.tgz", + "integrity": "sha512-O6AE4OskCG5S1emQ/4gl8zK586RqA3srz3nfK/Viy0UPToBc5Trp9BVFb1u0CjsKrAWwnpr4ifM/KBXPWwJbCA==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "is-buffer": "^2.0.0", + "unist-util-stringify-position": "^2.0.0", + "vfile-message": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/to-vfile/node_modules/vfile-message": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-2.0.4.tgz", + "integrity": "sha512-DjssxRGkMvifUOJre00juHoP9DPWuzjxKuMDrhNbk2TdaYYBNMStsNhEOt3idrtI12VQYM/1+iM0KOzXi4pxwQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "unist-util-stringify-position": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/toidentifier": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", @@ -21900,12 +21542,6 @@ "url": "https://github.com/chalk/strip-ansi?sponsor=1" } }, - "node_modules/wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", - "license": "ISC" - }, "node_modules/write-file-atomic": { "version": 
"3.0.3", "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz", diff --git a/docs/package.json b/docs/package.json index 6bbc48eb0..ca4d02ca1 100644 --- a/docs/package.json +++ b/docs/package.json @@ -15,7 +15,8 @@ "gen-api-docs": "docusaurus gen-api-docs", "clean-api-docs": "docusaurus clean-api-docs", "gen-api-docs:version": "docusaurus gen-api-docs:version", - "clean-api-docs:version": "docusaurus clean-api-docs:version" + "clean-api-docs:version": "docusaurus clean-api-docs:version", + "sync-files": "node scripts/sync-files.js" }, "dependencies": { "@docusaurus/core": "3.8.1", @@ -27,7 +28,11 @@ "docusaurus-theme-openapi-docs": "4.3.7", "prism-react-renderer": "^2.3.0", "react": "^19.0.0", - "react-dom": "^19.0.0" + "react-dom": "^19.0.0", + "remark-code-import": "^1.2.0" + }, + "overrides": { + "glob": "^10.5.0" }, "browserslist": { "production": [ @@ -40,5 +45,9 @@ "last 1 firefox version", "last 1 safari version" ] + }, + "devDependencies": { + "raw-loader": "^4.0.2", + "react-markdown": "^10.1.0" } } diff --git a/docs/scripts/sync-files.js b/docs/scripts/sync-files.js new file mode 100755 index 000000000..a9b5b3eaf --- /dev/null +++ b/docs/scripts/sync-files.js @@ -0,0 +1,145 @@ +#!/usr/bin/env node + +const fs = require('fs'); +const path = require('path'); + +// Repository root is always one level up from docs +const repoRoot = path.join(__dirname, '..', '..'); + +// Get all requested files from the usage tracking file +function getRequestedFiles() { + const usageFile = path.join(__dirname, '..', 'static', 'imported-files', 'usage.json'); + if (!fs.existsSync(usageFile)) { + return []; + } + + try { + const usage = JSON.parse(fs.readFileSync(usageFile, 'utf8')); + return usage.files || []; + } catch (error) { + console.warn('Could not read usage file:', error.message); + return []; + } +} + +// Track file usage +function trackFileUsage(filePath) { + const usageFile = path.join(__dirname, '..', 'static', 'imported-files', 'usage.json'); + const usageDir = path.dirname(usageFile); + + // Ensure directory exists + if (!fs.existsSync(usageDir)) { + fs.mkdirSync(usageDir, { recursive: true }); + } + + let usage = { files: [] }; + if (fs.existsSync(usageFile)) { + try { + usage = JSON.parse(fs.readFileSync(usageFile, 'utf8')); + } catch (error) { + console.warn('Could not read existing usage file, creating new one'); + } + } + + if (!usage.files.includes(filePath)) { + usage.files.push(filePath); + fs.writeFileSync(usageFile, JSON.stringify(usage, null, 2)); + } +} + +// Filter content based on file type and options +function filterContent(content, filePath) { + let lines = content.split('\n'); + + // Skip copyright header for Python files + if (filePath.endsWith('.py')) { + // Read the license header file + const licenseHeaderPath = path.join(repoRoot, 'docs', 'license_header.txt'); + if (fs.existsSync(licenseHeaderPath)) { + try { + const licenseText = fs.readFileSync(licenseHeaderPath, 'utf8'); + const licenseLines = licenseText.trim().split('\n'); + + // Check if file starts with the license header (accounting for # comments) + if (lines.length >= licenseLines.length) { + let matches = true; + for (let i = 0; i < licenseLines.length; i++) { + const codeLine = lines[i]?.replace(/^#\s*/, '').trim(); + const licenseLine = licenseLines[i]?.trim(); + if (codeLine !== licenseLine) { + matches = false; + break; + } + } + + if (matches) { + // Skip the license header and any trailing empty lines + let skipTo = licenseLines.length; + while (skipTo < 
lines.length && lines[skipTo].trim() === '') {
+              skipTo++;
+            }
+            lines = lines.slice(skipTo);
+          }
+        }
+      } catch (error) {
+        console.warn(`Could not read license header, skipping filtering for ${filePath}`);
+      }
+    }
+  }
+
+  // Trim empty lines from start and end
+  while (lines.length > 0 && lines[0].trim() === '') {
+    lines.shift();
+  }
+  while (lines.length > 0 && lines[lines.length - 1].trim() === '') {
+    lines.pop();
+  }
+
+  return lines.join('\n');
+}
+
+// Sync a file from repo root to static directory
+function syncFile(filePath) {
+  const sourcePath = path.join(repoRoot, filePath);
+  const destPath = path.join(__dirname, '..', 'static', 'imported-files', filePath);
+  const destDir = path.dirname(destPath);
+
+  // Ensure destination directory exists
+  if (!fs.existsSync(destDir)) {
+    fs.mkdirSync(destDir, { recursive: true });
+  }
+
+  try {
+    if (fs.existsSync(sourcePath)) {
+      const content = fs.readFileSync(sourcePath, 'utf8');
+      const filteredContent = filterContent(content, filePath);
+      fs.writeFileSync(destPath, filteredContent);
+      console.log(`✅ Synced ${filePath}`);
+      trackFileUsage(filePath);
+      return true;
+    } else {
+      console.warn(`⚠️ Source file not found: ${sourcePath}`);
+      return false;
+    }
+  } catch (error) {
+    console.error(`❌ Error syncing ${filePath}:`, error.message);
+    return false;
+  }
+}
+
+// Main execution
+console.log(`📁 Repository root: ${path.resolve(repoRoot)}`);
+
+// Get files that are being requested by the documentation
+const requestedFiles = getRequestedFiles();
+console.log(`📄 Syncing ${requestedFiles.length} requested files...`);
+
+if (requestedFiles.length === 0) {
+  console.log('ℹ️ No files requested yet. Files will be synced when first referenced in documentation.');
+} else {
+  requestedFiles.forEach(filePath => {
+    syncFile(filePath);
+  });
+}
+
+console.log('✅ File sync complete!');
diff --git a/docs/sidebars.ts b/docs/sidebars.ts
index f2cfe3798..7b4ac5ac8 100644
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -57,6 +57,7 @@ const sidebars: SidebarsConfig = {
        'distributions/importing_as_library',
        'distributions/configuration',
        'distributions/starting_llama_stack_server',
+       'distributions/llama_stack_ui',
        {
          type: 'category',
          label: 'Self-Hosted Distributions',
@@ -242,15 +243,6 @@ const sidebars: SidebarsConfig = {
        'providers/eval/remote_nvidia'
      ],
    },
-    {
-      type: 'category',
-      label: 'Telemetry',
-      collapsed: true,
-      items: [
-        'providers/telemetry/index',
-        'providers/telemetry/inline_meta-reference'
-      ],
-    },
    {
      type: 'category',
      label: 'Batches',
diff --git a/docs/src/components/CodeFromFile.jsx b/docs/src/components/CodeFromFile.jsx
new file mode 100644
index 000000000..94580f8c5
--- /dev/null
+++ b/docs/src/components/CodeFromFile.jsx
@@ -0,0 +1,93 @@
+import React, { useState, useEffect } from 'react';
+import CodeBlock from '@theme/CodeBlock';
+
+export default function CodeFromFile({
+  src,
+  language = 'python',
+  title,
+  startLine,
+  endLine,
+  highlightLines
+}) {
+  const [content, setContent] = useState('');
+  const [error, setError] = useState(null);
+
+  useEffect(() => {
+    async function loadFile() {
+      try {
+        // File registration is now handled by the file-sync-plugin during build
+
+        // Load file from static/imported-files directory
+        const response = await fetch(`/imported-files/${src}`);
+        if (!response.ok) {
+          throw new Error(`Failed to fetch: ${response.status}`);
+        }
+        let text = await response.text();
+
+        // Handle line range if specified (filtering is done at build time)
+        if (startLine || endLine) {
+          const lines = text.split('\n');
+          const start = startLine ? Math.max(0, startLine - 1) : 0;
+          const end = endLine ? Math.min(lines.length, endLine) : lines.length;
+          text = lines.slice(start, end).join('\n');
+        }
+
+        setContent(text);
+      } catch (err) {
+        console.error('Failed to load file:', err);
+        setError(`Failed to load ${src}: ${err.message}`);
+      }
+    }
+
+    loadFile();
+  }, [src, startLine, endLine]);
+
+  if (error) {
+    return (
+      <div>
+        Error: {error}
+      </div>
+    );
+  }
+
+  if (!content) {
+    return <div>Loading {src}...</div>;
+  }
+
+  // Auto-detect language from file extension if not provided
+  const detectedLanguage = language || getLanguageFromExtension(src);
+
+  return (
+    <CodeBlock language={detectedLanguage} title={title}>
+      {content}
+    </CodeBlock>
+  );
+}
+
+function getLanguageFromExtension(filename) {
+  const ext = filename.split('.').pop();
+  const languageMap = {
+    'py': 'python',
+    'js': 'javascript',
+    'jsx': 'jsx',
+    'ts': 'typescript',
+    'tsx': 'tsx',
+    'md': 'markdown',
+    'sh': 'bash',
+    'yaml': 'yaml',
+    'yml': 'yaml',
+    'json': 'json',
+    'css': 'css',
+    'html': 'html',
+    'cpp': 'cpp',
+    'c': 'c',
+    'java': 'java',
+    'go': 'go',
+    'rs': 'rust',
+    'php': 'php',
+    'rb': 'ruby',
+  };
+  return languageMap[ext] || 'text';
+}
diff --git a/docs/src/pages/index.js b/docs/src/pages/index.js
index f460d6f27..7489682aa 100644
--- a/docs/src/pages/index.js
+++ b/docs/src/pages/index.js
@@ -13,7 +13,7 @@ function HomepageHeader() {
         <h1 className="hero__title">
           Build AI Applications with Llama Stack
         </h1>
         <p className="hero__subtitle">
-          Unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry
+          Unified APIs for Inference, RAG, Agents, Tools, and Safety
         </p>

+      description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Evals.">
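
For context on how these pieces fit together, here is a minimal sketch, not part of this diff, of how the new CodeFromFile component could be used from an MDX doc page once docs/scripts/sync-files.js has copied the referenced source into static/imported-files/. The import path uses Docusaurus's standard @site alias, and the referenced repo file is hypothetical:

import CodeFromFile from '@site/src/components/CodeFromFile';

<CodeFromFile
  src="llama_stack/core/datatypes.py"
  language="python"
  title="datatypes.py"
  startLine={10}
  endLine={30}
/>

Two observations about the component as written: because language defaults to 'python', the getLanguageFromExtension fallback only runs when a caller explicitly passes a falsy language; and fetching from /imported-files/ at render time (rather than bundling with raw-loader) keeps the docs build decoupled from the snippet contents, at the cost of one client-side request per embedded snippet.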
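
Similarly, a sketch of the usage-tracking file shared by getRequestedFiles() and trackFileUsage(); the { "files": [...] } shape follows the script above, while the listed paths are purely illustrative:

{
  "files": [
    "llama_stack/core/datatypes.py",
    "docs/license_header.txt"
  ]
}

Running npm run sync-files from docs/ then copies each listed file from the repository root into static/imported-files/, stripping the repo's Python license header from .py files along the way.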
diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html deleted file mode 100644 index d920317cf..000000000 --- a/docs/static/deprecated-llama-stack-spec.html +++ /dev/null @@ -1,13520 +0,0 @@ - - - - - - - OpenAPI specification - - - - - - - - - - - - - diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 66b2caeca..2d0ce6e08 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1,1432 +1,19 @@ openapi: 3.1.0 info: - title: >- - Llama Stack Specification - Deprecated APIs - version: v1 - description: >- + title: Llama Stack Specification - Deprecated APIs + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for - migration reference only. + **⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for + migration reference only. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: - /v1/agents: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all agents. - description: List all agents. - parameters: - - name: start_index - in: query - description: The index to start the pagination from. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of agents to return. - required: false - schema: - type: integer - deprecated: true - post: - responses: - '200': - description: >- - An AgentCreateResponse with the agent ID. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentCreateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Create an agent with the given configuration. - description: >- - Create an agent with the given configuration. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentRequest' - required: true - deprecated: true - /v1/agents/{agent_id}: - get: - responses: - '200': - description: An Agent of the agent. - content: - application/json: - schema: - $ref: '#/components/schemas/Agent' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Describe an agent by its ID. - description: Describe an agent by its ID. - parameters: - - name: agent_id - in: path - description: ID of the agent. 
- required: true - schema: - type: string - deprecated: true - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Delete an agent by its ID and its associated sessions and turns. - description: >- - Delete an agent by its ID and its associated sessions and turns. - parameters: - - name: agent_id - in: path - description: The ID of the agent to delete. - required: true - schema: - type: string - deprecated: true - /v1/agents/{agent_id}/session: - post: - responses: - '200': - description: An AgentSessionCreateResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentSessionCreateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new session for an agent. - description: Create a new session for an agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to create the session for. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentSessionRequest' - required: true - deprecated: true - /v1/agents/{agent_id}/session/{session_id}: - get: - responses: - '200': - description: A Session. - content: - application/json: - schema: - $ref: '#/components/schemas/Session' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent session by its ID. - description: Retrieve an agent session by its ID. - parameters: - - name: session_id - in: path - description: The ID of the session to get. - required: true - schema: - type: string - - name: agent_id - in: path - description: >- - The ID of the agent to get the session for. - required: true - schema: - type: string - - name: turn_ids - in: query - description: >- - (Optional) List of turn IDs to filter the session by. - required: false - schema: - type: array - items: - type: string - deprecated: true - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Delete an agent session by its ID and its associated turns. - description: >- - Delete an agent session by its ID and its associated turns. - parameters: - - name: session_id - in: path - description: The ID of the session to delete. - required: true - schema: - type: string - - name: agent_id - in: path - description: >- - The ID of the agent to delete the session for. - required: true - schema: - type: string - deprecated: true - /v1/agents/{agent_id}/session/{session_id}/turn: - post: - responses: - '200': - description: >- - If stream=False, returns a Turn object. If stream=True, returns an SSE - event stream of AgentTurnResponseStreamChunk. 
- content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new turn for an agent. - description: Create a new turn for an agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to create the turn for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to create the turn for. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentTurnRequest' - required: true - deprecated: true - /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}: - get: - responses: - '200': - description: A Turn. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent turn by its ID. - description: Retrieve an agent turn by its ID. - parameters: - - name: agent_id - in: path - description: The ID of the agent to get the turn for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to get the turn for. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to get. - required: true - schema: - type: string - deprecated: true - /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume: - post: - responses: - '200': - description: >- - A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk - objects. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Resume an agent turn with executed tool call responses. - description: >- - Resume an agent turn with executed tool call responses. - - When a Turn has the status `awaiting_input` due to pending input from client - side tool calls, this endpoint can be used to submit the outputs from the - tool calls once they are ready. - parameters: - - name: agent_id - in: path - description: The ID of the agent to resume. - required: true - schema: - type: string - - name: session_id - in: path - description: The ID of the session to resume. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to resume. 
- required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ResumeAgentTurnRequest' - required: true - deprecated: true - /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: - get: - responses: - '200': - description: An AgentStepResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentStepResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent step by its ID. - description: Retrieve an agent step by its ID. - parameters: - - name: agent_id - in: path - description: The ID of the agent to get the step for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to get the step for. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to get the step for. - required: true - schema: - type: string - - name: step_id - in: path - description: The ID of the step to get. - required: true - schema: - type: string - deprecated: true - /v1/agents/{agent_id}/sessions: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all session(s) of a given agent. - description: List all session(s) of a given agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to list sessions for. - required: true - schema: - type: string - - name: start_index - in: query - description: The index to start the pagination from. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of sessions to return. - required: false - schema: - type: integer - deprecated: true - /v1/datasetio/append-rows/{dataset_id}: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - DatasetIO - summary: Append rows to a dataset. - description: Append rows to a dataset. - parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to append the rows to. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/AppendRowsRequest' - required: true - deprecated: true - /v1/datasetio/iterrows/{dataset_id}: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - DatasetIO - summary: >- - Get a paginated list of rows from a dataset. 
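A sketch of the append-rows operation above, under the same local-server assumption; the `rows` field name is an assumption, since `AppendRowsRequest` is referenced only by `$ref`.

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # POST /v1/datasetio/append-rows/{dataset_id} -> 200 OK with no documented body
    resp = requests.post(
        f"{BASE}/v1/datasetio/append-rows/my-dataset",   # hypothetical dataset ID
        json={"rows": [{"question": "2+2?", "answer": "4"}]},   # field name assumed
    )
    resp.raise_for_status()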
- description: >- - Get a paginated list of rows from a dataset. - - Uses offset-based pagination where: - - - start_index: The starting index (0-based). If None, starts from beginning. - - - limit: Number of items to return. If None or -1, returns all items. - - - The response includes: - - - data: List of items for the current page. - - - has_more: Whether there are more items available after this set. - parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to get the rows from. - required: true - schema: - type: string - - name: start_index - in: query - description: >- - Index into dataset for the first row to get. Get all rows if None. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of rows to get. - required: false - schema: - type: integer - deprecated: true - /v1/datasets: - get: - responses: - '200': - description: A ListDatasetsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/ListDatasetsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Datasets - summary: List all datasets. - description: List all datasets. - parameters: [] - deprecated: true - post: - responses: - '200': - description: A Dataset. - content: - application/json: - schema: - $ref: '#/components/schemas/Dataset' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Datasets - summary: Register a new dataset. - description: Register a new dataset. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterDatasetRequest' - required: true - deprecated: true - /v1/datasets/{dataset_id}: - get: - responses: - '200': - description: A Dataset. - content: - application/json: - schema: - $ref: '#/components/schemas/Dataset' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Datasets - summary: Get a dataset by its ID. - description: Get a dataset by its ID. - parameters: - - name: dataset_id - in: path - description: The ID of the dataset to get. - required: true - schema: - type: string - deprecated: true - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Datasets - summary: Unregister a dataset by its ID. - description: Unregister a dataset by its ID. - parameters: - - name: dataset_id - in: path - description: The ID of the dataset to unregister. - required: true - schema: - type: string - deprecated: true - /v1/eval/benchmarks: - get: - responses: - '200': - description: A ListBenchmarksResponse. 
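The iterrows pagination contract above (start_index/limit in, data/has_more out) as a short sketch, again assuming a local server:

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # GET /v1/datasetio/iterrows/{dataset_id} uses offset-based pagination
    page = requests.get(
        f"{BASE}/v1/datasetio/iterrows/my-dataset",   # hypothetical dataset ID
        params={"start_index": 0, "limit": 100},
    ).json()
    rows = page["data"]        # items for the current page
    more = page["has_more"]    # whether more items follow this set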
- content: - application/json: - schema: - $ref: '#/components/schemas/ListBenchmarksResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Benchmarks - summary: List all benchmarks. - description: List all benchmarks. - parameters: [] - deprecated: true - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Benchmarks - summary: Register a benchmark. - description: Register a benchmark. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterBenchmarkRequest' - required: true - deprecated: true - /v1/eval/benchmarks/{benchmark_id}: - get: - responses: - '200': - description: A Benchmark. - content: - application/json: - schema: - $ref: '#/components/schemas/Benchmark' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Benchmarks - summary: Get a benchmark by its ID. - description: Get a benchmark by its ID. - parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to get. - required: true - schema: - type: string - deprecated: true - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Benchmarks - summary: Unregister a benchmark. - description: Unregister a benchmark. - parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to unregister. - required: true - schema: - type: string - deprecated: true - /v1/eval/benchmarks/{benchmark_id}/evaluations: - post: - responses: - '200': - description: >- - EvaluateResponse object containing generations and scores. - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - summary: Evaluate a list of rows on a benchmark. - description: Evaluate a list of rows on a benchmark. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateRowsRequest' - required: true - deprecated: true - /v1/eval/benchmarks/{benchmark_id}/jobs: - post: - responses: - '200': - description: >- - The job that was created to run the evaluation. 
- content: - application/json: - schema: - $ref: '#/components/schemas/Job' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - summary: Run an evaluation on a benchmark. - description: Run an evaluation on a benchmark. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RunEvalRequest' - required: true - deprecated: true - /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: - get: - responses: - '200': - description: The status of the evaluation job. - content: - application/json: - schema: - $ref: '#/components/schemas/Job' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - summary: Get the status of a job. - description: Get the status of a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - deprecated: true - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - summary: Cancel a job. - description: Cancel a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. - required: true - schema: - type: string - deprecated: true - /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: - get: - responses: - '200': - description: The result of the job. - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - summary: Get the result of a job. - description: Get the result of a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. - required: true - schema: - type: string - deprecated: true - /v1/openai/v1/chat/completions: - get: - responses: - '200': - description: A ListOpenAIChatCompletionResponse. 
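The job endpoints above compose into a simple poll-then-fetch loop; a sketch against the deprecated /v1/eval routes, where the Job `status` field and its `completed` value are assumptions (the schema is referenced only by `$ref`):

    import time
    import requests

    BASE = "http://localhost:8321"   # assumed server address
    job_url = f"{BASE}/v1/eval/benchmarks/my-benchmark/jobs/job-42"   # hypothetical IDs

    # Poll GET .../jobs/{job_id} until the Job completes, then fetch the EvaluateResponse
    while requests.get(job_url).json().get("status") != "completed":   # field/value assumed
        time.sleep(5)
    result = requests.get(f"{job_url}/result").json()   # generations and scores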
- content: - application/json: - schema: - $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: List chat completions. - description: List chat completions. - parameters: - - name: after - in: query - description: >- - The ID of the last chat completion to return. - required: false - schema: - type: string - - name: limit - in: query - description: >- - The maximum number of chat completions to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort the chat completions by: "asc" or "desc". Defaults to - "desc". - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: true - post: - responses: - '200': - description: An OpenAIChatCompletion. - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletion' - - $ref: '#/components/schemas/OpenAIChatCompletionChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: Create chat completions. - description: >- - Create chat completions. - - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody' - required: true - deprecated: true - /v1/openai/v1/chat/completions/{completion_id}: - get: - responses: - '200': - description: A OpenAICompletionWithInputMessages. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAICompletionWithInputMessages' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: Get chat completion. - description: >- - Get chat completion. - - Describe a chat completion by its ID. - parameters: - - name: completion_id - in: path - description: ID of the chat completion. - required: true - schema: - type: string - deprecated: true - /v1/openai/v1/completions: - post: - responses: - '200': - description: An OpenAICompletion. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAICompletion' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: Create completion. - description: >- - Create completion. - - Generate an OpenAI-compatible completion for the given prompt using the specified - model. 
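Since the now-deprecated /v1/openai/v1/chat/completions route is documented as OpenAI-compatible, a standard OpenAI-style body is a reasonable sketch; the model ID below is hypothetical:

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # POST /v1/openai/v1/chat/completions (deprecated prefix) with an OpenAI-style body
    resp = requests.post(
        f"{BASE}/v1/openai/v1/chat/completions",
        json={
            "model": "llama3.2:3b",   # hypothetical model ID
            "messages": [{"role": "user", "content": "Hello"}],
        },
    )
    print(resp.json()["choices"][0]["message"]["content"])   # OpenAI-compatible shape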
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody' - required: true - deprecated: true - /v1/openai/v1/embeddings: - post: - responses: - '200': - description: >- - An OpenAIEmbeddingsResponse containing the embeddings. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIEmbeddingsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Inference - summary: Create embeddings. - description: >- - Create embeddings. - - Generate OpenAI-compatible embeddings for the given input using the specified - model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' - required: true - deprecated: true - /v1/openai/v1/files: - get: - responses: - '200': - description: >- - An ListOpenAIFileResponse containing the list of files. - content: - application/json: - schema: - $ref: '#/components/schemas/ListOpenAIFileResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: List files. - description: >- - List files. - - Returns a list of files that belong to the user's organization. - parameters: - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. For instance, if you make a list request and receive - 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo - in order to fetch the next page of the list. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 10,000, and the default is 10,000. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - $ref: '#/components/schemas/Order' - - name: purpose - in: query - description: >- - Only return files with the given purpose. - required: false - schema: - $ref: '#/components/schemas/OpenAIFilePurpose' - deprecated: true - post: - responses: - '200': - description: >- - An OpenAIFileObject representing the uploaded file. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIFileObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: Upload file. - description: >- - Upload file. - - Upload a file that can be used across various endpoints. - - - The file upload should be a multipart form request with: - - - file: The File object (not file name) to be uploaded. - - - purpose: The intended purpose of the uploaded file. - - - expires_after: Optional form values describing expiration for the file. 
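The embeddings route above is likewise documented as OpenAI-compatible, so an OpenAI-style body applies as a sketch (model ID hypothetical):

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # POST /v1/openai/v1/embeddings (deprecated prefix) -> OpenAIEmbeddingsResponse
    resp = requests.post(
        f"{BASE}/v1/openai/v1/embeddings",
        json={"model": "all-MiniLM-L6-v2", "input": "The quick brown fox"},
    )
    vectors = [d["embedding"] for d in resp.json()["data"]]   # OpenAI-compatible shape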
- parameters: [] - requestBody: - content: - multipart/form-data: - schema: - type: object - properties: - file: - type: string - format: binary - purpose: - $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after: - $ref: '#/components/schemas/ExpiresAfter' - required: - - file - - purpose - required: true - deprecated: true - /v1/openai/v1/files/{file_id}: - get: - responses: - '200': - description: >- - An OpenAIFileObject containing file information. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIFileObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: Retrieve file. - description: >- - Retrieve file. - - Returns information about a specific file. - parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: true - delete: - responses: - '200': - description: >- - An OpenAIFileDeleteResponse indicating successful deletion. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIFileDeleteResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: Delete file. - description: Delete file. - parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: true - /v1/openai/v1/files/{file_id}/content: - get: - responses: - '200': - description: >- - The raw file content as a binary response. - content: - application/json: - schema: - $ref: '#/components/schemas/Response' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Files - summary: Retrieve file content. - description: >- - Retrieve file content. - - Returns the contents of the specified file. - parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: true - /v1/openai/v1/models: + /v1/models: get: responses: '200': @@ -1436,8698 +23,10005 @@ paths: schema: $ref: '#/components/schemas/OpenAIListModelsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: List models using the OpenAI API. + - Models + summary: Openai List Models description: List models using the OpenAI API. - parameters: [] - deprecated: true - /v1/openai/v1/moderations: + operationId: openai_list_models_v1_models_get post: responses: '200': - description: A moderation object. + description: A Model. 
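The multipart upload schema above names its form fields explicitly (`file` and `purpose` required, `expires_after` optional), so the sketch below is well grounded; the `purpose` value and the `id` response key are assumptions:

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # POST /v1/openai/v1/files (deprecated prefix) as multipart/form-data
    with open("train.jsonl", "rb") as f:
        resp = requests.post(
            f"{BASE}/v1/openai/v1/files",
            files={"file": f},
            data={"purpose": "fine-tune"},   # value assumed; see OpenAIFilePurpose
        )
    file_id = resp.json()["id"]   # OpenAIFileObject key assumed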
content: application/json: schema: - $ref: '#/components/schemas/ModerationObject' + $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Create moderation. - description: >- - Create moderation. + - Models + summary: Register Model + description: |- + Register model. - Classifies if text and/or image inputs are potentially harmful. - parameters: [] + Register a model. + operationId: register_model_v1_models_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/RunModerationRequest' + $ref: '#/components/schemas/RegisterModelRequest' required: true deprecated: true - /v1/openai/v1/responses: + /v1/models/{model_id}: get: responses: '200': - description: A ListOpenAIResponseObject. + description: A Model. content: application/json: schema: - $ref: '#/components/schemas/ListOpenAIResponseObject' + $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: List all responses. - description: List all responses. + - Models + summary: Get Model + description: |- + Get model. + + Get a model by its identifier. + operationId: get_model_v1_models__model_id__get parameters: - - name: after - in: query - description: The ID of the last response to return. - required: false - schema: - type: string - - name: limit - in: query - description: The number of responses to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter responses by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort responses by when sorted by created_at ('asc' or 'desc'). - required: false - schema: - $ref: '#/components/schemas/Order' + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' + delete: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Models + summary: Unregister Model + description: |- + Unregister model. + + Unregister a model. 
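The diff replaces the /v1/openai/v1/models prefix with plain /v1/models; a sketch of the new routes (model ID hypothetical):

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # GET /v1/models -> OpenAIListModelsResponse
    models = requests.get(f"{BASE}/v1/models").json()
    # GET /v1/models/{model_id} -> Model
    model = requests.get(f"{BASE}/v1/models/llama3.2:3b").json()   # hypothetical model ID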
+ operationId: unregister_model_v1_models__model_id__delete + parameters: + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' deprecated: true + /v1/scoring-functions: + get: + responses: + '200': + description: A ListScoringFunctionsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListScoringFunctionsResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Scoring Functions + summary: List Scoring Functions + description: List all scoring functions. + operationId: list_scoring_functions_v1_scoring_functions_get + post: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Scoring Functions + summary: Register Scoring Function + description: Register a scoring function. + operationId: register_scoring_function_v1_scoring_functions_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + deprecated: true + /v1/scoring-functions/{scoring_fn_id}: + get: + responses: + '200': + description: A ScoringFn. + content: + application/json: + schema: + $ref: '#/components/schemas/ScoringFn' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Scoring Functions + summary: Get Scoring Function + description: Get a scoring function by its ID. + operationId: get_scoring_function_v1_scoring_functions__scoring_fn_id__get + parameters: + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' + delete: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Scoring Functions + summary: Unregister Scoring Function + description: Unregister a scoring function. + operationId: unregister_scoring_function_v1_scoring_functions__scoring_fn_id__delete + parameters: + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' + deprecated: true + /v1/shields: + get: + responses: + '200': + description: A ListShieldsResponse. 
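A sketch of the scoring-function routes above; note that unregister is documented to return 204 with no body (scoring-function ID hypothetical):

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # GET /v1/scoring-functions -> ListScoringFunctionsResponse
    fns = requests.get(f"{BASE}/v1/scoring-functions").json()
    # DELETE /v1/scoring-functions/{scoring_fn_id} -> 204 Successful Response
    requests.delete(f"{BASE}/v1/scoring-functions/basic::equality").raise_for_status()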
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListShieldsResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Shields + summary: List Shields + description: List all shields. + operationId: list_shields_v1_shields_get post: responses: '200': - description: An OpenAIResponseObject. + description: A Shield. content: application/json: schema: - $ref: '#/components/schemas/OpenAIResponseObject' - text/event-stream: - schema: - $ref: '#/components/schemas/OpenAIResponseObjectStream' + $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Create a model response. - description: Create a model response. - parameters: [] + - Shields + summary: Register Shield + description: Register a shield. + operationId: register_shield_v1_shields_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/CreateOpenaiResponseRequest' + $ref: '#/components/schemas/RegisterShieldRequest' required: true deprecated: true - x-llama-stack-extra-body-params: - - name: guardrails - schema: - type: array - items: - oneOf: - - type: string - - $ref: '#/components/schemas/ResponseGuardrailSpec' - description: >- - List of guardrails to apply during response generation. Guardrails provide - safety and content moderation. - required: false - /v1/openai/v1/responses/{response_id}: + /v1/shields/{identifier}: get: responses: '200': - description: An OpenAIResponseObject. + description: A Shield. content: application/json: schema: - $ref: '#/components/schemas/OpenAIResponseObject' + $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Get a model response. - description: Get a model response. + - Shields + summary: Get Shield + description: Get a shield by its identifier. + operationId: get_shield_v1_shields__identifier__get parameters: - - name: response_id - in: path - description: >- - The ID of the OpenAI response to retrieve. 
- required: true - schema: - type: string + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' + delete: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Shields + summary: Unregister Shield + description: Unregister a shield. + operationId: unregister_shield_v1_shields__identifier__delete + parameters: + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' + deprecated: true + /v1/tool-runtime/invoke: + post: + responses: + '200': + description: A ToolInvocationResult. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolInvocationResult' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Tool Runtime + summary: Invoke Tool + description: Run a tool with the given arguments. + operationId: invoke_tool_v1_tool_runtime_invoke_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InvokeToolRequest' + required: true + deprecated: true + /v1/tool-runtime/list-tools: + get: + responses: + '200': + description: A ListToolDefsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolDefsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Tool Runtime + summary: List Runtime Tools + description: List all tools in the runtime. + operationId: list_runtime_tools_v1_tool_runtime_list_tools_get + parameters: + - name: authorization + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Authorization + - name: tool_group_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Tool Group Id + - name: mcp_endpoint + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/URL' + - type: 'null' + title: Mcp Endpoint + deprecated: true + /v1/toolgroups: + get: + responses: + '200': + description: A ListToolGroupsResponse. 
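The list-tools route above documents tool_group_id, authorization, and mcp_endpoint as optional query parameters; a sketch filtering by group (group ID hypothetical):

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # GET /v1/tool-runtime/list-tools -> ListToolDefsResponse
    tools = requests.get(
        f"{BASE}/v1/tool-runtime/list-tools",
        params={"tool_group_id": "builtin::websearch"},   # hypothetical group ID
    ).json()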
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListToolGroupsResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Tool Groups + summary: List Tool Groups + description: List tool groups with optional provider. + operationId: list_tool_groups_v1_toolgroups_get + deprecated: true + post: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Tool Groups + summary: Register Tool Group + description: Register a tool group. + operationId: register_tool_group_v1_toolgroups_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterToolGroupRequest' + required: true + deprecated: true + /v1/toolgroups/{toolgroup_id}: + get: + responses: + '200': + description: A ToolGroup. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolGroup' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Tool Groups + summary: Get Tool Group + description: Get a tool group by its ID. + operationId: get_tool_group_v1_toolgroups__toolgroup_id__get + parameters: + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' deprecated: true delete: responses: - '200': - description: An OpenAIDeleteResponseObject - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIDeleteResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Agents - summary: Delete a response. - description: Delete a response. + - Tool Groups + summary: Unregister Toolgroup + description: Unregister a tool group. + operationId: unregister_toolgroup_v1_toolgroups__toolgroup_id__delete parameters: - - name: response_id - in: path - description: The ID of the OpenAI response to delete. 
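A sketch of the tool-group lookup routes above (group ID hypothetical):

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # GET /v1/toolgroups -> ListToolGroupsResponse
    groups = requests.get(f"{BASE}/v1/toolgroups").json()
    # GET /v1/toolgroups/{toolgroup_id} -> ToolGroup
    group = requests.get(f"{BASE}/v1/toolgroups/builtin::rag").json()   # hypothetical ID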
- required: true - schema: - type: string + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' deprecated: true - /v1/openai/v1/responses/{response_id}/input_items: + /v1/tools: get: responses: '200': - description: An ListOpenAIResponseInputItem. + description: A ListToolDefsResponse. content: application/json: schema: - $ref: '#/components/schemas/ListOpenAIResponseInputItem' + $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List input items. - description: List input items. + - Tool Groups + summary: List Tools + description: List tools with optional tool group. + operationId: list_tools_v1_tools_get parameters: - - name: response_id - in: path - description: >- - The ID of the response to retrieve input items for. - required: true - schema: - type: string - - name: after - in: query - description: >- - An item ID to list items after, used for pagination. - required: false - schema: - type: string - - name: before - in: query - description: >- - An item ID to list items before, used for pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Additional fields to include in the response. - required: false - schema: - type: array - items: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return the input items in. Default is desc. - required: false - schema: - $ref: '#/components/schemas/Order' + - name: toolgroup_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Toolgroup Id deprecated: true - /v1/openai/v1/vector_stores: + /v1/tools/{tool_name}: get: responses: '200': - description: >- - A VectorStoreListResponse containing the list of vector stores. + description: A ToolDef. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreListResponse' + $ref: '#/components/schemas/ToolDef' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Returns a list of vector stores. - description: Returns a list of vector stores. + - Tool Groups + summary: Get Tool + description: Get a tool by its name. + operationId: get_tool_v1_tools__tool_name__get parameters: - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. 
- required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string + - name: tool_name + in: path + required: true + schema: + type: string + description: 'Path parameter: tool_name' deprecated: true + /v1beta/datasets: + get: + responses: + '200': + description: A ListDatasetsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListDatasetsResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Datasets + summary: List Datasets + description: List all datasets. + operationId: list_datasets_v1beta_datasets_get post: responses: '200': - description: >- - A VectorStoreObject representing the created vector store. + description: A Dataset. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreObject' + $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Creates a vector store. - description: >- - Creates a vector store. - - Generate an OpenAI-compatible vector store with the given parameters. - parameters: [] + - Datasets + summary: Register Dataset + description: Register a new dataset. + operationId: register_dataset_v1beta_datasets_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' + $ref: '#/components/schemas/RegisterDatasetRequest' required: true deprecated: true - /v1/openai/v1/vector_stores/{vector_store_id}: + /v1beta/datasets/{dataset_id}: get: responses: '200': - description: >- - A VectorStoreObject representing the vector store. + description: A Dataset. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreObject' + $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store. 
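Dataset registry routes move from /v1/datasets to the /v1beta/datasets prefix in this diff; a sketch of the relocated lookups (dataset ID hypothetical):

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # GET /v1beta/datasets -> ListDatasetsResponse
    datasets = requests.get(f"{BASE}/v1beta/datasets").json()
    # GET /v1beta/datasets/{dataset_id} -> Dataset
    ds = requests.get(f"{BASE}/v1beta/datasets/my-dataset").json()   # hypothetical ID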
- description: Retrieves a vector store. + - Datasets + summary: Get Dataset + description: Get a dataset by its ID. + operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to retrieve. - required: true - schema: - type: string - deprecated: true - post: - responses: - '200': - description: >- - A VectorStoreObject representing the updated vector store. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Updates a vector store. - description: Updates a vector store. - parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to update. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' + - name: dataset_id + in: path required: true - deprecated: true + schema: + type: string + description: 'Path parameter: dataset_id' delete: responses: - '200': - description: >- - A VectorStoreDeleteResponse indicating the deletion status. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - VectorIO - summary: Delete a vector store. - description: Delete a vector store. + - Datasets + summary: Unregister Dataset + description: Unregister a dataset by its ID. + operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to delete. - required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' deprecated: true - /v1/openai/v1/vector_stores/{vector_store_id}/file_batches: - post: + /v1alpha/eval/benchmarks: + get: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the created file batch. + description: A ListBenchmarksResponse. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileBatchObject' + $ref: '#/components/schemas/ListBenchmarksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Create a vector store file batch. - description: >- - Create a vector store file batch. 
- - Generate an OpenAI-compatible vector store file batch for the given vector - store. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to create the file batch for. - required: true - schema: - type: string + - Benchmarks + summary: List Benchmarks + description: List all benchmarks. + operationId: list_benchmarks_v1alpha_eval_benchmarks_get + post: + responses: + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response + tags: + - Benchmarks + summary: Register Benchmark + description: Register a benchmark. + operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' + $ref: '#/components/schemas/RegisterBenchmarkRequest' required: true deprecated: true - /v1/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: + /v1alpha/eval/benchmarks/{benchmark_id}: get: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the file batch. + description: A Benchmark. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileBatchObject' + $ref: '#/components/schemas/Benchmark' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieve a vector store file batch. - description: Retrieve a vector store file batch. + - Benchmarks + summary: Get Benchmark + description: Get a benchmark by its ID. + operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: - - name: batch_id - in: path - description: The ID of the file batch to retrieve. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - deprecated: true - /v1/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel: - post: - responses: - '200': - description: >- - A VectorStoreFileBatchObject representing the cancelled file batch. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreFileBatchObject' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: Cancels a vector store file batch. - description: Cancels a vector store file batch. - parameters: - - name: batch_id - in: path - description: The ID of the file batch to cancel. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. 
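Eval benchmark routes similarly move from /v1/eval to the /v1alpha/eval prefix; a sketch of the relocated lookups (benchmark ID hypothetical):

    import requests

    BASE = "http://localhost:8321"   # assumed server address

    # GET /v1alpha/eval/benchmarks -> ListBenchmarksResponse
    benchmarks = requests.get(f"{BASE}/v1alpha/eval/benchmarks").json()
    # GET /v1alpha/eval/benchmarks/{benchmark_id} -> Benchmark
    bm = requests.get(f"{BASE}/v1alpha/eval/benchmarks/mmlu").json()   # hypothetical ID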
- required: true - schema: - type: string - deprecated: true - /v1/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files: - get: - responses: - '200': - description: >- - A VectorStoreFilesListInBatchResponse containing the list of files in - the batch. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: >- - Returns a list of vector store files in a batch. - description: >- - Returns a list of vector store files in a batch. - parameters: - - name: batch_id - in: path - description: >- - The ID of the file batch to list files from. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - Filter by file status. One of in_progress, completed, failed, cancelled. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - type: string - deprecated: true - /v1/openai/v1/vector_stores/{vector_store_id}/files: - get: - responses: - '200': - description: >- - A VectorStoreListFilesResponse containing the list of files. - content: - application/json: - schema: - $ref: '#/components/schemas/VectorStoreListFilesResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - VectorIO - summary: List files in a vector store. - description: List files in a vector store. - parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to list files from. - required: true - schema: - type: string - - name: limit - in: query - description: >- - (Optional) A limit on the number of objects to be returned. Limit can - range between 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - (Optional) Sort order by the `created_at` timestamp of the objects. `asc` - for ascending order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - (Optional) A cursor for use in pagination. `after` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - (Optional) A cursor for use in pagination. 
-            `before` is an object ID that
-            defines your place in the list.
-          required: false
-          schema:
-            type: string
-        - name: filter
-          in: query
-          description: >-
-            (Optional) Filter by file status to only return files with the specified
-            status.
-          required: false
-          schema:
-            $ref: '#/components/schemas/VectorStoreFileStatus'
-      deprecated: true
-    post:
-      responses:
-        '200':
-          description: >-
-            A VectorStoreFileObject representing the attached file.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/VectorStoreFileObject'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - VectorIO
-      summary: Attach a file to a vector store.
-      description: Attach a file to a vector store.
-      parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store to attach the file to.
-          required: true
-          schema:
-            type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest'
+        - name: benchmark_id
+          in: path
           required: true
-      deprecated: true
-  /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}:
-    get:
-      responses:
-        '200':
-          description: >-
-            A VectorStoreFileObject representing the file.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/VectorStoreFileObject'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - VectorIO
-      summary: Retrieves a vector store file.
-      description: Retrieves a vector store file.
-      parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file to retrieve.
-          required: true
-          schema:
-            type: string
-        - name: file_id
-          in: path
-          description: The ID of the file to retrieve.
-          required: true
-          schema:
-            type: string
-      deprecated: true
-    post:
-      responses:
-        '200':
-          description: >-
-            A VectorStoreFileObject representing the updated file.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/VectorStoreFileObject'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - VectorIO
-      summary: Updates a vector store file.
-      description: Updates a vector store file.
-      parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file to update.
-          required: true
-          schema:
-            type: string
-        - name: file_id
-          in: path
-          description: The ID of the file to update.
-          required: true
-          schema:
-            type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest'
-        required: true
-      deprecated: true
+          schema:
+            type: string
+            description: 'Path parameter: benchmark_id'
     delete:
       responses:
-        '200':
-          description: >-
-            A VectorStoreFileDeleteResponse indicating the deletion status.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/VectorStoreFileDeleteResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - VectorIO
-      summary: Delete a vector store file.
-      description: Delete a vector store file.
+        - Benchmarks
+      summary: Unregister Benchmark
+      description: Unregister a benchmark.
+      operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
       parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file to delete.
-          required: true
-          schema:
-            type: string
-        - name: file_id
-          in: path
-          description: The ID of the file to delete.
-          required: true
-          schema:
-            type: string
-      deprecated: true
-  /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content:
-    get:
-      responses:
-        '200':
-          description: >-
-            A list of InterleavedContent representing the file contents.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - VectorIO
-      summary: >-
-        Retrieves the contents of a vector store file.
-      description: >-
-        Retrieves the contents of a vector store file.
-      parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file to retrieve.
-          required: true
-          schema:
-            type: string
-        - name: file_id
-          in: path
-          description: The ID of the file to retrieve.
-          required: true
-          schema:
-            type: string
-      deprecated: true
-  /v1/openai/v1/vector_stores/{vector_store_id}/search:
-    post:
-      responses:
-        '200':
-          description: >-
-            A VectorStoreSearchResponse containing the search results.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/VectorStoreSearchResponsePage'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - VectorIO
-      summary: Search for chunks in a vector store.
-      description: >-
-        Search for chunks in a vector store.
-
-        Searches a vector store for relevant chunks based on a query and optional
-        file attribute filters.
-      parameters:
-        - name: vector_store_id
-          in: path
-          description: The ID of the vector store to search.
-          required: true
-          schema:
-            type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest'
+        - name: benchmark_id
+          in: path
           required: true
+          schema:
+            type: string
+            description: 'Path parameter: benchmark_id'
       deprecated: true
-  /v1/post-training/job/artifacts:
-    get:
-      responses:
-        '200':
-          description: A PostTrainingJobArtifactsResponse.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - PostTraining (Coming Soon)
-      summary: Get the artifacts of a training job.
-      description: Get the artifacts of a training job.
-      parameters:
-        - name: job_uuid
-          in: query
-          description: >-
-            The UUID of the job to get the artifacts of.
-          required: true
-          schema:
-            type: string
-      deprecated: true
-  /v1/post-training/job/cancel:
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - PostTraining (Coming Soon)
-      summary: Cancel a training job.
-      description: Cancel a training job.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CancelTrainingJobRequest'
-        required: true
-      deprecated: true
-  /v1/post-training/job/status:
-    get:
-      responses:
-        '200':
-          description: A PostTrainingJobStatusResponse.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/PostTrainingJobStatusResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - PostTraining (Coming Soon)
-      summary: Get the status of a training job.
-      description: Get the status of a training job.
-      parameters:
-        - name: job_uuid
-          in: query
-          description: >-
-            The UUID of the job to get the status of.
-          required: true
-          schema:
-            type: string
-      deprecated: true
-  /v1/post-training/jobs:
-    get:
-      responses:
-        '200':
-          description: A ListPostTrainingJobsResponse.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ListPostTrainingJobsResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - PostTraining (Coming Soon)
-      summary: Get all training jobs.
-      description: Get all training jobs.
-      parameters: []
-      deprecated: true
-  /v1/post-training/preference-optimize:
-    post:
-      responses:
-        '200':
-          description: A PostTrainingJob.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/PostTrainingJob'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - PostTraining (Coming Soon)
-      summary: Run preference optimization of a model.
-      description: Run preference optimization of a model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/PreferenceOptimizeRequest'
-        required: true
-      deprecated: true
-  /v1/post-training/supervised-fine-tune:
-    post:
-      responses:
-        '200':
-          description: A PostTrainingJob.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/PostTrainingJob'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - PostTraining (Coming Soon)
-      summary: Run supervised fine-tuning of a model.
-      description: Run supervised fine-tuning of a model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/SupervisedFineTuneRequest'
-        required: true
-      deprecated: true
-jsonSchemaDialect: >-
-  https://json-schema.org/draft/2020-12/schema
 components:
   schemas:
     Error:
-      type: object
+      description: Error response from the API. Roughly follows RFC 7807.
       properties:
         status:
+          title: Status
           type: integer
-          description: HTTP status code
         title:
+          title: Title
           type: string
-          description: >-
-            Error title, a short summary of the error which is invariant for an error
-            type
         detail:
+          title: Detail
           type: string
-          description: >-
-            Error detail, a longer human-readable description of the error
         instance:
-          type: string
-          description: >-
-            (Optional) A URL which can be used to retrieve more information about
-            the specific occurrence of the error
-      additionalProperties: false
+          anyOf:
+          - type: string
+          - type: 'null'
+          nullable: true
       required:
-        - status
-        - title
-        - detail
+      - status
+      - title
+      - detail
       title: Error
-      description: >-
-        Error response from the API. Roughly follows RFC 7807.
-    PaginatedResponse:
       type: object
+    ListBatchesResponse:
       properties:
-        data:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: The list of items for the current page
-        has_more:
-          type: boolean
-          description: >-
-            Whether there are more items available after this set
-        url:
-          type: string
-          description: The URL for accessing this list
-      additionalProperties: false
-      required:
-        - data
-        - has_more
-      title: PaginatedResponse
-      description: >-
-        A generic paginated response that follows a simple format.
-    AgentConfig:
-      type: object
-      properties:
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-        input_shields:
-          type: array
-          items:
-            type: string
-        output_shields:
-          type: array
-          items:
-            type: string
-        toolgroups:
-          type: array
-          items:
-            $ref: '#/components/schemas/AgentTool'
-        client_tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolDef'
-        tool_choice:
-          type: string
-          enum:
-            - auto
-            - required
-            - none
-          title: ToolChoice
-          description: >-
-            Whether tool use is required or automatic. This is a hint to the model
-            which may not be followed. It depends on the Instruction Following capabilities
-            of the model.
-          deprecated: true
-        tool_prompt_format:
-          type: string
-          enum:
-            - json
-            - function_tag
-            - python_list
-          title: ToolPromptFormat
-          description: >-
-            Prompt format for calling custom / zero shot tools.
-          deprecated: true
-        tool_config:
-          $ref: '#/components/schemas/ToolConfig'
-        max_infer_iters:
-          type: integer
-          default: 10
-        model:
-          type: string
-          description: >-
-            The model identifier to use for the agent
-        instructions:
-          type: string
-          description: The system instructions for the agent
-        name:
-          type: string
-          description: >-
-            Optional name for the agent, used in telemetry and identification
-        enable_session_persistence:
-          type: boolean
-          default: false
-          description: >-
-            Optional flag indicating whether session data has to be persisted
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-          description: Optional response format configuration
-      additionalProperties: false
-      required:
-        - model
-        - instructions
-      title: AgentConfig
-      description: Configuration for an agent.
-    AgentTool:
-      oneOf:
-        - type: string
-        - type: object
-          properties:
-            name:
-              type: string
-            args:
-              type: object
-              additionalProperties:
-                oneOf:
-                  - type: 'null'
-                  - type: boolean
-                  - type: number
-                  - type: string
-                  - type: array
-                  - type: object
-          additionalProperties: false
-          required:
-            - name
-            - args
-          title: AgentToolGroupWithArgs
-    GrammarResponseFormat:
-      type: object
-      properties:
-        type:
-          type: string
-          enum:
-            - json_schema
-            - grammar
-          description: >-
-            Must be "grammar" to identify this format type
-          const: grammar
-          default: grammar
-        bnf:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The BNF grammar specification the response should conform to
-      additionalProperties: false
-      required:
-        - type
-        - bnf
-      title: GrammarResponseFormat
-      description: >-
-        Configuration for grammar-guided response generation.
-    GreedySamplingStrategy:
-      type: object
-      properties:
-        type:
-          type: string
-          const: greedy
-          default: greedy
-          description: >-
-            Must be "greedy" to identify this sampling strategy
-      additionalProperties: false
-      required:
-        - type
-      title: GreedySamplingStrategy
-      description: >-
-        Greedy sampling strategy that selects the highest probability token at each
-        step.
-    JsonSchemaResponseFormat:
-      type: object
-      properties:
-        type:
-          type: string
-          enum:
-            - json_schema
-            - grammar
-          description: >-
-            Must be "json_schema" to identify this format type
-          const: json_schema
-          default: json_schema
-        json_schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The JSON schema the response should conform to. In a Python SDK, this
-            is often a `pydantic` model.
-      additionalProperties: false
-      required:
-        - type
-        - json_schema
-      title: JsonSchemaResponseFormat
-      description: >-
-        Configuration for JSON schema-guided response generation.
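For orientation while reading the removed response-format schemas above, here is a minimal sketch of a payload matching the JsonSchemaResponseFormat definition. It is an illustration only: the target schema contents are assumed example values, not anything taken from this spec.

    import json

    # Hypothetical response_format payload per the removed
    # JsonSchemaResponseFormat schema: "type" is the discriminator and
    # "json_schema" carries the schema the response must conform to.
    response_format = {
        "type": "json_schema",
        "json_schema": {                      # assumed example schema
            "type": "object",
            "properties": {"answer": {"type": "string"}},
            "required": ["answer"],
        },
    }
    print(json.dumps(response_format, indent=2))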
-    ResponseFormat:
-      oneOf:
-        - $ref: '#/components/schemas/JsonSchemaResponseFormat'
-        - $ref: '#/components/schemas/GrammarResponseFormat'
-      discriminator:
-        propertyName: type
-        mapping:
-          json_schema: '#/components/schemas/JsonSchemaResponseFormat'
-          grammar: '#/components/schemas/GrammarResponseFormat'
-    SamplingParams:
-      type: object
-      properties:
-        strategy:
-          oneOf:
-            - $ref: '#/components/schemas/GreedySamplingStrategy'
-            - $ref: '#/components/schemas/TopPSamplingStrategy'
-            - $ref: '#/components/schemas/TopKSamplingStrategy'
-          discriminator:
-            propertyName: type
-            mapping:
-              greedy: '#/components/schemas/GreedySamplingStrategy'
-              top_p: '#/components/schemas/TopPSamplingStrategy'
-              top_k: '#/components/schemas/TopKSamplingStrategy'
-          description: The sampling strategy.
-        max_tokens:
-          type: integer
-          default: 0
-          description: >-
-            The maximum number of tokens that can be generated in the completion.
-            The token count of your prompt plus max_tokens cannot exceed the model's
-            context length.
-        repetition_penalty:
-          type: number
-          default: 1.0
-          description: >-
-            Number between -2.0 and 2.0. Positive values penalize new tokens based
-            on whether they appear in the text so far, increasing the model's likelihood
-            to talk about new topics.
-        stop:
-          type: array
-          items:
-            type: string
-          description: >-
-            Up to 4 sequences where the API will stop generating further tokens. The
-            returned text will not contain the stop sequence.
-      additionalProperties: false
-      required:
-        - strategy
-      title: SamplingParams
-      description: Sampling parameters.
-    ToolConfig:
-      type: object
-      properties:
-        tool_choice:
-          oneOf:
-            - type: string
-              enum:
-                - auto
-                - required
-                - none
-              title: ToolChoice
-              description: >-
-                Whether tool use is required or automatic. This is a hint to the model
-                which may not be followed. It depends on the Instruction Following
-                capabilities of the model.
-            - type: string
-          default: auto
-          description: >-
-            (Optional) Whether tool use is automatic, required, or none. Can also
-            specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
-        tool_prompt_format:
-          type: string
-          enum:
-            - json
-            - function_tag
-            - python_list
-          description: >-
-            (Optional) Instructs the model how to format tool calls. By default, Llama
-            Stack will attempt to use a format that is best adapted to the model.
-            - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
-            - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
-            tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python
-            syntax -- a list of function calls.
-        system_message_behavior:
-          type: string
-          enum:
-            - append
-            - replace
-          description: >-
-            (Optional) Config for how to override the default system prompt. -
-            `SystemMessageBehavior.append`: Appends the provided system message
-            to the default system prompt. - `SystemMessageBehavior.replace`:
-            Replaces the default system prompt with the provided system message. The
-            system message can include the string '{{function_definitions}}' to indicate
-            where the function definitions should be inserted.
-          default: append
-      additionalProperties: false
-      title: ToolConfig
-      description: Configuration for tool use.
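The SamplingParams and ToolConfig schemas above are both discriminated/enumerated structures; a minimal sketch of payloads that would validate against them follows. All numeric values and the choice of strategy are illustrative assumptions.

    # Hypothetical payloads per the removed SamplingParams and ToolConfig
    # schemas; values are assumed examples.
    sampling_params = {
        "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
        "max_tokens": 512,
        "repetition_penalty": 1.0,
        "stop": ["\n\n"],
    }
    tool_config = {
        "tool_choice": "auto",             # or "required", "none", or a tool name
        "tool_prompt_format": "json",      # json | function_tag | python_list
        "system_message_behavior": "append",
    }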
-    ToolDef:
-      type: object
-      properties:
-        toolgroup_id:
-          type: string
-          description: >-
-            (Optional) ID of the tool group this tool belongs to
-        name:
-          type: string
-          description: Name of the tool
-        description:
-          type: string
-          description: >-
-            (Optional) Human-readable description of what the tool does
-        input_schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) JSON Schema for tool inputs (MCP inputSchema)
-        output_schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) JSON Schema for tool outputs (MCP outputSchema)
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata about the tool
-      additionalProperties: false
-      required:
-        - name
-      title: ToolDef
-      description: >-
-        Tool definition used in runtime contexts.
-    TopKSamplingStrategy:
-      type: object
-      properties:
-        type:
-          type: string
-          const: top_k
-          default: top_k
-          description: >-
-            Must be "top_k" to identify this sampling strategy
-        top_k:
-          type: integer
-          description: >-
-            Number of top tokens to consider for sampling. Must be at least 1
-      additionalProperties: false
-      required:
-        - type
-        - top_k
-      title: TopKSamplingStrategy
-      description: >-
-        Top-k sampling strategy that restricts sampling to the k most likely tokens.
-    TopPSamplingStrategy:
-      type: object
-      properties:
-        type:
-          type: string
-          const: top_p
-          default: top_p
-          description: >-
-            Must be "top_p" to identify this sampling strategy
-        temperature:
-          type: number
-          description: >-
-            Controls randomness in sampling. Higher values increase randomness
-        top_p:
-          type: number
-          default: 0.95
-          description: >-
-            Cumulative probability threshold for nucleus sampling. Defaults to 0.95
-      additionalProperties: false
-      required:
-        - type
-      title: TopPSamplingStrategy
-      description: >-
-        Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
-        with cumulative probability >= p.
-    CreateAgentRequest:
-      type: object
-      properties:
-        agent_config:
-          $ref: '#/components/schemas/AgentConfig'
-          description: The configuration for the agent.
-      additionalProperties: false
-      required:
-        - agent_config
-      title: CreateAgentRequest
-    AgentCreateResponse:
-      type: object
-      properties:
-        agent_id:
-          type: string
-          description: Unique identifier for the created agent
-      additionalProperties: false
-      required:
-        - agent_id
-      title: AgentCreateResponse
-      description: >-
-        Response returned when creating a new agent.
-    Agent:
-      type: object
-      properties:
-        agent_id:
-          type: string
-          description: Unique identifier for the agent
-        agent_config:
-          $ref: '#/components/schemas/AgentConfig'
-          description: Configuration settings for the agent
-        created_at:
-          type: string
-          format: date-time
-          description: Timestamp when the agent was created
-      additionalProperties: false
-      required:
-        - agent_id
-        - agent_config
-        - created_at
-      title: Agent
-      description: >-
-        An agent instance with configuration and metadata.
-    CreateAgentSessionRequest:
-      type: object
-      properties:
-        session_name:
-          type: string
-          description: The name of the session to create.
-      additionalProperties: false
-      required:
-        - session_name
-      title: CreateAgentSessionRequest
-    AgentSessionCreateResponse:
-      type: object
-      properties:
-        session_id:
-          type: string
-          description: >-
-            Unique identifier for the created session
-      additionalProperties: false
-      required:
-        - session_id
-      title: AgentSessionCreateResponse
-      description: >-
-        Response returned when creating a new agent session.
-    CompletionMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: assistant
-          default: assistant
-          description: >-
-            Must be "assistant" to identify this as the model's response
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The content of the model's response
-        stop_reason:
-          type: string
-          enum:
-            - end_of_turn
-            - end_of_message
-            - out_of_tokens
-          description: >-
-            Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
-            The model finished generating the entire response. - `StopReason.end_of_message`:
-            The model finished generating but generated a partial response -- usually,
-            a tool call. The user may call the tool and continue the conversation
-            with the tool's response. - `StopReason.out_of_tokens`: The model ran
-            out of token budget.
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolCall'
-          description: >-
-            List of tool calls. Each tool call is a ToolCall object.
-      additionalProperties: false
-      required:
-        - role
-        - content
-        - stop_reason
-      title: CompletionMessage
-      description: >-
-        A message containing the model's (assistant) response in a chat conversation.
-    ImageContentItem:
-      type: object
-      properties:
-        type:
-          type: string
-          const: image
-          default: image
-          description: >-
-            Discriminator type of the content item. Always "image"
-        image:
-          type: object
-          properties:
-            url:
-              $ref: '#/components/schemas/URL'
-              description: >-
-                A URL of the image or data URL in the format of data:image/{type};base64,{data}.
-                Note that URL could have length limits.
-            data:
-              type: string
-              contentEncoding: base64
-              description: base64 encoded image data as string
-          additionalProperties: false
-          description: >-
-            Image as a base64 encoded string or an URL
-      additionalProperties: false
-      required:
-        - type
-        - image
-      title: ImageContentItem
-      description: A image content item
-    InferenceStep:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: The ID of the turn.
-        step_id:
-          type: string
-          description: The ID of the step.
-        started_at:
-          type: string
-          format: date-time
-          description: The time the step started.
-        completed_at:
-          type: string
-          format: date-time
-          description: The time the step completed.
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          title: StepType
-          description: Type of the step in an agent turn.
-          const: inference
-          default: inference
-        model_response:
-          $ref: '#/components/schemas/CompletionMessage'
-          description: The response from the LLM.
-      additionalProperties: false
-      required:
-        - turn_id
-        - step_id
-        - step_type
-        - model_response
-      title: InferenceStep
-      description: An inference step in an agent turn.
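A minimal sketch of a CompletionMessage as the removed schema above describes it: the role is pinned to "assistant", stop_reason is one of the three enumerated values, and the content here uses the plain-string form of InterleavedContent. The message text is an assumed example.

    # Hypothetical CompletionMessage per the removed schema.
    completion_message = {
        "role": "assistant",
        "content": "Paris is the capital of France.",  # assumed example text
        "stop_reason": "end_of_turn",
        "tool_calls": [],
    }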
-    InterleavedContent:
-      oneOf:
-        - type: string
-        - $ref: '#/components/schemas/InterleavedContentItem'
-        - type: array
-          items:
-            $ref: '#/components/schemas/InterleavedContentItem'
-    InterleavedContentItem:
-      oneOf:
-        - $ref: '#/components/schemas/ImageContentItem'
-        - $ref: '#/components/schemas/TextContentItem'
-      discriminator:
-        propertyName: type
-        mapping:
-          image: '#/components/schemas/ImageContentItem'
-          text: '#/components/schemas/TextContentItem'
-    MemoryRetrievalStep:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: The ID of the turn.
-        step_id:
-          type: string
-          description: The ID of the step.
-        started_at:
-          type: string
-          format: date-time
-          description: The time the step started.
-        completed_at:
-          type: string
-          format: date-time
-          description: The time the step completed.
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          title: StepType
-          description: Type of the step in an agent turn.
-          const: memory_retrieval
-          default: memory_retrieval
-        vector_db_ids:
-          type: string
-          description: >-
-            The IDs of the vector databases to retrieve context from.
-        inserted_context:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The context retrieved from the vector databases.
-      additionalProperties: false
-      required:
-        - turn_id
-        - step_id
-        - step_type
-        - vector_db_ids
-        - inserted_context
-      title: MemoryRetrievalStep
-      description: >-
-        A memory retrieval step in an agent turn.
-    SafetyViolation:
-      type: object
-      properties:
-        violation_level:
-          $ref: '#/components/schemas/ViolationLevel'
-          description: Severity level of the violation
-        user_message:
-          type: string
-          description: >-
-            (Optional) Message to convey to the user about the violation
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata including specific violation codes for debugging and
-            telemetry
-      additionalProperties: false
-      required:
-        - violation_level
-        - metadata
-      title: SafetyViolation
-      description: >-
-        Details of a safety violation detected by content moderation.
-    Session:
-      type: object
-      properties:
-        session_id:
-          type: string
-          description: >-
-            Unique identifier for the conversation session
-        session_name:
-          type: string
-          description: Human-readable name for the session
-        turns:
-          type: array
-          items:
-            $ref: '#/components/schemas/Turn'
-          description: >-
-            List of all turns that have occurred in this session
-        started_at:
-          type: string
-          format: date-time
-          description: Timestamp when the session was created
-      additionalProperties: false
-      required:
-        - session_id
-        - session_name
-        - turns
-        - started_at
-      title: Session
-      description: >-
-        A single session of an interaction with an Agentic System.
-    ShieldCallStep:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: The ID of the turn.
-        step_id:
-          type: string
-          description: The ID of the step.
-        started_at:
-          type: string
-          format: date-time
-          description: The time the step started.
-        completed_at:
-          type: string
-          format: date-time
-          description: The time the step completed.
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          title: StepType
-          description: Type of the step in an agent turn.
-          const: shield_call
-          default: shield_call
-        violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: The violation from the shield call.
-      additionalProperties: false
-      required:
-        - turn_id
-        - step_id
-        - step_type
-      title: ShieldCallStep
-      description: A shield call step in an agent turn.
-    TextContentItem:
-      type: object
-      properties:
-        type:
-          type: string
-          const: text
-          default: text
-          description: >-
-            Discriminator type of the content item. Always "text"
-        text:
-          type: string
-          description: Text content
-      additionalProperties: false
-      required:
-        - type
-        - text
-      title: TextContentItem
-      description: A text content item
-    ToolCall:
-      type: object
-      properties:
-        call_id:
-          type: string
-        tool_name:
-          oneOf:
-            - type: string
-              enum:
-                - brave_search
-                - wolfram_alpha
-                - photogen
-                - code_interpreter
-              title: BuiltinTool
-            - type: string
-        arguments:
-          type: string
-      additionalProperties: false
-      required:
-        - call_id
-        - tool_name
-        - arguments
-      title: ToolCall
-    ToolExecutionStep:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: The ID of the turn.
-        step_id:
-          type: string
-          description: The ID of the step.
-        started_at:
-          type: string
-          format: date-time
-          description: The time the step started.
-        completed_at:
-          type: string
-          format: date-time
-          description: The time the step completed.
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          title: StepType
-          description: Type of the step in an agent turn.
-          const: tool_execution
-          default: tool_execution
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolCall'
-          description: The tool calls to execute.
-        tool_responses:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolResponse'
-          description: The tool responses from the tool calls.
-      additionalProperties: false
-      required:
-        - turn_id
-        - step_id
-        - step_type
-        - tool_calls
-        - tool_responses
-      title: ToolExecutionStep
-      description: A tool execution step in an agent turn.
-    ToolResponse:
-      type: object
-      properties:
-        call_id:
-          type: string
-          description: >-
-            Unique identifier for the tool call this response is for
-        tool_name:
-          oneOf:
-            - type: string
-              enum:
-                - brave_search
-                - wolfram_alpha
-                - photogen
-                - code_interpreter
-              title: BuiltinTool
-            - type: string
-          description: Name of the tool that was invoked
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The response content from the tool
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata about the tool response
-      additionalProperties: false
-      required:
-        - call_id
-        - tool_name
-        - content
-      title: ToolResponse
-      description: Response from a tool invocation.
-    ToolResponseMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: tool
-          default: tool
-          description: >-
-            Must be "tool" to identify this as a tool response
-        call_id:
-          type: string
-          description: >-
-            Unique identifier for the tool call this response is for
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The response content from the tool
-      additionalProperties: false
-      required:
-        - role
-        - call_id
-        - content
-      title: ToolResponseMessage
-      description: >-
-        A message representing the result of a tool invocation.
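A sketch of a tool-call round trip under the ToolCall and ToolResponseMessage schemas above. The tool and its arguments are assumed examples; the one structural point worth noting is that ToolCall.arguments is typed as a string (typically JSON-encoded), not a nested object.

    import json

    # Hypothetical ToolCall and matching ToolResponseMessage per the
    # removed schemas; values are illustrative.
    tool_call = {
        "call_id": "call_123",
        "tool_name": "brave_search",  # a BuiltinTool enum value
        "arguments": json.dumps({"query": "llama stack"}),
    }
    tool_response_message = {
        "role": "tool",
        "call_id": tool_call["call_id"],
        "content": "Top result: example.com (assumed placeholder)",
    }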
-    Turn:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: >-
-            Unique identifier for the turn within a session
-        session_id:
-          type: string
-          description: >-
-            Unique identifier for the conversation session
-        input_messages:
-          type: array
-          items:
-            oneOf:
-              - $ref: '#/components/schemas/UserMessage'
-              - $ref: '#/components/schemas/ToolResponseMessage'
-          description: >-
-            List of messages that initiated this turn
-        steps:
-          type: array
-          items:
-            oneOf:
-              - $ref: '#/components/schemas/InferenceStep'
-              - $ref: '#/components/schemas/ToolExecutionStep'
-              - $ref: '#/components/schemas/ShieldCallStep'
-              - $ref: '#/components/schemas/MemoryRetrievalStep'
-            discriminator:
-              propertyName: step_type
-              mapping:
-                inference: '#/components/schemas/InferenceStep'
-                tool_execution: '#/components/schemas/ToolExecutionStep'
-                shield_call: '#/components/schemas/ShieldCallStep'
-                memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
-          description: >-
-            Ordered list of processing steps executed during this turn
-        output_message:
-          $ref: '#/components/schemas/CompletionMessage'
-          description: >-
-            The model's generated response containing content and metadata
-        output_attachments:
-          type: array
-          items:
-            type: object
-            properties:
-              content:
-                oneOf:
-                  - type: string
-                  - $ref: '#/components/schemas/InterleavedContentItem'
-                  - type: array
-                    items:
-                      $ref: '#/components/schemas/InterleavedContentItem'
-                  - $ref: '#/components/schemas/URL'
-                description: The content of the attachment.
-              mime_type:
-                type: string
-                description: The MIME type of the attachment.
-            additionalProperties: false
-            required:
-              - content
-              - mime_type
-            title: Attachment
-            description: An attachment to an agent turn.
-          description: >-
-            (Optional) Files or media attached to the agent's response
-        started_at:
-          type: string
-          format: date-time
-          description: Timestamp when the turn began
-        completed_at:
-          type: string
-          format: date-time
-          description: >-
-            (Optional) Timestamp when the turn finished, if completed
-      additionalProperties: false
-      required:
-        - turn_id
-        - session_id
-        - input_messages
-        - steps
-        - output_message
-        - started_at
-      title: Turn
-      description: >-
-        A single turn in an interaction with an Agentic System.
-    URL:
-      type: object
-      properties:
-        uri:
-          type: string
-          description: The URL string pointing to the resource
-      additionalProperties: false
-      required:
-        - uri
-      title: URL
-      description: A URL reference to external content.
-    UserMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: user
-          default: user
-          description: >-
-            Must be "user" to identify this as a user message
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The content of the message, which can include text and other media
-        context:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) This field is used internally by Llama Stack to pass RAG context.
-            This field may be removed in the API in the future.
-      additionalProperties: false
-      required:
-        - role
-        - content
-      title: UserMessage
-      description: >-
-        A message from the user in a chat conversation.
-    ViolationLevel:
-      type: string
-      enum:
-        - info
-        - warn
-        - error
-      title: ViolationLevel
-      description: Severity level of a safety violation.
-    CreateAgentTurnRequest:
-      type: object
-      properties:
-        messages:
-          type: array
-          items:
-            oneOf:
-              - $ref: '#/components/schemas/UserMessage'
-              - $ref: '#/components/schemas/ToolResponseMessage'
-          description: List of messages to start the turn with.
-        stream:
-          type: boolean
-          description: >-
-            (Optional) If True, generate an SSE event stream of the response. Defaults
-            to False.
-        documents:
-          type: array
-          items:
-            type: object
-            properties:
-              content:
-                oneOf:
-                  - type: string
-                  - $ref: '#/components/schemas/InterleavedContentItem'
-                  - type: array
-                    items:
-                      $ref: '#/components/schemas/InterleavedContentItem'
-                  - $ref: '#/components/schemas/URL'
-                description: The content of the document.
-              mime_type:
-                type: string
-                description: The MIME type of the document.
-            additionalProperties: false
-            required:
-              - content
-              - mime_type
-            title: Document
-            description: A document to be used by an agent.
-          description: >-
-            (Optional) List of documents to create the turn with.
-        toolgroups:
-          type: array
-          items:
-            $ref: '#/components/schemas/AgentTool'
-          description: >-
-            (Optional) List of toolgroups to create the turn with, will be used in
-            addition to the agent's config toolgroups for the request.
-        tool_config:
-          $ref: '#/components/schemas/ToolConfig'
-          description: >-
-            (Optional) The tool configuration to create the turn with, will be used
-            to override the agent's tool_config.
-      additionalProperties: false
-      required:
-        - messages
-      title: CreateAgentTurnRequest
-    AgentTurnResponseEvent:
-      type: object
-      properties:
-        payload:
-          oneOf:
-            - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
-            - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
-            - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
-            - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
-            - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
-            - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
-          discriminator:
-            propertyName: event_type
-            mapping:
-              step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
-              step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
-              step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
-              turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
-              turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
-              turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
-          description: >-
-            Event-specific payload containing event data
-      additionalProperties: false
-      required:
-        - payload
-      title: AgentTurnResponseEvent
-      description: >-
-        An event in an agent turn response stream.
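The AgentTurnResponseEvent union above is keyed on the event_type discriminator. A minimal sketch of consuming such a stream follows; the `events` iterable is assumed to yield already-parsed event dicts from an SSE stream, which is not something this hunk specifies.

    # Sketch: dispatching on the event_type discriminator of the removed
    # AgentTurnResponseEvent payload union.
    def handle_turn_stream(events):
        for event in events:
            payload = event["payload"]
            kind = payload["event_type"]
            if kind == "step_progress":
                delta = payload["delta"]   # TextDelta | ImageDelta | ToolCallDelta
            elif kind == "turn_complete":
                return payload["turn"]     # complete Turn object
            elif kind == "turn_awaiting_input":
                return payload["turn"]     # caller must supply tool responses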
-    AgentTurnResponseStepCompletePayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: step_complete
-          default: step_complete
-          description: Type of event being reported
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          description: Type of step being executed
-        step_id:
-          type: string
-          description: >-
-            Unique identifier for the step within a turn
-        step_details:
-          oneOf:
-            - $ref: '#/components/schemas/InferenceStep'
-            - $ref: '#/components/schemas/ToolExecutionStep'
-            - $ref: '#/components/schemas/ShieldCallStep'
-            - $ref: '#/components/schemas/MemoryRetrievalStep'
-          discriminator:
-            propertyName: step_type
-            mapping:
-              inference: '#/components/schemas/InferenceStep'
-              tool_execution: '#/components/schemas/ToolExecutionStep'
-              shield_call: '#/components/schemas/ShieldCallStep'
-              memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
-          description: Complete details of the executed step
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-        - step_details
-      title: AgentTurnResponseStepCompletePayload
-      description: >-
-        Payload for step completion events in agent turn responses.
-    AgentTurnResponseStepProgressPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: step_progress
-          default: step_progress
-          description: Type of event being reported
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          description: Type of step being executed
-        step_id:
-          type: string
-          description: >-
-            Unique identifier for the step within a turn
-        delta:
-          oneOf:
-            - $ref: '#/components/schemas/TextDelta'
-            - $ref: '#/components/schemas/ImageDelta'
-            - $ref: '#/components/schemas/ToolCallDelta'
-          discriminator:
-            propertyName: type
-            mapping:
-              text: '#/components/schemas/TextDelta'
-              image: '#/components/schemas/ImageDelta'
-              tool_call: '#/components/schemas/ToolCallDelta'
-          description: >-
-            Incremental content changes during step execution
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-        - delta
-      title: AgentTurnResponseStepProgressPayload
-      description: >-
-        Payload for step progress events in agent turn responses.
-    AgentTurnResponseStepStartPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: step_start
-          default: step_start
-          description: Type of event being reported
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          description: Type of step being executed
-        step_id:
-          type: string
-          description: >-
-            Unique identifier for the step within a turn
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata for the step
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-      title: AgentTurnResponseStepStartPayload
-      description: >-
-        Payload for step start events in agent turn responses.
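The step_progress payload carries a second discriminated union, the delta, keyed on its "type" field (text | image | tool_call). A sketch of handling it, under the assumption that deltas arrive as parsed dicts:

    # Sketch: handling the delta union from a step_progress payload.
    def render_delta(delta):
        if delta["type"] == "text":
            print(delta["text"], end="")
        elif delta["type"] == "tool_call":
            # tool_call is an in-progress string or a parsed ToolCall,
            # depending on parse_status
            if delta["parse_status"] == "succeeded":
                print(f"\n[tool call] {delta['tool_call']['tool_name']}")
        elif delta["type"] == "image":
            pass  # base64 image chunk in delta["image"]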
-    AgentTurnResponseStreamChunk:
-      type: object
-      properties:
-        event:
-          $ref: '#/components/schemas/AgentTurnResponseEvent'
-          description: >-
-            Individual event in the agent turn response stream
-      additionalProperties: false
-      required:
-        - event
-      title: AgentTurnResponseStreamChunk
-      description: Streamed agent turn completion response.
-    "AgentTurnResponseTurnAwaitingInputPayload":
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: turn_awaiting_input
-          default: turn_awaiting_input
-          description: Type of event being reported
-        turn:
-          $ref: '#/components/schemas/Turn'
-          description: >-
-            Turn data when waiting for external tool responses
-      additionalProperties: false
-      required:
-        - event_type
-        - turn
-      title: >-
-        AgentTurnResponseTurnAwaitingInputPayload
-      description: >-
-        Payload for turn awaiting input events in agent turn responses.
-    AgentTurnResponseTurnCompletePayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: turn_complete
-          default: turn_complete
-          description: Type of event being reported
-        turn:
-          $ref: '#/components/schemas/Turn'
-          description: >-
-            Complete turn data including all steps and results
-      additionalProperties: false
-      required:
-        - event_type
-        - turn
-      title: AgentTurnResponseTurnCompletePayload
-      description: >-
-        Payload for turn completion events in agent turn responses.
-    AgentTurnResponseTurnStartPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: turn_start
-          default: turn_start
-          description: Type of event being reported
-        turn_id:
-          type: string
-          description: >-
-            Unique identifier for the turn within a session
-      additionalProperties: false
-      required:
-        - event_type
-        - turn_id
-      title: AgentTurnResponseTurnStartPayload
-      description: >-
-        Payload for turn start events in agent turn responses.
-    ImageDelta:
-      type: object
-      properties:
-        type:
-          type: string
-          const: image
-          default: image
-          description: >-
-            Discriminator type of the delta. Always "image"
-        image:
-          type: string
-          contentEncoding: base64
-          description: The incremental image data as bytes
-      additionalProperties: false
-      required:
-        - type
-        - image
-      title: ImageDelta
-      description: >-
-        An image content delta for streaming responses.
-    TextDelta:
-      type: object
-      properties:
-        type:
-          type: string
-          const: text
-          default: text
-          description: >-
-            Discriminator type of the delta. Always "text"
-        text:
-          type: string
-          description: The incremental text content
-      additionalProperties: false
-      required:
-        - type
-        - text
-      title: TextDelta
-      description: >-
-        A text content delta for streaming responses.
-    ToolCallDelta:
-      type: object
-      properties:
-        type:
-          type: string
-          const: tool_call
-          default: tool_call
-          description: >-
-            Discriminator type of the delta.
Always "tool_call" - tool_call: - oneOf: - - type: string - - $ref: '#/components/schemas/ToolCall' - description: >- - Either an in-progress tool call string or the final parsed tool call - parse_status: - type: string - enum: - - started - - in_progress - - failed - - succeeded - description: Current parsing status of the tool call - additionalProperties: false - required: - - type - - tool_call - - parse_status - title: ToolCallDelta - description: >- - A tool call content delta for streaming responses. - ResumeAgentTurnRequest: - type: object - properties: - tool_responses: - type: array - items: - $ref: '#/components/schemas/ToolResponse' - description: >- - The tool call responses to resume the turn with. - stream: - type: boolean - description: Whether to stream the response. - additionalProperties: false - required: - - tool_responses - title: ResumeAgentTurnRequest - AgentStepResponse: - type: object - properties: - step: - oneOf: - - $ref: '#/components/schemas/InferenceStep' - - $ref: '#/components/schemas/ToolExecutionStep' - - $ref: '#/components/schemas/ShieldCallStep' - - $ref: '#/components/schemas/MemoryRetrievalStep' - discriminator: - propertyName: step_type - mapping: - inference: '#/components/schemas/InferenceStep' - tool_execution: '#/components/schemas/ToolExecutionStep' - shield_call: '#/components/schemas/ShieldCallStep' - memory_retrieval: '#/components/schemas/MemoryRetrievalStep' - description: >- - The complete step data and execution details - additionalProperties: false - required: - - step - title: AgentStepResponse - description: >- - Response containing details of a specific agent step. - AppendRowsRequest: - type: object - properties: - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to append to the dataset. - additionalProperties: false - required: - - rows - title: AppendRowsRequest - Dataset: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: dataset - default: dataset - description: >- - Type of resource, always 'dataset' for datasets - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use - source: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - description: >- - Data source configuration for the dataset - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false - required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. 
-    RowsDataSource:
-      type: object
-      properties:
-        type:
-          type: string
-          const: rows
-          default: rows
-        rows:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
-            "content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
-            world!"}]} ]
-      additionalProperties: false
-      required:
-        - type
-        - rows
-      title: RowsDataSource
-      description: A dataset stored in rows.
-    URIDataSource:
-      type: object
-      properties:
-        type:
-          type: string
-          const: uri
-          default: uri
-        uri:
-          type: string
-          description: >-
-            The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
-            - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
-      additionalProperties: false
-      required:
-        - type
-        - uri
-      title: URIDataSource
-      description: >-
-        A dataset that can be obtained from a URI.
-    ListDatasetsResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Dataset'
-          description: List of datasets
-      additionalProperties: false
-      required:
-        - data
-      title: ListDatasetsResponse
-      description: Response from listing datasets.
-    DataSource:
-      oneOf:
-        - $ref: '#/components/schemas/URIDataSource'
-        - $ref: '#/components/schemas/RowsDataSource'
-      discriminator:
-        propertyName: type
-        mapping:
-          uri: '#/components/schemas/URIDataSource'
-          rows: '#/components/schemas/RowsDataSource'
-    RegisterDatasetRequest:
-      type: object
-      properties:
-        purpose:
-          type: string
-          enum:
-            - post-training/messages
-            - eval/question-answer
-            - eval/messages-answer
-          description: >-
-            The purpose of the dataset. One of: - "post-training/messages": The dataset
-            contains a messages column with list of messages for post-training. {
-            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
-            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
-            contains a question column and an answer column for evaluation. { "question":
-            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
-            The dataset contains a messages column with list of messages and an answer
-            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
-            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
-            Doe. How can I help you today?"}, {"role": "user", "content": "What's
-            my name?"}, ], "answer": "John Doe" }
-        source:
-          $ref: '#/components/schemas/DataSource'
-          description: >-
-            The data source of the dataset. Ensure that the data source schema is
-            compatible with the purpose of the dataset. Examples: - { "type": "uri",
-            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
-            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
-            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
-            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            } ] }
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The metadata for the dataset. - E.g. {"description": "My dataset"}.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset. If not provided, an ID will be generated.
-      additionalProperties: false
-      required:
-        - purpose
-        - source
-      title: RegisterDatasetRequest
-    Benchmark:
-      type: object
-      properties:
-        identifier:
-          type: string
-        provider_resource_id:
-          type: string
-        provider_id:
-          type: string
-        type:
-          type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
-          const: benchmark
-          default: benchmark
-          description: The resource type, always benchmark
-        dataset_id:
-          type: string
-          description: >-
-            Identifier of the dataset to use for the benchmark evaluation
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of scoring function identifiers to apply during evaluation
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Metadata for this evaluation task
-      additionalProperties: false
-      required:
-        - identifier
-        - provider_id
-        - type
-        - dataset_id
-        - scoring_functions
-        - metadata
-      title: Benchmark
-      description: >-
-        A benchmark resource for evaluating model performance.
-    ListBenchmarksResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Benchmark'
-      additionalProperties: false
-      required:
-        - data
-      title: ListBenchmarksResponse
-    RegisterBenchmarkRequest:
-      type: object
-      properties:
-        benchmark_id:
-          type: string
-          description: The ID of the benchmark to register.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to use for the benchmark.
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            The scoring functions to use for the benchmark.
-        provider_benchmark_id:
-          type: string
-          description: >-
-            The ID of the provider benchmark to use for the benchmark.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the benchmark.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The metadata to use for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_id
-        - dataset_id
-        - scoring_functions
-      title: RegisterBenchmarkRequest
-    AgentCandidate:
-      type: object
-      properties:
-        type:
-          type: string
-          const: agent
-          default: agent
-        config:
-          $ref: '#/components/schemas/AgentConfig'
-          description: >-
-            The configuration for the agent candidate.
-      additionalProperties: false
-      required:
-        - type
-        - config
-      title: AgentCandidate
-      description: An agent candidate for evaluation.
-    AggregationFunctionType:
-      type: string
-      enum:
-        - average
-        - weighted_average
-        - median
-        - categorical_count
-        - accuracy
-      title: AggregationFunctionType
-      description: >-
-        Types of aggregation functions for scoring results.
-    BasicScoringFnParams:
-      type: object
-      properties:
-        type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
-          const: basic
-          default: basic
-          description: >-
-            The type of scoring function parameters, always basic
-        aggregation_functions:
-          type: array
-          items:
-            $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - aggregation_functions
-      title: BasicScoringFnParams
-      description: >-
-        Parameters for basic scoring function configuration.
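The RegisterBenchmarkRequest body above is what the new v1alpha Benchmarks routes added earlier in this diff consume. A sketch of registering and listing benchmarks against them follows; the base URL and the specific dataset/scoring-function ids are assumptions, while the routes, methods, and the 204 success response come from the added spec lines.

    import requests  # third-party, assumed available

    BASE = "http://localhost:8321"  # assumed llama-stack server address

    # POST /v1alpha/eval/benchmarks with a RegisterBenchmarkRequest body.
    resp = requests.post(f"{BASE}/v1alpha/eval/benchmarks", json={
        "benchmark_id": "my-benchmark",          # assumed id
        "dataset_id": "my-eval-set",             # assumed dataset
        "scoring_functions": ["basic::equality"] # assumed scoring fn id
    })
    resp.raise_for_status()  # the new spec lists '204' Successful Response

    # GET /v1alpha/eval/benchmarks returns the list of benchmarks.
    benchmarks = requests.get(f"{BASE}/v1alpha/eval/benchmarks").json()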
-    BenchmarkConfig:
-      type: object
-      properties:
-        eval_candidate:
-          oneOf:
-            - $ref: '#/components/schemas/ModelCandidate'
-            - $ref: '#/components/schemas/AgentCandidate'
-          discriminator:
-            propertyName: type
-            mapping:
-              model: '#/components/schemas/ModelCandidate'
-              agent: '#/components/schemas/AgentCandidate'
-          description: The candidate to evaluate.
-        scoring_params:
-          type: object
-          additionalProperties:
-            $ref: '#/components/schemas/ScoringFnParams'
-          description: >-
-            Map between scoring function id and parameters for each scoring function
-            you want to run
-        num_examples:
-          type: integer
-          description: >-
-            (Optional) The number of examples to evaluate. If not provided, all examples
-            in the dataset will be evaluated
-      additionalProperties: false
-      required:
-        - eval_candidate
-        - scoring_params
-      title: BenchmarkConfig
-      description: >-
-        A benchmark configuration for evaluation.
-    LLMAsJudgeScoringFnParams:
-      type: object
-      properties:
-        type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
-          const: llm_as_judge
-          default: llm_as_judge
-          description: >-
-            The type of scoring function parameters, always llm_as_judge
-        judge_model:
-          type: string
-          description: >-
-            Identifier of the LLM model to use as a judge for scoring
-        prompt_template:
-          type: string
-          description: >-
-            (Optional) Custom prompt template for the judge model
-        judge_score_regexes:
-          type: array
-          items:
-            type: string
-          description: >-
-            Regexes to extract the answer from generated response
-        aggregation_functions:
-          type: array
-          items:
-            $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - judge_model
-        - judge_score_regexes
-        - aggregation_functions
-      title: LLMAsJudgeScoringFnParams
-      description: >-
-        Parameters for LLM-as-judge scoring function configuration.
-    ModelCandidate:
-      type: object
-      properties:
-        type:
-          type: string
-          const: model
-          default: model
-        model:
-          type: string
-          description: The model ID to evaluate.
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-          description: The sampling parameters for the model.
-        system_message:
-          $ref: '#/components/schemas/SystemMessage'
-          description: >-
-            (Optional) The system message providing instructions or context to the
-            model.
-      additionalProperties: false
-      required:
-        - type
-        - model
-        - sampling_params
-      title: ModelCandidate
-      description: A model candidate for evaluation.
-    RegexParserScoringFnParams:
-      type: object
-      properties:
-        type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
-          const: regex_parser
-          default: regex_parser
-          description: >-
-            The type of scoring function parameters, always regex_parser
-        parsing_regexes:
-          type: array
-          items:
-            type: string
-          description: >-
-            Regex to extract the answer from generated response
-        aggregation_functions:
-          type: array
-          items:
-            $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - parsing_regexes
-        - aggregation_functions
-      title: RegexParserScoringFnParams
-      description: >-
-        Parameters for regex parser scoring function configuration.
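A sketch of a BenchmarkConfig that would validate against the definitions above: a ModelCandidate as the eval_candidate plus an LLM-as-judge scoring configuration. The model identifiers, scoring-function id, and regex are illustrative assumptions.

    # Hypothetical BenchmarkConfig per the removed schemas.
    benchmark_config = {
        "eval_candidate": {
            "type": "model",
            "model": "llama-3.1-8b-instruct",          # assumed model id
            "sampling_params": {"strategy": {"type": "greedy"}},
        },
        "scoring_params": {
            "llm-as-judge::base": {                    # assumed scoring fn id
                "type": "llm_as_judge",
                "judge_model": "llama-3.1-70b-instruct",  # assumed judge
                "judge_score_regexes": [r"Score:\s*(\d+)"],
                "aggregation_functions": ["average"],
            }
        },
        "num_examples": 10,  # optional; omit to evaluate the whole dataset
    }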
- ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' - discriminator: - propertyName: type - mapping: - llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' - regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' - ScoringFnParamsType: - type: string - enum: - - llm_as_judge - - regex_parser - - basic - title: ScoringFnParamsType - description: >- - Types of scoring function parameter configurations. - SystemMessage: - type: object - properties: - role: - type: string - const: system - default: system - description: >- - Must be "system" to identify this as a system message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). - additionalProperties: false - required: - - role - - content - title: SystemMessage - description: >- - A system message providing instructions or context to the model. - EvaluateRowsRequest: - type: object - properties: - input_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the evaluation. - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false - required: - - input_rows - - scoring_functions - - benchmark_config - title: EvaluateRowsRequest - EvaluateResponse: - type: object - properties: - generations: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. - scores: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false - required: - - generations - - scores - title: EvaluateResponse - description: The response from an evaluation. - ScoringResult: - type: object - properties: - score_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. - aggregated_results: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false - required: - - score_rows - - aggregated_results - title: ScoringResult - description: A scoring result for a single row. - RunEvalRequest: - type: object - properties: - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. 
- additionalProperties: false - required: - - benchmark_config - title: RunEvalRequest - Job: - type: object - properties: - job_id: - type: string - description: Unique identifier for the job - status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. - Order: - type: string - enum: - - asc - - desc - title: Order - description: Sort order for paginated responses. - ListOpenAIChatCompletionResponse: - type: object - properties: - data: - type: array - items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages - has_more: - type: boolean - description: >- - Whether there are more completions available beyond this list - first_id: - type: string - description: ID of the first completion in this list - last_id: - type: string - description: ID of the last completion in this list object: type: string const: list + title: Object default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - OpenAIAssistantMessageParam: + data: + items: + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects + first_id: + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list + has_more: + type: boolean + title: Has More + description: Whether there are more batches available + default: false type: object + required: + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. 
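The new `ListBatchesResponse` adopts the OpenAI-style list envelope. A minimal (empty) page would serialize as below; note `object` is the const `list` and `has_more` defaults to `false`.

```yaml
# Minimal ListBatchesResponse instance: an empty page of batches.
object: list          # const "list"
data: []              # array of Batch objects (schema defined below)
first_id: null        # nullable when the page is empty
last_id: null         # nullable when the page is empty
has_more: false       # default
```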
+ CreateBatchRequest: + properties: + input_file_id: + type: string + title: Input File Id + endpoint: + type: string + title: Endpoint + completion_window: + type: string + const: 24h + title: Completion Window + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + idempotency_key: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + properties: + id: + type: string + title: Id + completion_window: + type: string + title: Completion Window + created_at: + type: integer + title: Created At + endpoint: + type: string + title: Endpoint + input_file_id: + type: string + title: Input File Id + object: + type: string + const: batch + title: Object + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status + cancelled_at: + anyOf: + - type: integer + - type: 'null' + cancelling_at: + anyOf: + - type: integer + - type: 'null' + completed_at: + anyOf: + - type: integer + - type: 'null' + error_file_id: + anyOf: + - type: string + - type: 'null' + errors: + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' + title: Errors + expired_at: + anyOf: + - type: integer + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + failed_at: + anyOf: + - type: integer + - type: 'null' + finalizing_at: + anyOf: + - type: integer + - type: 'null' + in_progress_at: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + model: + anyOf: + - type: string + - type: 'null' + output_file_id: + anyOf: + - type: string + - type: 'null' + request_counts: + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' + title: BatchRequestCounts + usage: + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' + title: BatchUsage + additionalProperties: true + type: object + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. + ListOpenAIChatCompletionResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. properties: role: - type: string const: assistant default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the model's response - name: + title: Role type: string - description: >- - (Optional) The name of the assistant message participant. 
+ content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: >- - List of tool calls. Each tool call is an OpenAIChatCompletionToolCall - object. - additionalProperties: false - required: - - role + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true title: OpenAIAssistantMessageParam - description: >- - A message containing the model's (assistant) response in an OpenAI-compatible - chat completion request. - "OpenAIChatCompletionContentPartImageParam": type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartParam: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - - $ref: '#/components/schemas/OpenAIFile' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - file: '#/components/schemas/OpenAIFile' - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionToolCall: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. 
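Stepping back to the batch schemas above: a body that validates against `CreateBatchRequest` might look like the sketch below. The file ID, metadata, and idempotency key are invented, and `completion_window` only accepts the const `24h`.

```yaml
# Hypothetical CreateBatchRequest body.
input_file_id: file-abc123          # invented ID of an uploaded requests file
endpoint: /v1/chat/completions      # endpoint the batched requests target
completion_window: 24h              # const: the only accepted value
metadata:                           # optional string-to-string map
  team: evals                       # invented key/value
idempotency_key: batch-2025-01-01   # optional, invented
```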
+ OpenAIChatCompletionToolCall: properties: index: - type: integer - description: >- - (Optional) Index of the tool call in the list + anyOf: + - type: integer + - type: 'null' id: - type: string - description: >- - (Optional) Unique identifier for the tool call + anyOf: + - type: string + - type: 'null' type: type: string const: function + title: Type default: function - description: >- - Must be "function" to identify this as a function call function: - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' - description: (Optional) Function call details - additionalProperties: false - required: - - type - title: OpenAIChatCompletionToolCall - description: >- - Tool call specification for OpenAI-compatible chat completion responses. - OpenAIChatCompletionToolCallFunction: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: properties: name: - type: string - description: (Optional) Name of the function to call + anyOf: + - type: string + - type: 'null' arguments: - type: string - description: >- - (Optional) Arguments to pass to the function as a JSON string - additionalProperties: false - title: OpenAIChatCompletionToolCallFunction - description: >- - Function call details for OpenAI-compatible tool calls. - OpenAIChatCompletionUsage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: properties: prompt_tokens: type: integer - description: Number of tokens in the prompt + title: Prompt Tokens completion_tokens: type: integer - description: Number of tokens in the completion + title: Completion Tokens total_tokens: type: integer - description: Total tokens used (prompt + completion) + title: Total Tokens prompt_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - title: >- - OpenAIChatCompletionUsagePromptTokensDetails - description: >- - Token details for prompt tokens in OpenAI chat completion usage. + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails completion_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - title: >- - OpenAIChatCompletionUsageCompletionTokensDetails - description: >- - Token details for output tokens in OpenAI chat completion usage. - additionalProperties: false - required: - - prompt_tokens - - completion_tokens - - total_tokens - title: OpenAIChatCompletionUsage - description: >- - Usage information for OpenAI chat completion. 
- OpenAIChoice: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: properties: message: oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... (5 variants) discriminator: propertyName: role mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' developer: '#/components/schemas/OpenAIDeveloperMessageParam' - description: The message from the model + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' finish_reason: type: string - description: The reason the model stopped generating + title: Finish Reason index: type: integer - description: The index of the choice + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - required: - - message - - finish_reason - - index - title: OpenAIChoice - description: >- - A choice from an OpenAI-compatible chat completion response. - OpenAIChoiceLogprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: properties: content: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' refusal: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - title: OpenAIChoiceLogprobs - description: >- - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. 
- OpenAIDeveloperMessageParam: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: properties: role: type: string const: developer + title: Role default: developer - description: >- - Must be "developer" to identify this as a developer message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the developer message + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the developer message participant. - additionalProperties: false - required: - - role - - content - title: OpenAIDeveloperMessageParam - description: >- - A message from the developer in an OpenAI-compatible chat completion request. - OpenAIFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: properties: type: type: string const: file + title: Type default: file file: $ref: '#/components/schemas/OpenAIFileFile' - additionalProperties: false + type: object required: - - type - - file + - file title: OpenAIFile OpenAIFileFile: - type: object properties: file_data: - type: string + anyOf: + - type: string + - type: 'null' file_id: - type: string + anyOf: + - type: string + - type: 'null' filename: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object title: OpenAIFileFile OpenAIImageURL: - type: object properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - OpenAIMessageParam: - oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' - developer: '#/components/schemas/OpenAIDeveloperMessageParam' - OpenAISystemMessageParam: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. 
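Combining the content-part schemas above (`OpenAIChatCompletionContentPartTextParam`, `OpenAIChatCompletionContentPartImageParam`, `OpenAIImageURL`), a multimodal user-message `content` array could look like this sketch; the URL is a placeholder.

```yaml
# Hypothetical multimodal content array for a user message.
- type: text                        # OpenAIChatCompletionContentPartTextParam
  text: What is shown in this image?
- type: image_url                   # OpenAIChatCompletionContentPartImageParam
  image_url:                        # OpenAIImageURL
    url: https://example.com/chart.png   # placeholder URL
    detail: auto                    # optional; typically "low", "high", or "auto"
```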
+ OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the system message participant. - additionalProperties: false - required: - - role - - content - title: OpenAISystemMessageParam - description: >- - A system message providing instructions or context to the model. - OpenAITokenLogProb: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number + title: Logprob top_logprobs: - type: array items: $ref: '#/components/schemas/OpenAITopLogProb' - additionalProperties: false - required: - - token - - logprob - - top_logprobs - title: OpenAITokenLogProb - description: >- - The log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIToolMessageParam: + type: array + title: Top Logprobs type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. 
+ + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: properties: role: type: string const: tool + title: Role default: tool - description: >- - Must be "tool" to identify this as a tool response tool_call_id: type: string - description: >- - Unique identifier for the tool call this response is for + title: Tool Call Id content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The response content from the tool - additionalProperties: false - required: - - role - - tool_call_id - - content - title: OpenAIToolMessageParam - description: >- - A message representing the result of a tool invocation in an OpenAI-compatible - chat completion request. - OpenAITopLogProb: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number - additionalProperties: false - required: - - token - - logprob - title: OpenAITopLogProb - description: >- - The top log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIUserMessageParam: + title: Logprob type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. properties: role: - type: string const: user default: user - description: >- - Must be "user" to identify this as a user message - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' - description: >- - The content of the message, which can include text and other media - name: + title: Role type: string - description: >- - (Optional) The name of the user message participant. 
- additionalProperties: false + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true required: - - role - - content + - content title: OpenAIUserMessageParam - description: >- - A message from the user in an OpenAI-compatible chat completion request. - OpenAIJSONSchema: type: object + OpenAIJSONSchema: properties: name: type: string - description: Name of the schema + title: Name description: - type: string - description: (Optional) Description of the schema + anyOf: + - type: string + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict adherence to the schema + anyOf: + - type: boolean + - type: 'null' schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The JSON schema definition - additionalProperties: false - required: - - name - title: OpenAIJSONSchema - description: >- - JSON schema specification for OpenAI-compatible structured response format. - OpenAIResponseFormatJSONObject: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: properties: type: type: string const: json_object + title: Type default: json_object - description: >- - Must be "json_object" to indicate generic JSON object response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatJSONObject - description: >- - JSON object response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatJSONSchema: type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: properties: type: type: string const: json_schema + title: Type default: json_schema - description: >- - Must be "json_schema" to indicate structured JSON response format json_schema: $ref: '#/components/schemas/OpenAIJSONSchema' - description: >- - The JSON schema specification for the response - additionalProperties: false - required: - - type - - json_schema - title: OpenAIResponseFormatJSONSchema - description: >- - JSON schema response format for OpenAI-compatible chat completion requests. 
- OpenAIResponseFormatParam: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseFormatText' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIResponseFormatText' - json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' - json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' - OpenAIResponseFormatText: type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to indicate plain text response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatText - description: >- - Text response format for OpenAI-compatible chat completion requests. - OpenAIChatCompletionRequestWithExtraBody: type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation. - frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - function_call: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The function call to use. - functions: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) List of functions to use. - logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. - logprobs: - type: boolean - description: (Optional) The log probabilities to use. - max_completion_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - n: - type: integer - description: >- - (Optional) The number of completions to generate. - parallel_tool_calls: - type: boolean - description: >- - (Optional) Whether to parallelize tool calls. - presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. 
- response_format: - $ref: '#/components/schemas/OpenAIResponseFormatParam' - description: (Optional) The response format to use. - seed: - type: integer - description: (Optional) The seed to use. - stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. - stream: - type: boolean - description: >- - (Optional) Whether to stream the response. - stream_options: - type: object - additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. - temperature: - type: number - description: (Optional) The temperature to use. - tool_choice: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tool choice to use. - tools: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) type: array - items: + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tools to use. 
+ - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' top_logprobs: - type: integer - description: >- - (Optional) The top log probabilities to use. + anyOf: + - type: integer + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. - additionalProperties: false - required: - - model - - messages - title: OpenAIChatCompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible chat completion endpoint. - OpenAIChatCompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. 
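To make the request shape concrete, here is a hypothetical `OpenAIChatCompletionRequestWithExtraBody` payload. The model ID is an assumption; `additionalProperties: true` is what lets provider-specific extra-body keys ride along unvalidated.

```yaml
# Hypothetical chat completion request payload.
model: example-chat-model           # invented; must be registered with the stack
messages:                           # minItems: 1
  - role: system
    content: You are a terse assistant.
  - role: user
    content: Summarize the batches API in one sentence.
temperature: 0.2                    # optional sampling parameter
stream: false                       # optional; true yields chat.completion.chunk events
response_format:                    # optional, discriminated on "type"
  type: text
```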
+ OpenAIChatCompletion: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - additionalProperties: false - required: - - id - - choices - - object - - created - - model - title: OpenAIChatCompletion - description: >- - Response from an OpenAI-compatible chat completion request. - OpenAIChatCompletionChunk: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. properties: id: + title: Id type: string - description: The ID of the chat completion choices: - type: array items: $ref: '#/components/schemas/OpenAIChunkChoice' - description: List of choices + title: Choices + type: array object: - type: string const: chat.completion.chunk default: chat.completion.chunk - description: >- - The object type, which will be "chat.completion.chunk" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model type: string - description: >- - The model that was used to generate the chat completion usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information (typically included in final chunk with stream_options) - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage required: - - id - - choices - - object - - created - - model + - id + - choices + - created + - model title: OpenAIChatCompletionChunk - description: >- - Chunk from a streaming response to an OpenAI-compatible chat completion request. - OpenAIChoiceDelta: type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. 
properties: content: - type: string - description: (Optional) The content of the delta + anyOf: + - type: string + - type: 'null' + nullable: true refusal: - type: string - description: (Optional) The refusal of the delta + anyOf: + - type: string + - type: 'null' + nullable: true role: - type: string - description: (Optional) The role of the delta + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: (Optional) The tool calls of the delta + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true reasoning_content: - type: string - description: >- - (Optional) The reasoning content from the model (non-standard, for o1/o3 - models) - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true title: OpenAIChoiceDelta - description: >- - A delta from an OpenAI-compatible chat completion streaming response. - OpenAIChunkChoice: type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. properties: delta: $ref: '#/components/schemas/OpenAIChoiceDelta' - description: The delta from the chunk finish_reason: + title: Finish Reason type: string - description: The reason the model stopped generating index: + title: Index type: integer - description: The index of the choice logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs required: - - delta - - finish_reason - - index + - delta + - finish_reason + - index title: OpenAIChunkChoice - description: >- - A chunk choice from an OpenAI-compatible chat completion streaming response. 
- OpenAICompletionWithInputMessages: type: object + OpenAICompletionWithInputMessages: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage input_messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object required: - - id - - choices - - object - - created - - model - - input_messages + - id + - choices + - created + - model + - input_messages title: OpenAICompletionWithInputMessages OpenAICompletionRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model prompt: - oneOf: - - type: string - - type: array - items: - type: string - - type: array + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: items: type: integer - - type: array - items: - type: array - items: - type: integer - description: The prompt to generate a completion for. + type: array + type: array + title: list[array] + title: string | ... (4 variants) best_of: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' echo: - type: boolean - description: (Optional) Whether to echo the prompt. + anyOf: + - type: boolean + - type: 'null' frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
+ anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' logprobs: - type: boolean - description: (Optional) The log probabilities to use. + anyOf: + - type: boolean + - type: 'null' max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. + anyOf: + - type: integer + - type: 'null' n: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' seed: - type: integer - description: (Optional) The seed to use. + anyOf: + - type: integer + - type: 'null' stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] stream: - type: boolean - description: >- - (Optional) Whether to stream the response. + anyOf: + - type: boolean + - type: 'null' stream_options: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. + anyOf: + - additionalProperties: true + type: object + - type: 'null' temperature: - type: number - description: (Optional) The temperature to use. + anyOf: + - type: number + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. + anyOf: + - type: string + - type: 'null' suffix: - type: string - description: >- - (Optional) The suffix that should be appended to the completion. - additionalProperties: false - required: - - model - - prompt - title: OpenAICompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible completion endpoint. - OpenAICompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: properties: id: type: string + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices created: type: integer + title: Created model: type: string + title: Model object: type: string const: text_completion + title: Object default: text_completion - additionalProperties: false - required: - - id - - choices - - created - - model - - object - title: OpenAICompletion - description: >- - Response from an OpenAI-compatible completion request. - OpenAICompletionChoice: type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. 
+ + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: properties: finish_reason: type: string + title: Finish Reason text: type: string + title: Text index: type: integer + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - additionalProperties: false - required: - - finish_reason - - text - - index - title: OpenAICompletionChoice - description: >- - A choice from an OpenAI-compatible completion response. - OpenAIEmbeddingsRequestWithExtraBody: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - finish_reason + - text + - index + title: OpenAICompletionChoice + description: |- + A choice from an OpenAI-compatible completion response. + + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice + ConversationItem: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants)
+ OpenAIResponseAnnotationCitation: + properties: + type: + type: string + const: url_citation + title: Type + default: url_citation + end_index: + type: integer + title: End Index + start_index: + type: integer + title: Start Index + title: + type: string + title: Title + url: + type: string + title: Url + type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. + OpenAIResponseAnnotationContainerFileCitation: + properties: + type: + type: string + const: container_file_citation + title: Type + default: container_file_citation + container_id: + type: string + title: Container Id + end_index: + type: integer + title: End Index + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + start_index: + type: integer + title: Start Index + type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + properties: + type: + type: string + const: file_citation + title: Type + default: file_citation + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + index: + type: integer + title: Index + type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. + OpenAIResponseAnnotationFilePath: + properties: + type: + type: string + const: file_path + title: Type + default: file_path + file_id: + type: string + title: File Id + index: + type: integer + title: Index + type: object + required: + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + OpenAIResponseContentPartRefusal: + properties: + type: + type: string + const: refusal + title: Type + default: refusal + refusal: + type: string + title: Refusal + type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part.
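Looping back to the streaming schemas above (`OpenAIChatCompletionChunk`, `OpenAIChunkChoice`, `OpenAIChoiceDelta`): a final stream event would serialize roughly as follows. The ID, timestamp, and token counts are invented; `usage` is typically only populated on the last chunk when `stream_options` requests it.

```yaml
# Hypothetical final chunk of a streamed chat completion.
id: chatcmpl-xyz789                 # invented completion ID
object: chat.completion.chunk       # const
created: 1717000000                 # Unix seconds, invented
model: example-chat-model           # invented model ID
choices:
  - index: 0
    delta:                          # OpenAIChoiceDelta; every field is nullable
      content: " done."
    finish_reason: stop
    logprobs: null
usage:
  prompt_tokens: 12
  completion_tokens: 5
  total_tokens: 17
```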
+ OpenAIResponseInputFunctionToolCallOutput: + properties: + call_id: + type: string + title: Call Id + output: + type: string + title: Output + type: + type: string + const: function_call_output + title: Type + default: function_call_output + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. + OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: + properties: + type: + type: string + const: input_file + title: Type + default: input_file + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + file_url: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: + properties: + detail: + title: Detail + default: auto + type: string + enum: + - low + - high + - auto + type: + type: string + const: input_image + title: Type + default: input_image + file_id: + anyOf: + - type: string + - type: 'null' + image_url: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: + properties: + text: + type: string + title: Text + type: + type: string + const: input_text + title: Type + default: input_text + type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: + properties: + arguments: + type: string + title: Arguments + id: + type: string + title: Id + name: + type: string + title: Name + server_label: + type: string + title: Server Label + type: + type: string + const: mcp_approval_request + title: Type + default: mcp_approval_request + type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. 
+ OpenAIResponseMCPApprovalResponse: + properties: + approval_request_id: + type: string + title: Approval Request Id + approve: + type: boolean + title: Approve + type: + type: string + const: mcp_approval_response + title: Type + default: mcp_approval_response + id: + anyOf: + - type: string + - type: 'null' + reason: + anyOf: + - type: string + - type: 'null' + type: object + required: + - approval_request_id + - approve + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. + OpenAIResponseMessage: + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + properties: + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + const: message + default: message + title: Type + type: string + id: + anyOf: + - type: string + - type: 'null' + nullable: true + status: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - content + - role + title: OpenAIResponseMessage + type: object + OpenAIResponseOutputMessageContent: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: 
OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: + properties: + text: + type: string + title: Text + type: + type: string + const: output_text + title: Type + default: output_text + annotations: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + type: array + title: Annotations + type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: + properties: + id: + type: string + title: Id + queries: + items: + type: string + type: array + title: Queries + status: + type: string + title: Status + type: + type: string + const: file_search_call + title: Type + default: file_search_call + results: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' + type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: + properties: + call_id: + type: string + title: Call Id + name: + type: string + title: Name + arguments: + type: string + title: Arguments + type: + type: string + const: function_call + title: Type + default: function_call + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPCall: + properties: + id: + type: string + title: Id + type: + type: string + const: mcp_call + title: Type + default: mcp_call + arguments: + type: string + title: Arguments + name: + type: string + title: Name + server_label: + type: string + title: Server Label + error: + anyOf: + - type: string + - type: 'null' + output: + anyOf: + - type: string + - type: 'null' + type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. 
+ OpenAIResponseOutputMessageMCPListTools: + properties: + id: + type: string + title: Id + type: + type: string + const: mcp_list_tools + title: Type + default: mcp_list_tools + server_label: + type: string + title: Server Label + tools: + items: + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools + type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. + OpenAIResponseOutputMessageWebSearchToolCall: + properties: + id: + type: string + title: Id + status: + type: string + title: Status + type: + type: string + const: web_search_call + title: Type + default: web_search_call + type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + CreateConversationRequest: + properties: + items: + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object + title: CreateConversationRequest + Conversation: + properties: + id: + type: string + title: Id + description: The unique ID of the conversation. + object: + type: string + const: conversation + title: Object + description: The object type, which is always conversation. + default: conversation + created_at: + type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. 
+ metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object + required: + - id + - created_at + title: Conversation + description: OpenAI-compatible conversation object. + UpdateConversationRequest: + properties: + metadata: + additionalProperties: + type: string + type: object + title: Metadata + type: object + required: + - metadata + title: UpdateConversationRequest + ConversationDeletedResource: + properties: + id: + type: string + title: Id + description: The deleted conversation identifier + object: + type: string + title: Object + description: Object type + default: conversation.deleted + deleted: + type: boolean + title: Deleted + description: Whether the object was deleted + default: true + type: object + required: + - id + title: ConversationDeletedResource + description: Response for deleted conversation. + ConversationItemList: + properties: + object: + type: string + title: Object + description: Object type + default: list + data: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(9 variants) + type: array + title: Data + description: List of conversation items + first_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list + has_more: + type: boolean + title: Has More + description: Whether there are more items available + default: false + type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + AddItemsRequest: + properties: + items: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + title: Items + type: object + required: + - items + title: AddItemsRequest + ConversationItemDeletedResource: + properties: + id: + type: string + title: Id + description: The deleted item identifier + object: + type: string + title: Object + description: Object type + default: conversation.item.deleted + deleted: + type: boolean + title: Deleted + description: Whether the object was deleted + default: true + type: object + required: + - id + title: ConversationItemDeletedResource + description: Response for deleted conversation item. + OpenAIEmbeddingsRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. + title: Model input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input text to embed, encoded as a string or array of strings. To embed - multiple inputs in a single request, pass an array of strings. 
+ anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] encoding_format: - type: string + anyOf: + - type: string + - type: 'null' default: float - description: >- - (Optional) The format to return the embeddings in. Can be either "float" - or "base64". Defaults to "float". dimensions: - type: integer - description: >- - (Optional) The number of dimensions the resulting output embeddings should - have. Only supported in text-embedding-3 and later models. + anyOf: + - type: integer + - type: 'null' user: - type: string - description: >- - (Optional) A unique identifier representing your end-user, which can help - OpenAI to monitor and detect abuse. - additionalProperties: false - required: - - model - - input - title: OpenAIEmbeddingsRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible embeddings endpoint. - OpenAIEmbeddingData: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: properties: object: type: string const: embedding + title: Object default: embedding - description: >- - The object type, which will be "embedding" embedding: - oneOf: - - type: array - items: - type: number - - type: string - description: >- - The embedding vector as a list of floats (when encoding_format="float") - or as a base64-encoded string (when encoding_format="base64") + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string index: type: integer - description: >- - The index of the embedding in the input list - additionalProperties: false - required: - - object - - embedding - - index - title: OpenAIEmbeddingData - description: >- - A single embedding data object from an OpenAI-compatible embeddings response. - OpenAIEmbeddingUsage: + title: Index type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: properties: prompt_tokens: type: integer - description: The number of tokens in the input + title: Prompt Tokens total_tokens: type: integer - description: The total number of tokens used - additionalProperties: false - required: - - prompt_tokens - - total_tokens - title: OpenAIEmbeddingUsage - description: >- - Usage information for an OpenAI-compatible embeddings response. - OpenAIEmbeddingsResponse: + title: Total Tokens type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: properties: object: type: string const: list + title: Object default: list - description: The object type, which will be "list" data: - type: array items: $ref: '#/components/schemas/OpenAIEmbeddingData' - description: List of embedding data objects + type: array + title: Data model: type: string - description: >- - The model that was used to generate the embeddings + title: Model usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' - description: Usage information - additionalProperties: false + type: object required: - - object - - data - - model - - usage + - data + - model + - usage title: OpenAIEmbeddingsResponse - description: >- - Response from an OpenAI-compatible embeddings request. 
+ description: Response from an OpenAI-compatible embeddings request. OpenAIFilePurpose: type: string enum: - - assistants - - batch + - assistants + - batch title: OpenAIFilePurpose - description: >- - Valid purpose values for OpenAI Files API. + description: Valid purpose values for OpenAI Files API. ListOpenAIFileResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/OpenAIFileObject' - description: List of file objects + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more files available beyond this page + title: Has More first_id: type: string - description: >- - ID of the first file in the list for pagination + title: First Id last_id: type: string - description: >- - ID of the last file in the list for pagination + title: Last Id object: type: string const: list + title: Object default: list - description: The object type, which is always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIFileResponse - description: >- - Response for listing files in OpenAI Files API. - OpenAIFileObject: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: properties: object: type: string const: file + title: Object default: file - description: The object type, which is always "file" id: type: string - description: >- - The file identifier, which can be referenced in the API endpoints + title: Id bytes: type: integer - description: The size of the file, in bytes + title: Bytes created_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file was created + title: Created At expires_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file expires + title: Expires At filename: type: string - description: The name of the file + title: Filename purpose: - type: string - enum: - - assistants - - batch - description: The intended purpose of the file - additionalProperties: false - required: - - object - - id - - bytes - - created_at - - expires_at - - filename - - purpose - title: OpenAIFileObject - description: >- - OpenAI File object as defined in the OpenAI Files API. - ExpiresAfter: + $ref: '#/components/schemas/OpenAIFilePurpose' type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: properties: anchor: type: string const: created_at + title: Anchor seconds: type: integer - additionalProperties: false + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object required: - - anchor - - seconds + - anchor + - seconds title: ExpiresAfter - description: >- + description: |- Control expiration of uploaded files. 
Params: - anchor, must be "created_at" - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) OpenAIFileDeleteResponse: - type: object properties: id: type: string - description: The file identifier that was deleted + title: Id object: type: string const: file + title: Object default: file - description: The object type, which is always "file" deleted: type: boolean - description: >- - Whether the file was successfully deleted - additionalProperties: false + title: Deleted + type: object required: - - id - - object - - deleted + - id + - deleted title: OpenAIFileDeleteResponse - description: >- - Response for deleting a file in OpenAI Files API. + description: Response for deleting a file in OpenAI Files API. Response: - type: object title: Response - OpenAIModel: type: object + HealthInfo: + properties: + status: + $ref: '#/components/schemas/HealthStatus' + type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: + properties: + route: + type: string + title: Route + method: + type: string + title: Method + provider_types: + items: + type: string + type: array + title: Provider Types + type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: + properties: + data: + items: + $ref: '#/components/schemas/RouteInfo' + type: array + title: Data + type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: properties: id: type: string + title: Id object: type: string const: model + title: Object default: model created: type: integer + title: Created owned_by: type: string - additionalProperties: false - required: - - id - - object - - created - - owned_by - title: OpenAIModel - description: A model from OpenAI. - OpenAIListModelsResponse: + title: Owned By + custom_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIModel' - additionalProperties: false - required: - - data - title: OpenAIListModelsResponse - RunModerationRequest: + type: array + title: Data type: object + required: + - data + title: OpenAIListModelsResponse + Model: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: model + title: Type + default: model + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + type: object + required: + - identifier + - provider_id + title: Model + description: A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: Enumeration of supported model types in Llama Stack. + RunModerationRequest: properties: input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input (or inputs) to classify. Can be a single string, an array of strings, - or an array of multi-modal input objects similar to other models. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] model: - type: string - description: >- - The content moderation model you would like to use. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input - - model + - input title: RunModerationRequest ModerationObject: - type: object properties: id: type: string - description: >- - The unique identifier for the moderation request. + title: Id model: type: string - description: >- - The model used to generate the moderation results. + title: Model results: - type: array items: $ref: '#/components/schemas/ModerationObjectResults' - description: A list of moderation objects - additionalProperties: false + type: array + title: Results + type: object required: - - id - - model - - results + - id + - model + - results title: ModerationObject description: A moderation object. ModerationObjectResults: - type: object properties: flagged: type: boolean - description: >- - Whether any of the below categories are flagged. + title: Flagged categories: - type: object - additionalProperties: - type: boolean - description: >- - A list of the categories, and whether they are flagged or not. + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' category_applied_input_types: - type: object - additionalProperties: - type: array - items: - type: string - description: >- - A list of the categories along with the input type(s) that the score applies - to. 
+ anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' category_scores: - type: object - additionalProperties: - type: number - description: >- - A list of the categories along with their scores as predicted by model. + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' user_message: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false + title: Metadata + type: object required: - - flagged - - metadata + - flagged title: ModerationObjectResults description: A moderation object. - ListOpenAIResponseObject: + Prompt: + properties: + prompt: + anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders + version: + type: integer + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) + prompt_id: + type: string + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' + variables: + items: + type: string + type: array + title: Variables + description: List of variable names that can be used in the prompt template + is_default: + type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version + default: false type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. + ListPromptsResponse: properties: data: + items: + $ref: '#/components/schemas/Prompt' type: array + title: Data + type: object + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. + CreatePromptRequest: + properties: + prompt: + type: string + title: Prompt + variables: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + required: + - prompt + title: CreatePromptRequest + UpdatePromptRequest: + properties: + prompt: + type: string + title: Prompt + version: + type: integer + title: Version + variables: + anyOf: + - items: + type: string + type: array + - type: 'null' + set_as_default: + type: boolean + title: Set As Default + default: true + type: object + required: + - prompt + - version + title: UpdatePromptRequest + SetDefaultVersionRequest: + properties: + version: + type: integer + title: Version + type: object + required: + - version + title: SetDefaultVersionRequest + ProviderInfo: + properties: + api: + type: string + title: Api + provider_id: + type: string + title: Provider Id + provider_type: + type: string + title: Provider Type + config: + additionalProperties: true + type: object + title: Config + health: + additionalProperties: true + type: object + title: Health + type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: + properties: + data: + items: + $ref: '#/components/schemas/ProviderInfo' + type: array + title: Data + type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. 
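Editor's note: for the prompt schemas above, create and update requests carry the template text plus its variable names. A minimal sketch; the `{{ name }}` placeholder syntax is an assumption, since the schema only says "variable placeholders":

    # CreatePromptRequest / UpdatePromptRequest payloads per the schemas above.
    create_prompt = {
        "prompt": "You are a helpful assistant. Reply in {{ language }}.",  # placeholder syntax assumed
        "variables": ["language"],
    }
    update_prompt = {
        "prompt": "You are a terse assistant. Reply in {{ language }}.",
        "version": 1,                # the version being edited
        "set_as_default": True,      # matches the schema default
    }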
+ ListOpenAIResponseObject: + properties: + data: items: $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more results available beyond this page + title: Has More first_id: type: string - description: >- - Identifier of the first item in this page + title: First Id last_id: type: string - description: Identifier of the last item in this page + title: Last Id object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false + type: object required: - - data - - has_more - - first_id - - last_id - - object + - data + - has_more + - first_id + - last_id title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. - OpenAIResponseAnnotationCitation: - type: object - properties: - type: - type: string - const: url_citation - default: url_citation - description: >- - Annotation type identifier, always "url_citation" - end_index: - type: integer - description: >- - End position of the citation span in the content - start_index: - type: integer - description: >- - Start position of the citation span in the content - title: - type: string - description: Title of the referenced web resource - url: - type: string - description: URL of the referenced web resource - additionalProperties: false - required: - - type - - end_index - - start_index - - title - - url - title: OpenAIResponseAnnotationCitation - description: >- - URL citation annotation for referencing external web resources. - "OpenAIResponseAnnotationContainerFileCitation": - type: object - properties: - type: - type: string - const: container_file_citation - default: container_file_citation - container_id: - type: string - end_index: - type: integer - file_id: - type: string - filename: - type: string - start_index: - type: integer - additionalProperties: false - required: - - type - - container_id - - end_index - - file_id - - filename - - start_index - title: >- - OpenAIResponseAnnotationContainerFileCitation - OpenAIResponseAnnotationFileCitation: - type: object - properties: - type: - type: string - const: file_citation - default: file_citation - description: >- - Annotation type identifier, always "file_citation" - file_id: - type: string - description: Unique identifier of the referenced file - filename: - type: string - description: Name of the referenced file - index: - type: integer - description: >- - Position index of the citation within the content - additionalProperties: false - required: - - type - - file_id - - filename - - index - title: OpenAIResponseAnnotationFileCitation - description: >- - File citation annotation for referencing specific files in response content. 
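Editor's note: ListOpenAIResponseObject follows the usual cursor pattern via `has_more` plus `first_id`/`last_id`. A client-side paging sketch, where `fetch_page` is a hypothetical stand-in for the underlying HTTP call:

    # Drain all pages of a ListOpenAIResponseObject-shaped listing.
    def iter_responses(fetch_page):
        after = None
        while True:
            page = fetch_page(after=after)   # hypothetical helper
            yield from page["data"]
            if not page.get("has_more"):
                break
            after = page["last_id"]          # resume after the last item seen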
- OpenAIResponseAnnotationFilePath: - type: object - properties: - type: - type: string - const: file_path - default: file_path - file_id: - type: string - index: - type: integer - additionalProperties: false - required: - - type - - file_id - - index - title: OpenAIResponseAnnotationFilePath - OpenAIResponseAnnotations: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' - discriminator: - propertyName: type - mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' - container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' - OpenAIResponseContentPartRefusal: - type: object - properties: - type: - type: string - const: refusal - default: refusal - description: >- - Content part type identifier, always "refusal" - refusal: - type: string - description: Refusal text supplied by the model - additionalProperties: false - required: - - type - - refusal - title: OpenAIResponseContentPartRefusal - description: >- - Refusal content within a streamed response part. + description: Paginated list of OpenAI response objects with navigation metadata. OpenAIResponseError: - type: object properties: code: type: string - description: >- - Error code identifying the type of failure + title: Code message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false + title: Message + type: object required: - - code - - message + - code + - message title: OpenAIResponseError - description: >- - Error details for failed OpenAI response requests. + description: Error details for failed OpenAI response requests. 
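Editor's note: since `error` on a response object is either null or an OpenAIResponseError carrying `code` and `message`, a defensive check is straightforward; a sketch:

    # Raise if a response dict carries an OpenAIResponseError payload.
    def raise_for_response_error(response_obj: dict) -> None:
        err = response_obj.get("error")
        if err is not None:
            raise RuntimeError(f"{err['code']}: {err['message']}")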
OpenAIResponseInput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: - $ref: '#/components/schemas/OpenAIResponseMessage' - "OpenAIResponseInputFunctionToolCallOutput": - type: object - properties: - call_id: - type: string - output: - type: string - type: - type: string - const: function_call_output - default: function_call_output - id: - type: string - status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. - OpenAIResponseInputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - discriminator: - propertyName: type - mapping: - input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' - input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' - OpenAIResponseInputMessageContentImage: - type: object - properties: - detail: - oneOf: - - type: string - const: low - - type: string - const: high - - type: string - const: auto - default: auto - description: >- - Level of detail for image processing, can be "low", "high", or "auto" - type: - type: string - const: input_image - default: input_image - description: >- - Content type identifier, always "input_image" - image_url: - type: string - description: (Optional) URL of the image content - additionalProperties: false - required: - - detail - - type - title: OpenAIResponseInputMessageContentImage - description: >- - Image content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentText: - type: object - properties: - text: - type: string - description: The text content of the input message - type: - type: string - const: input_text - default: input_text - description: >- - Content type identifier, always "input_text" - additionalProperties: false - required: - - text - - type - title: OpenAIResponseInputMessageContentText - description: >- - Text content for input messages in OpenAI response format. 
+ title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage OpenAIResponseInputToolFileSearch: - type: object properties: type: type: string const: file_search + title: Type default: file_search - description: >- - Tool type identifier, always "file_search" vector_store_ids: - type: array items: type: string - description: >- - List of vector store identifiers to search within + type: array + title: Vector Store Ids filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional filters to apply to the search + anyOf: + - additionalProperties: true + type: object + - type: 'null' max_num_results: - type: integer + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' default: 10 - description: >- - (Optional) Maximum number of search results to return (1-50) ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - (Optional) Options for ranking and scoring search results - additionalProperties: false - required: - - type - - vector_store_ids - title: OpenAIResponseInputToolFileSearch - description: >- - File search tool configuration for OpenAI response inputs. - OpenAIResponseInputToolFunction: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. 
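Editor's note: a file_search tool configuration per OpenAIResponseInputToolFileSearch above. `ranking_options` now points at SearchRankingOptions; judging by the inline schema this replaces, it carries an optional `ranker` and `score_threshold`, so that shape is an assumption here. Values are illustrative:

    file_search_tool = {
        "type": "file_search",
        "vector_store_ids": ["vs_123"],               # illustrative store ID
        "max_num_results": 5,                         # constrained to 1-50
        "filters": {"author": "jane"},                # free-form object
        "ranking_options": {"score_threshold": 0.5},  # assumed SearchRankingOptions shape
    }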
+ OpenAIResponseInputToolFunction: properties: type: type: string const: function + title: Type default: function - description: Tool type identifier, always "function" name: type: string - description: Name of the function that can be called + title: Name description: - type: string - description: >- - (Optional) Description of what the function does + anyOf: + - type: string + - type: 'null' parameters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON schema defining the function's parameters + anyOf: + - additionalProperties: true + type: object + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict parameter validation - additionalProperties: false + anyOf: + - type: boolean + - type: 'null' + type: object required: - - type - - name + - name + - parameters title: OpenAIResponseInputToolFunction - description: >- - Function tool configuration for OpenAI response inputs. + description: Function tool configuration for OpenAI response inputs. OpenAIResponseInputToolWebSearch: - type: object properties: type: - oneOf: - - type: string - const: web_search - - type: string - const: web_search_preview - - type: string - const: web_search_preview_2025_03_11 + title: Type default: web_search - description: Web search tool type variant to use + type: string + enum: + - web_search + - web_search_preview + - web_search_preview_2025_03_11 + - web_search_2025_08_26 search_context_size: - type: string + anyOf: + - type: string + pattern: ^(low|medium|high)$ + - type: 'null' default: medium - description: >- - (Optional) Size of search context, must be "low", "medium", or "high" - additionalProperties: false - required: - - type + type: object title: OpenAIResponseInputToolWebSearch - description: >- - Web search tool configuration for OpenAI response inputs. - OpenAIResponseMCPApprovalRequest: - type: object - properties: - arguments: - type: string - id: - type: string - name: - type: string - server_label: - type: string - type: - type: string - const: mcp_approval_request - default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: - type: object - properties: - approval_request_id: - type: string - approve: - type: boolean - type: - type: string - const: mcp_approval_response - default: mcp_approval_response - id: - type: string - reason: - type: string - additionalProperties: false - required: - - approval_request_id - - approve - - type - title: OpenAIResponseMCPApprovalResponse - description: A response to an MCP approval request.
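Editor's note: a function tool definition matching the schema above. The new schema requires both `name` and `parameters` (the latter may be null), and `parameters` is a free-form JSON Schema object. Values illustrative:

    function_tool = {
        "type": "function",
        "name": "get_weather",
        "description": "Look up current weather for a city.",
        "parameters": {                                   # JSON Schema for the arguments
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
        "strict": True,
    }
    web_search_tool = {"type": "web_search", "search_context_size": "medium"}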
- OpenAIResponseMessage: - type: object - properties: - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' - role: - oneOf: - - type: string - const: system - - type: string - const: developer - - type: string - const: user - - type: string - const: assistant - type: - type: string - const: message - default: message - id: - type: string - status: - type: string - additionalProperties: false - required: - - content - - role - - type - title: OpenAIResponseMessage - description: >- - Corresponds to the various Message types in the Responses API. They are all - under one type because the Responses API gives them all the same "type" value, - and there is no way to tell them apart in certain scenarios. + description: Web search tool configuration for OpenAI response inputs. OpenAIResponseObjectWithInput: - type: object properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + type: array + title: Output parallel_tool_calls: - type: boolean - default: false - description: >- - Whether tool calls can be executed in parallel + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: - type: array items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: >- - List of input items that led to this response - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Input + type: object required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - - input + - created_at + - id + - model + - output + - status + - input title: OpenAIResponseObjectWithInput - description: >- - OpenAI response object extended with input context information. + description: OpenAI response object extended with input context information. 
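Editor's note: pulling the pieces together, a consumer of OpenAIResponseObjectWithInput typically walks `output` for message items and concatenates their `output_text` parts. A hedged sketch; note that per OpenAIResponseMessage, `content` may also be a plain string:

    def collect_output_text(resp: dict) -> str:
        # Walk output items per OpenAIResponseObjectWithInput above.
        chunks = []
        for item in resp["output"]:
            if item.get("type") != "message":
                continue
            content = item.get("content")
            if isinstance(content, str):
                chunks.append(content)
                continue
            for part in content or []:
                if part.get("type") == "output_text":
                    chunks.append(part["text"])
        return "".join(chunks)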
     OpenAIResponseOutput:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseMessage'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
       discriminator:
-        propertyName: type
         mapping:
-          message: '#/components/schemas/OpenAIResponseMessage'
-          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
           file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
           function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
           mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-    OpenAIResponseOutputMessageContent:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
-        - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-      discriminator:
+          message: '#/components/schemas/OpenAIResponseMessage'
+          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
         propertyName: type
-        mapping:
-          output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText'
-          refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
-    "OpenAIResponseOutputMessageContentOutputText":
-      type: object
-      properties:
-        text:
-          type: string
-        type:
-          type: string
-          const: output_text
-          default: output_text
-        annotations:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIResponseAnnotations'
-      additionalProperties: false
-      required:
-        - text
-        - type
-        - annotations
-      title: >-
-        OpenAIResponseOutputMessageContentOutputText
-    "OpenAIResponseOutputMessageFileSearchToolCall":
-      type: object
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseMessage'
+        title: OpenAIResponseMessage
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        title: OpenAIResponseOutputMessageWebSearchToolCall
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+        title: OpenAIResponseOutputMessageFileSearchToolCall
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        title: OpenAIResponseOutputMessageFunctionToolCall
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+        title: OpenAIResponseOutputMessageMCPCall
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+        title: OpenAIResponseOutputMessageMCPListTools
+      - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+        title: OpenAIResponseMCPApprovalRequest
+      title: OpenAIResponseMessage | ... (7 variants)
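
Since OpenAIResponseOutput is now dispatched purely through the type discriminator, a conforming output item names its variant via type. A hypothetical minimal instance, using the field set of the web-search tool call schema removed further below (id value illustrative):

  type: web_search_call
  id: ws_123
  status: completed
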
+    OpenAIResponsePrompt:
       properties:
         id:
           type: string
-          description: Unique identifier for this tool call
-        queries:
-          type: array
-          items:
-            type: string
-          description: List of search queries executed
-        status:
-          type: string
-          description: >-
-            Current status of the file search operation
-        type:
-          type: string
-          const: file_search_call
-          default: file_search_call
-          description: >-
-            Tool call type identifier, always "file_search_call"
-        results:
-          type: array
-          items:
+          title: Id
+        variables:
+          anyOf:
+          - additionalProperties:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                title: OpenAIResponseInputMessageContentText
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                title: OpenAIResponseInputMessageContentImage
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                title: OpenAIResponseInputMessageContentFile
+              discriminator:
+                propertyName: type
+                mapping:
+                  input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                  input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                  input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+              title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile
             type: object
-            properties:
-              attributes:
-                type: object
-                additionalProperties:
-                  oneOf:
-                    - type: 'null'
-                    - type: boolean
-                    - type: number
-                    - type: string
-                    - type: array
-                    - type: object
-                description: >-
-                  (Optional) Key-value attributes associated with the file
-              file_id:
-                type: string
-                description: >-
-                  Unique identifier of the file containing the result
-              filename:
-                type: string
-                description: Name of the file containing the result
-              score:
-                type: number
-                description: >-
-                  Relevance score for this search result (between 0 and 1)
-              text:
-                type: string
-                description: Text content of the search result
-            additionalProperties: false
-            required:
-              - attributes
-              - file_id
-              - filename
-              - score
-              - text
-            title: >-
-              OpenAIResponseOutputMessageFileSearchToolCallResults
-            description: >-
-              Search results returned by the file search operation.
-          description: >-
-            (Optional) Search results returned by the file search operation
-      additionalProperties: false
-      required:
-        - id
-        - queries
-        - status
-        - type
-      title: >-
-        OpenAIResponseOutputMessageFileSearchToolCall
-      description: >-
-        File search tool call output message for OpenAI responses.
-    "OpenAIResponseOutputMessageFunctionToolCall":
+          - type: 'null'
+        version:
+          anyOf:
+          - type: string
+          - type: 'null'
       type: object
-      properties:
-        call_id:
-          type: string
-          description: Unique identifier for the function call
-        name:
-          type: string
-          description: Name of the function being called
-        arguments:
-          type: string
-          description: >-
-            JSON string containing the function arguments
-        type:
-          type: string
-          const: function_call
-          default: function_call
-          description: >-
-            Tool call type identifier, always "function_call"
-        id:
-          type: string
-          description: >-
-            (Optional) Additional identifier for the tool call
-        status:
-          type: string
-          description: >-
-            (Optional) Current status of the function call execution
-      additionalProperties: false
       required:
-        - call_id
-        - name
-        - arguments
-        - type
-      title: >-
-        OpenAIResponseOutputMessageFunctionToolCall
-      description: >-
-        Function tool call output message for OpenAI responses.
-    OpenAIResponseOutputMessageMCPCall:
-      type: object
-      properties:
-        id:
-          type: string
-          description: Unique identifier for this MCP call
-        type:
-          type: string
-          const: mcp_call
-          default: mcp_call
-          description: >-
-            Tool call type identifier, always "mcp_call"
-        arguments:
-          type: string
-          description: >-
-            JSON string containing the MCP call arguments
-        name:
-          type: string
-          description: Name of the MCP method being called
-        server_label:
-          type: string
-          description: >-
-            Label identifying the MCP server handling the call
-        error:
-          type: string
-          description: >-
-            (Optional) Error message if the MCP call failed
-        output:
-          type: string
-          description: >-
-            (Optional) Output result from the successful MCP call
-      additionalProperties: false
-      required:
-        - id
-        - type
-        - arguments
-        - name
-        - server_label
-      title: OpenAIResponseOutputMessageMCPCall
-      description: >-
-        Model Context Protocol (MCP) call output message for OpenAI responses.
-    OpenAIResponseOutputMessageMCPListTools:
-      type: object
-      properties:
-        id:
-          type: string
-          description: >-
-            Unique identifier for this MCP list tools operation
-        type:
-          type: string
-          const: mcp_list_tools
-          default: mcp_list_tools
-          description: >-
-            Tool call type identifier, always "mcp_list_tools"
-        server_label:
-          type: string
-          description: >-
-            Label identifying the MCP server providing the tools
-        tools:
-          type: array
-          items:
-            type: object
-            properties:
-              input_schema:
-                type: object
-                additionalProperties:
-                  oneOf:
-                    - type: 'null'
-                    - type: boolean
-                    - type: number
-                    - type: string
-                    - type: array
-                    - type: object
-                description: >-
-                  JSON schema defining the tool's input parameters
-              name:
-                type: string
-                description: Name of the tool
-              description:
-                type: string
-                description: >-
-                  (Optional) Description of what the tool does
-            additionalProperties: false
-            required:
-              - input_schema
-              - name
-            title: MCPListToolsTool
-            description: >-
-              Tool definition returned by MCP list tools operation.
-          description: >-
-            List of available tools provided by the MCP server
-      additionalProperties: false
-      required:
-        - id
-        - type
-        - server_label
-        - tools
-      title: OpenAIResponseOutputMessageMCPListTools
-      description: >-
-        MCP list tools output message containing available tools from an MCP server.
-    "OpenAIResponseOutputMessageWebSearchToolCall":
-      type: object
-      properties:
-        id:
-          type: string
-          description: Unique identifier for this tool call
-        status:
-          type: string
-          description: >-
-            Current status of the web search operation
-        type:
-          type: string
-          const: web_search_call
-          default: web_search_call
-          description: >-
-            Tool call type identifier, always "web_search_call"
-      additionalProperties: false
-      required:
-        - id
-        - status
-        - type
-      title: >-
-        OpenAIResponseOutputMessageWebSearchToolCall
-      description: >-
-        Web search tool call output message for OpenAI responses.
+      - id
+      title: OpenAIResponsePrompt
+      description: OpenAI compatible Prompt object that is used in OpenAI responses.
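
The new OpenAIResponsePrompt requires only id; variables and version are nullable. A hypothetical instance (identifier and values illustrative; the type/text fields of the input_text variant are assumed from the OpenAIResponseInputMessageContentText mapping above):

  id: prompt_abc123
  version: '2'
  variables:
    city:
      type: input_text
      text: Paris
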
     OpenAIResponseText:
-      type: object
       properties:
         format:
-          type: object
-          properties:
-            type:
-              oneOf:
-                - type: string
-                  const: text
-                - type: string
-                  const: json_schema
-                - type: string
-                  const: json_object
-              description: >-
-                Must be "text", "json_schema", or "json_object" to identify the format
-                type
-            name:
-              type: string
-              description: >-
-                The name of the response format. Only used for json_schema.
-            schema:
-              type: object
-              additionalProperties:
-                oneOf:
-                  - type: 'null'
-                  - type: boolean
-                  - type: number
-                  - type: string
-                  - type: array
-                  - type: object
-              description: >-
-                The JSON schema the response should conform to. In a Python SDK, this
-                is often a `pydantic` model. Only used for json_schema.
-            description:
-              type: string
-              description: >-
-                (Optional) A description of the response format. Only used for json_schema.
-            strict:
-              type: boolean
-              description: >-
-                (Optional) Whether to strictly enforce the JSON schema. If true, the
-                response must match the schema exactly. Only used for json_schema.
-          additionalProperties: false
-          required:
-            - type
-          description: >-
-            (Optional) Text format configuration specifying output format requirements
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseTextFormat'
+            title: OpenAIResponseTextFormat
+          - type: 'null'
+          title: OpenAIResponseTextFormat
+      type: object
       title: OpenAIResponseText
-      description: >-
-        Text response configuration for OpenAI responses.
+      description: Text response configuration for OpenAI responses.
     OpenAIResponseTool:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
-        - $ref: '#/components/schemas/OpenAIResponseToolMCP'
       discriminator:
-        propertyName: type
         mapping:
-          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
           file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
           function: '#/components/schemas/OpenAIResponseInputToolFunction'
           mcp: '#/components/schemas/OpenAIResponseToolMCP'
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        title: OpenAIResponseInputToolWebSearch
+      - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+        title: OpenAIResponseInputToolFileSearch
+      - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+        title: OpenAIResponseInputToolFunction
+      - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+        title: OpenAIResponseToolMCP
+      title: OpenAIResponseInputToolWebSearch | ... (4 variants)
     OpenAIResponseToolMCP:
-      type: object
       properties:
         type:
           type: string
           const: mcp
+          title: Type
           default: mcp
-          description: Tool type identifier, always "mcp"
         server_label:
           type: string
-          description: Label to identify this MCP server
+          title: Server Label
         allowed_tools:
-          oneOf:
-            - type: array
-              items:
-                type: string
-            - type: object
-              properties:
-                tool_names:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of specific tool names that are allowed
-              additionalProperties: false
-              title: AllowedToolsFilter
-              description: >-
-                Filter configuration for restricting which MCP tools can be used.
-          description: >-
-            (Optional) Restriction on which tools can be used from this server
-      additionalProperties: false
-      required:
-        - type
-        - server_label
-      title: OpenAIResponseToolMCP
-      description: >-
-        Model Context Protocol (MCP) tool configuration for OpenAI response object.
-    OpenAIResponseUsage:
+          anyOf:
+          - items:
+              type: string
+            type: array
+            title: list[string]
+          - $ref: '#/components/schemas/AllowedToolsFilter'
+            title: AllowedToolsFilter
+          - type: 'null'
+          title: list[string] | AllowedToolsFilter
       type: object
+      required:
+      - server_label
+      title: OpenAIResponseToolMCP
+      description: Model Context Protocol (MCP) tool configuration for OpenAI response object.
+    OpenAIResponseUsage:
       properties:
         input_tokens:
           type: integer
-          description: Number of tokens in the input
+          title: Input Tokens
         output_tokens:
           type: integer
-          description: Number of tokens in the output
+          title: Output Tokens
         total_tokens:
           type: integer
-          description: Total tokens used (input + output)
+          title: Total Tokens
         input_tokens_details:
-          type: object
-          properties:
-            cached_tokens:
-              type: integer
-              description: Number of tokens retrieved from cache
-          additionalProperties: false
-          description: Detailed breakdown of input token usage
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails'
+            title: OpenAIResponseUsageInputTokensDetails
+          - type: 'null'
+          title: OpenAIResponseUsageInputTokensDetails
         output_tokens_details:
-          type: object
-          properties:
-            reasoning_tokens:
-              type: integer
-              description: >-
-                Number of tokens used for reasoning (o1/o3 models)
-          additionalProperties: false
-          description: Detailed breakdown of output token usage
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails'
+            title: OpenAIResponseUsageOutputTokensDetails
+          - type: 'null'
+          title: OpenAIResponseUsageOutputTokensDetails
+      type: object
       required:
-        - input_tokens
-        - output_tokens
-        - total_tokens
+      - input_tokens
+      - output_tokens
+      - total_tokens
       title: OpenAIResponseUsage
       description: Usage information for OpenAI response.
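
A hypothetical usage payload under the new schema (numbers illustrative; the two details sub-objects are nullable, and their field names are taken from the inline definitions removed above):

  input_tokens: 120
  output_tokens: 45
  total_tokens: 165
  input_tokens_details:
    cached_tokens: 0
  output_tokens_details:
    reasoning_tokens: 12
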
     ResponseGuardrailSpec:
-      type: object
+      description: Specification for a guardrail to apply during response generation.
       properties:
         type:
+          title: Type
           type: string
-          description: The type/identifier of the guardrail.
-      additionalProperties: false
       required:
-        - type
+      - type
       title: ResponseGuardrailSpec
-      description: >-
-        Specification for a guardrail to apply during response generation.
+      type: object
     OpenAIResponseInputTool:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
       discriminator:
-        propertyName: type
         mapping:
-          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
           file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
           function: '#/components/schemas/OpenAIResponseInputToolFunction'
           mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        title: OpenAIResponseInputToolWebSearch
+      - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+        title: OpenAIResponseInputToolFileSearch
+      - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+        title: OpenAIResponseInputToolFunction
+      - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+        title: OpenAIResponseInputToolMCP
+      title: OpenAIResponseInputToolWebSearch | ... (4 variants)
     OpenAIResponseInputToolMCP:
-      type: object
       properties:
         type:
           type: string
           const: mcp
+          title: Type
           default: mcp
-          description: Tool type identifier, always "mcp"
         server_label:
           type: string
-          description: Label to identify this MCP server
+          title: Server Label
         server_url:
           type: string
-          description: URL endpoint of the MCP server
+          title: Server Url
         headers:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) HTTP headers to include when connecting to the server
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        authorization:
+          anyOf:
+          - type: string
+          - type: 'null'
         require_approval:
-          oneOf:
-            - type: string
-              const: always
-            - type: string
-              const: never
-            - type: object
-              properties:
-                always:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of tool names that always require approval
-                never:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of tool names that never require approval
-              additionalProperties: false
-              title: ApprovalFilter
-              description: >-
-                Filter configuration for MCP tool approval requirements.
+          anyOf:
+          - type: string
+            const: always
+          - type: string
+            const: never
+          - $ref: '#/components/schemas/ApprovalFilter'
+            title: ApprovalFilter
+          title: string | ApprovalFilter
           default: never
-          description: >-
-            Approval requirement for tool calls ("always", "never", or filter)
         allowed_tools:
-          oneOf:
-            - type: array
-              items:
-                type: string
-            - type: object
-              properties:
-                tool_names:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of specific tool names that are allowed
-              additionalProperties: false
-              title: AllowedToolsFilter
-              description: >-
-                Filter configuration for restricting which MCP tools can be used.
-          description: >-
-            (Optional) Restriction on which tools can be used from this server
-      additionalProperties: false
-      required:
-        - type
-        - server_label
-        - server_url
-        - require_approval
-      title: OpenAIResponseInputToolMCP
-      description: >-
-        Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
-    CreateOpenaiResponseRequest:
+          anyOf:
+          - items:
+              type: string
+            type: array
+            title: list[string]
+          - $ref: '#/components/schemas/AllowedToolsFilter'
+            title: AllowedToolsFilter
+          - type: 'null'
+          title: list[string] | AllowedToolsFilter
       type: object
+      required:
+      - server_label
+      - server_url
+      title: OpenAIResponseInputToolMCP
+      description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
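
A hypothetical MCP tool entry under the new input-tool schema (label and URL are placeholders; the ApprovalFilter and AllowedToolsFilter field names are assumed from the inline definitions removed above):

  type: mcp
  server_label: docs
  server_url: https://mcp.example.com/sse
  require_approval:
    always:
    - delete_file
  allowed_tools:
    tool_names:
    - search_docs
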
+    CreateOpenaiResponseRequest:
       properties:
         input:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/OpenAIResponseInput'
-          description: Input message(s) to create the response.
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - oneOf:
+                - $ref: '#/components/schemas/OpenAIResponseMessage-Input'
+                  title: OpenAIResponseMessage-Input
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                  title: OpenAIResponseOutputMessageWebSearchToolCall
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                  title: OpenAIResponseOutputMessageFileSearchToolCall
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                  title: OpenAIResponseOutputMessageFunctionToolCall
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                  title: OpenAIResponseOutputMessageMCPCall
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                  title: OpenAIResponseOutputMessageMCPListTools
+                - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                  title: OpenAIResponseMCPApprovalRequest
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                    function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                    mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                    mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                    mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                    message: '#/components/schemas/OpenAIResponseMessage-Input'
+                    web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseMessage-Input | ... (7 variants)
+              - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+                title: OpenAIResponseInputFunctionToolCallOutput
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+                title: OpenAIResponseMCPApprovalResponse
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Input'
+                title: OpenAIResponseMessage-Input
+              title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Input
+            type: array
+            title: list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...]
+          title: string | list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...]
         model:
           type: string
-          description: The underlying LLM used for completions.
+          title: Model
+        prompt:
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponsePrompt'
+            title: OpenAIResponsePrompt
+          - type: 'null'
+          title: OpenAIResponsePrompt
         instructions:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+        parallel_tool_calls:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
-          type: string
-          description: >-
-            (Optional) if specified, the new response will be a continuation of the
-            previous response. This can be used to easily fork-off new responses from
-            existing responses.
+          anyOf:
+          - type: string
+          - type: 'null'
         conversation:
-          type: string
-          description: >-
-            (Optional) The ID of a conversation to add the response to. Must begin
-            with 'conv_'. Input and output messages will be automatically added to
-            the conversation.
+          anyOf:
+          - type: string
+          - type: 'null'
         store:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         stream:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: false
         temperature:
-          type: number
+          anyOf:
+          - type: number
+          - type: 'null'
         text:
-          $ref: '#/components/schemas/OpenAIResponseText'
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseText'
+            title: OpenAIResponseText
+          - type: 'null'
+          title: OpenAIResponseText
         tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIResponseInputTool'
+          anyOf:
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                title: OpenAIResponseInputToolFileSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                title: OpenAIResponseInputToolFunction
+              - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+                title: OpenAIResponseInputToolMCP
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                  function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                  mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+                  web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+              title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+            type: array
+          - type: 'null'
         include:
-          type: array
-          items:
-            type: string
-          description: >-
-            (Optional) Additional fields to include in the response.
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
         max_infer_iters:
-          type: integer
-      additionalProperties: false
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 10
+        max_tool_calls:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
+      type: object
       required:
-        - input
-        - model
+      - input
+      - model
       title: CreateOpenaiResponseRequest
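
A hypothetical minimal request body against this schema; only input and model are required, and the model id is a placeholder (defaults shown for clarity):

  model: llama3.3-70b
  input: What is the capital of France?
  stream: false
  store: true
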
     OpenAIResponseObject:
-      type: object
       properties:
         created_at:
           type: integer
-          description: >-
-            Unix timestamp when the response was created
+          title: Created At
         error:
-          $ref: '#/components/schemas/OpenAIResponseError'
-          description: >-
-            (Optional) Error details if the response generation failed
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseError'
+            title: OpenAIResponseError
+          - type: 'null'
+          title: OpenAIResponseError
         id:
           type: string
-          description: Unique identifier for this response
+          title: Id
         model:
           type: string
-          description: Model identifier used for generation
+          title: Model
         object:
           type: string
           const: response
+          title: Object
           default: response
-          description: >-
-            Object type identifier, always "response"
         output:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseOutput'
-          description: >-
-            List of generated output items (messages, tool calls, etc.)
+            oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseOutputMessageWebSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              title: OpenAIResponseOutputMessageFileSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              title: OpenAIResponseOutputMessageFunctionToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              title: OpenAIResponseOutputMessageMCPCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              title: OpenAIResponseOutputMessageMCPListTools
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              title: OpenAIResponseMCPApprovalRequest
+            discriminator:
+              propertyName: type
+              mapping:
+                file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                message: '#/components/schemas/OpenAIResponseMessage-Output'
+                web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseMessage-Output | ... (7 variants)
+          type: array
+          title: Output
         parallel_tool_calls:
-          type: boolean
-          default: false
-          description: >-
-            Whether tool calls can be executed in parallel
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
-          type: string
-          description: >-
-            (Optional) ID of the previous response in a conversation
+          anyOf:
+          - type: string
+          - type: 'null'
+        prompt:
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponsePrompt'
+            title: OpenAIResponsePrompt
+          - type: 'null'
+          title: OpenAIResponsePrompt
         status:
           type: string
-          description: >-
-            Current status of the response generation
+          title: Status
         temperature:
-          type: number
-          description: >-
-            (Optional) Sampling temperature used for generation
+          anyOf:
+          - type: number
+          - type: 'null'
         text:
           $ref: '#/components/schemas/OpenAIResponseText'
-          description: >-
-            Text formatting configuration for the response
+          default:
+            format:
+              type: text
         top_p:
-          type: number
-          description: >-
-            (Optional) Nucleus sampling parameter used for generation
+          anyOf:
+          - type: number
+          - type: 'null'
         tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIResponseTool'
-          description: >-
-            (Optional) An array of tools the model may call while generating a response.
+          anyOf:
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                title: OpenAIResponseInputToolFileSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                title: OpenAIResponseInputToolFunction
+              - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+                title: OpenAIResponseToolMCP
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                  function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                  mcp: '#/components/schemas/OpenAIResponseToolMCP'
+                  web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+              title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+            type: array
+          - type: 'null'
         truncation:
-          type: string
-          description: >-
-            (Optional) Truncation strategy applied to the response
+          anyOf:
+          - type: string
+          - type: 'null'
         usage:
-          $ref: '#/components/schemas/OpenAIResponseUsage'
-          description: >-
-            (Optional) Token usage information for the response
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseUsage'
+            title: OpenAIResponseUsage
+          - type: 'null'
+          title: OpenAIResponseUsage
         instructions:
-          type: string
-          description: >-
-            (Optional) System message inserted into the model's context
-      additionalProperties: false
-      required:
-        - created_at
-        - id
-        - model
-        - object
-        - output
-        - parallel_tool_calls
-        - status
-        - text
-      title: OpenAIResponseObject
-      description: >-
-        Complete OpenAI response object containing generation results and metadata.
-    OpenAIResponseContentPartOutputText:
+          anyOf:
+          - type: string
+          - type: 'null'
+        max_tool_calls:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
       type: object
+      required:
+      - created_at
+      - id
+      - model
+      - output
+      - status
+      title: OpenAIResponseObject
+      description: Complete OpenAI response object containing generation results and metadata.
+    OpenAIResponseContentPartOutputText:
+      description: Text content within a streamed response part.
       properties:
         type:
-          type: string
           const: output_text
           default: output_text
-          description: >-
-            Content part type identifier, always "output_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Text emitted for this content part
         annotations:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseAnnotations'
-          description: >-
-            Structured annotations associated with the text
+            discriminator:
+              mapping:
+                container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+                file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+                file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+                url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+              title: OpenAIResponseAnnotationFileCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+              title: OpenAIResponseAnnotationCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+              title: OpenAIResponseAnnotationContainerFileCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+              title: OpenAIResponseAnnotationFilePath
+            title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
+          title: Annotations
+          type: array
         logprobs:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: (Optional) Token log probability details
-      additionalProperties: false
+          anyOf:
+          - items:
+              additionalProperties: true
+              type: object
+            type: array
+          - type: 'null'
+          nullable: true
       required:
-        - type
-        - text
-        - annotations
+      - text
       title: OpenAIResponseContentPartOutputText
-      description: >-
-        Text content within a streamed response part.
-    "OpenAIResponseContentPartReasoningSummary":
       type: object
+    OpenAIResponseContentPartReasoningSummary:
+      description: Reasoning summary part in a streamed response.
       properties:
         type:
-          type: string
           const: summary_text
           default: summary_text
-          description: >-
-            Content part type identifier, always "summary_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Summary text
-      additionalProperties: false
       required:
-        - type
-        - text
-      title: >-
-        OpenAIResponseContentPartReasoningSummary
-      description: >-
-        Reasoning summary part in a streamed response.
-    OpenAIResponseContentPartReasoningText:
+      - text
+      title: OpenAIResponseContentPartReasoningSummary
       type: object
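
A hypothetical output_text content part under the new schema (text illustrative; annotations may be empty and logprobs is nullable):

  type: output_text
  text: Paris is the capital of France.
  annotations: []
  logprobs: null
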
+    OpenAIResponseContentPartReasoningText:
+      description: Reasoning text emitted as part of a streamed response.
       properties:
         type:
-          type: string
           const: reasoning_text
           default: reasoning_text
-          description: >-
-            Content part type identifier, always "reasoning_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Reasoning text supplied by the model
-      additionalProperties: false
       required:
-        - type
-        - text
+      - text
       title: OpenAIResponseContentPartReasoningText
-      description: >-
-        Reasoning text emitted as part of a streamed response.
+      type: object
     OpenAIResponseObjectStream:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
       discriminator:
-        propertyName: type
         mapping:
-          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
-          response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
-          response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
-          response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
-          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
-          response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
-          response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
-          response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
-          response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
-          response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
-          response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
-          response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
-          response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
-          response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
-          response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
-          response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
-          response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
-          response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
-          response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
           response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
           response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
-          response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
-          response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
+          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+          response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+          response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+          response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+          response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+          response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+          response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+          response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+          response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+          response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+          response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+          response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+          response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+          response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+          response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+          response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+          response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+          response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+          response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+          response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
           response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
           response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
           response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
           response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
+          response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
+          response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
           response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
           response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
-          response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
-          response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
-          response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
-          response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
-          response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
-          response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
-          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
-    "OpenAIResponseObjectStreamResponseCompleted":
-      type: object
+          response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+          response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+          response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+        title: OpenAIResponseObjectStreamResponseCreated
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+        title: OpenAIResponseObjectStreamResponseInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+        title: OpenAIResponseObjectStreamResponseOutputItemAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+        title: OpenAIResponseObjectStreamResponseOutputItemDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+        title: OpenAIResponseObjectStreamResponseOutputTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
+        title: OpenAIResponseObjectStreamResponseOutputTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+        title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+        title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallSearching
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+        title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+        title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+        title: OpenAIResponseObjectStreamResponseMcpCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+        title: OpenAIResponseObjectStreamResponseMcpCallFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+        title: OpenAIResponseObjectStreamResponseMcpCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
+        title: OpenAIResponseObjectStreamResponseContentPartAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
+        title: OpenAIResponseObjectStreamResponseContentPartDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
+        title: OpenAIResponseObjectStreamResponseReasoningTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
+        title: OpenAIResponseObjectStreamResponseReasoningTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
+        title: OpenAIResponseObjectStreamResponseRefusalDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
+        title: OpenAIResponseObjectStreamResponseRefusalDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+        title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+        title: OpenAIResponseObjectStreamResponseIncomplete
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+        title: OpenAIResponseObjectStreamResponseFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+        title: OpenAIResponseObjectStreamResponseCompleted
+      title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
+    OpenAIResponseObjectStreamResponseCompleted:
+      description: Streaming event indicating a response has been completed.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Completed response object
         type:
-          type: string
           const: response.completed
           default: response.completed
-          description: >-
-            Event type identifier, always "response.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCompleted
-      description: >-
-        Streaming event indicating a response has been completed.
-    "OpenAIResponseObjectStreamResponseContentPartAdded":
+      - response
+      title: OpenAIResponseObjectStreamResponseCompleted
       type: object
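
A hypothetical terminal stream event conforming to this schema (the nested response is trimmed to its required fields plus the object constant; all values illustrative):

  type: response.completed
  response:
    id: resp_123
    object: response
    created_at: 1712345678
    model: llama3.3-70b
    status: completed
    output: []
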
+    OpenAIResponseObjectStreamResponseContentPartAdded:
+      description: Streaming event for when a new content part is added to a response item.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the part within the content array
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this content
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item containing this content part
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response
         part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
-            propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
               reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The content part that was added
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            title: OpenAIResponseContentPartOutputText
+          - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            title: OpenAIResponseContentPartRefusal
+          - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+            title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.content_part.added
           default: response.content_part.added
-          description: >-
-            Event type identifier, always "response.content_part.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - response_id
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartAdded
-      description: >-
-        Streaming event for when a new content part is added to a response item.
-    "OpenAIResponseObjectStreamResponseContentPartDone":
+      - content_index
+      - response_id
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartAdded
       type: object
+    OpenAIResponseObjectStreamResponseContentPartDone:
+      description: Streaming event for when a content part is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the part within the content array
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this content
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item containing this content part
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response
         part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
-            propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
               reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The completed content part
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            title: OpenAIResponseContentPartOutputText
+          - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            title: OpenAIResponseContentPartRefusal
+          - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+            title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.content_part.done
           default: response.content_part.done
-          description: >-
-            Event type identifier, always "response.content_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - response_id
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartDone
-      description: >-
-        Streaming event for when a content part is completed.
-    "OpenAIResponseObjectStreamResponseCreated":
+      - content_index
+      - response_id
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartDone
       type: object
+    OpenAIResponseObjectStreamResponseCreated:
+      description: Streaming event indicating a new response has been created.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: The response object that was created
         type:
-          type: string
           const: response.created
           default: response.created
-          description: >-
-            Event type identifier, always "response.created"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCreated
-      description: >-
-        Streaming event indicating a new response has been created.
-    OpenAIResponseObjectStreamResponseFailed:
+      - response
+      title: OpenAIResponseObjectStreamResponseCreated
       type: object
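
A hypothetical response.content_part.added event (ids and indices illustrative; part carries one variant of the discriminated content-part union):

  type: response.content_part.added
  response_id: resp_123
  item_id: msg_1
  output_index: 0
  content_index: 0
  sequence_number: 3
  part:
    type: output_text
    text: ''
    annotations: []
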
+    OpenAIResponseObjectStreamResponseFailed:
+      description: Streaming event emitted when a response fails.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Response object describing the failure
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.failed
           default: response.failed
-          description: >-
-            Event type identifier, always "response.failed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - sequence_number
-        - type
+      - response
+      - sequence_number
       title: OpenAIResponseObjectStreamResponseFailed
-      description: >-
-        Streaming event emitted when a response fails.
-    "OpenAIResponseObjectStreamResponseFileSearchCallCompleted":
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted:
+      description: Streaming event for completed file search calls.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.completed
           default: response.file_search_call.completed
-          description: >-
-            Event type identifier, always "response.file_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallCompleted
-      description: >-
-        Streaming event for completed file search calls.
-    "OpenAIResponseObjectStreamResponseFileSearchCallInProgress":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress:
+      description: Streaming event for file search calls in progress.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.in_progress
           default: response.file_search_call.in_progress
-          description: >-
-            Event type identifier, always "response.file_search_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallInProgress
-      description: >-
-        Streaming event for file search calls in progress.
-    "OpenAIResponseObjectStreamResponseFileSearchCallSearching":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching:
+      description: Streaming event for file search currently searching.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.searching
           default: response.file_search_call.searching
-          description: >-
-            Event type identifier, always "response.file_search_call.searching"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallSearching
-      description: >-
-        Streaming event for file search currently searching.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta:
+      description: Streaming event for incremental function call argument updates.
       properties:
         delta:
+          title: Delta
           type: string
-          description: >-
-            Incremental function call arguments being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the function call being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.delta
           default: response.function_call_arguments.delta
-          description: >-
-            Event type identifier, always "response.function_call_arguments.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
-      description: >-
-        Streaming event for incremental function call argument updates.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone":
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone:
+      description: Streaming event for when function call arguments are completed.
       properties:
         arguments:
+          title: Arguments
           type: string
-          description: >-
-            Final complete arguments JSON string for the function call
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed function call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.done
           default: response.function_call_arguments.done
-          description: >-
-            Event type identifier, always "response.function_call_arguments.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - arguments
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
-      description: >-
-        Streaming event for when function call arguments are completed.
-    "OpenAIResponseObjectStreamResponseInProgress":
+      - arguments
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
       type: object
- "OpenAIResponseObjectStreamResponseInProgress": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Current response state while in progress sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.in_progress default: response.in_progress - description: >- - Event type identifier, always "response.in_progress" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseInProgress - description: >- - Streaming event indicating the response remains in progress. - "OpenAIResponseObjectStreamResponseIncomplete": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: >- - Response object describing the incomplete state sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.incomplete default: response.incomplete - description: >- - Event type identifier, always "response.incomplete" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseIncomplete - description: >- - Streaming event emitted when a response ends in an incomplete state. 
- "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: properties: delta: + title: Delta type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.delta default: response.mcp_call.arguments.delta - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: properties: arguments: + title: Arguments type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.done default: response.mcp_call.arguments.done - additionalProperties: false + title: Type + type: string required: - - arguments - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone - "OpenAIResponseObjectStreamResponseMcpCallCompleted": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.completed default: response.mcp_call.completed - description: >- - Event type identifier, always "response.mcp_call.completed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallCompleted - description: Streaming event for completed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.failed default: response.mcp_call.failed - description: >- - Event type identifier, always "response.mcp_call.failed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallFailed - description: Streaming event for failed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
properties: item_id: + title: Item Id type: string - description: Unique identifier of the MCP call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.in_progress default: response.mcp_call.in_progress - description: >- - Event type identifier, always "response.mcp_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallInProgress - description: >- - Streaming event for MCP calls in progress. - "OpenAIResponseObjectStreamResponseMcpListToolsCompleted": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallInProgress type: object + OpenAIResponseObjectStreamResponseMcpListToolsCompleted: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.completed default: response.mcp_list_tools.completed - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsCompleted - "OpenAIResponseObjectStreamResponseMcpListToolsFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted type: object + OpenAIResponseObjectStreamResponseMcpListToolsFailed: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.failed default: response.mcp_list_tools.failed - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsFailed - "OpenAIResponseObjectStreamResponseMcpListToolsInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed type: object + OpenAIResponseObjectStreamResponseMcpListToolsInProgress: properties: sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_list_tools.in_progress default: response.mcp_list_tools.in_progress - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpListToolsInProgress - "OpenAIResponseObjectStreamResponseOutputItemAdded": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress type: object + OpenAIResponseObjectStreamResponseOutputItemAdded: + description: Streaming event for when a new output item is added to the response. 
properties: response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this output item: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - description: >- - The output item that was added (message, tool call, etc.) + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) output_index: + title: Output Index type: integer - description: >- - Index position of this item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_item.added default: response.output_item.added - description: >- - Event type identifier, always "response.output_item.added" - additionalProperties: false + title: Type + type: string required: - - response_id - - item - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputItemAdded - description: >- - Streaming event for when a new output item is added to the response. - "OpenAIResponseObjectStreamResponseOutputItemDone": + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemAdded type: object + OpenAIResponseObjectStreamResponseOutputItemDone: + description: Streaming event for when an output item is completed. 
properties: response_id: + title: Response Id type: string - description: >- - Unique identifier of the response containing this output item: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - description: >- - The completed output item (message, tool call, etc.) + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) output_index: + title: Output Index type: integer - description: >- - Index position of this item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_item.done default: response.output_item.done - description: >- - Event type identifier, always "response.output_item.done" - additionalProperties: false + title: Type + type: string required: - - response_id - - item - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputItemDone - description: >- - Streaming event for when an output item is completed. - "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded": + - response_id + - item + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputItemDone type: object + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded: + description: Streaming event for when an annotation is added to output text. 
properties: item_id: + title: Item Id type: string - description: >- - Unique identifier of the item to which the annotation is being added output_index: + title: Output Index type: integer - description: >- - Index position of the output item in the response's output array content_index: + title: Content Index type: integer - description: >- - Index position of the content part within the output item annotation_index: + title: Annotation Index type: integer - description: >- - Index of the annotation within the content part annotation: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' - description: The annotation object being added + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.annotation.added default: response.output_text.annotation.added - description: >- - Event type identifier, always "response.output_text.annotation.added" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - content_index - - annotation_index - - annotation - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded - description: >- - Streaming event for when an annotation is added to output text. - "OpenAIResponseObjectStreamResponseOutputTextDelta": + - item_id + - output_index + - content_index + - annotation_index + - annotation + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded type: object + OpenAIResponseObjectStreamResponseOutputTextDelta: + description: Streaming event for incremental text content updates. 
properties: content_index: + title: Content Index type: integer - description: Index position within the text content delta: + title: Delta type: string - description: Incremental text content being added item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item being updated output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.delta default: response.output_text.delta - description: >- - Event type identifier, always "response.output_text.delta" - additionalProperties: false + title: Type + type: string required: - - content_index - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextDelta - description: >- - Streaming event for incremental text content updates. - "OpenAIResponseObjectStreamResponseOutputTextDone": + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDelta type: object + OpenAIResponseObjectStreamResponseOutputTextDone: + description: Streaming event for when text output is completed. properties: content_index: + title: Content Index type: integer - description: Index position within the text content text: + title: Text type: string - description: >- - Final complete text content of the output item item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.output_text.done default: response.output_text.done - description: >- - Event type identifier, always "response.output_text.done" - additionalProperties: false + title: Type + type: string required: - - content_index - - text - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseOutputTextDone - description: >- - Streaming event for when text output is completed. - "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded": + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseOutputTextDone type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded: + description: Streaming event for when a new reasoning summary part is added. 
properties: item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item part: $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' - description: The summary part that was added sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_part.added default: response.reasoning_summary_part.added - description: >- - Event type identifier, always "response.reasoning_summary_part.added" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - part - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded - description: >- - Streaming event for when a new reasoning summary part is added. - "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone": + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded type: object + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone: + description: Streaming event for when a reasoning summary part is completed. properties: item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item part: $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary' - description: The completed summary part sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_part.done default: response.reasoning_summary_part.done - description: >- - Event type identifier, always "response.reasoning_summary_part.done" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - part - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryPartDone - description: >- - Streaming event for when a reasoning summary part is completed. - "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta": + - item_id + - output_index + - part + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta: + description: Streaming event for incremental reasoning summary text updates. 
properties: delta: + title: Delta type: string - description: Incremental summary text being added item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_text.delta default: response.reasoning_summary_text.delta - description: >- - Event type identifier, always "response.reasoning_summary_text.delta" - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta - description: >- - Streaming event for incremental reasoning summary text updates. - "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone": + - delta + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta type: object + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone: + description: Streaming event for when reasoning summary text is completed. properties: text: + title: Text type: string - description: Final complete summary text item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: Index position of the output item sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events summary_index: + title: Summary Index type: integer - description: >- - Index of the summary part within the reasoning summary type: - type: string const: response.reasoning_summary_text.done default: response.reasoning_summary_text.done - description: >- - Event type identifier, always "response.reasoning_summary_text.done" - additionalProperties: false + title: Type + type: string required: - - text - - item_id - - output_index - - sequence_number - - summary_index - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningSummaryTextDone - description: >- - Streaming event for when reasoning summary text is completed. - "OpenAIResponseObjectStreamResponseReasoningTextDelta": + - text + - item_id + - output_index + - sequence_number + - summary_index + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone type: object + OpenAIResponseObjectStreamResponseReasoningTextDelta: + description: Streaming event for incremental reasoning text updates. 
properties: content_index: + title: Content Index type: integer - description: >- - Index position of the reasoning content part delta: + title: Delta type: string - description: Incremental reasoning text being added item_id: + title: Item Id type: string - description: >- - Unique identifier of the output item being updated output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.reasoning_text.delta default: response.reasoning_text.delta - description: >- - Event type identifier, always "response.reasoning_text.delta" - additionalProperties: false + title: Type + type: string required: - - content_index - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningTextDelta - description: >- - Streaming event for incremental reasoning text updates. - "OpenAIResponseObjectStreamResponseReasoningTextDone": + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseReasoningTextDelta type: object + OpenAIResponseObjectStreamResponseReasoningTextDone: + description: Streaming event for when reasoning text is completed. properties: content_index: + title: Content Index type: integer - description: >- - Index position of the reasoning content part text: + title: Text type: string - description: Final complete reasoning text item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.reasoning_text.done default: response.reasoning_text.done - description: >- - Event type identifier, always "response.reasoning_text.done" - additionalProperties: false + title: Type + type: string required: - - content_index - - text - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseReasoningTextDone - description: >- - Streaming event for when reasoning text is completed. - "OpenAIResponseObjectStreamResponseRefusalDelta": + - content_index + - text + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseReasoningTextDone type: object + OpenAIResponseObjectStreamResponseRefusalDelta: + description: Streaming event for incremental refusal text updates. 
properties: content_index: + title: Content Index type: integer - description: Index position of the content part delta: + title: Delta type: string - description: Incremental refusal text being added item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.refusal.delta default: response.refusal.delta - description: >- - Event type identifier, always "response.refusal.delta" - additionalProperties: false + title: Type + type: string required: - - content_index - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseRefusalDelta - description: >- - Streaming event for incremental refusal text updates. - "OpenAIResponseObjectStreamResponseRefusalDone": + - content_index + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseRefusalDelta type: object + OpenAIResponseObjectStreamResponseRefusalDone: + description: Streaming event for when refusal text is completed. properties: content_index: + title: Content Index type: integer - description: Index position of the content part refusal: + title: Refusal type: string - description: Final complete refusal text item_id: + title: Item Id type: string - description: Unique identifier of the output item output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.refusal.done default: response.refusal.done - description: >- - Event type identifier, always "response.refusal.done" - additionalProperties: false + title: Type + type: string required: - - content_index - - refusal - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseRefusalDone - description: >- - Streaming event for when refusal text is completed. - "OpenAIResponseObjectStreamResponseWebSearchCallCompleted": + - content_index + - refusal + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseRefusalDone type: object + OpenAIResponseObjectStreamResponseWebSearchCallCompleted: + description: Streaming event for completed web search calls. properties: item_id: + title: Item Id type: string - description: >- - Unique identifier of the completed web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.completed default: response.web_search_call.completed - description: >- - Event type identifier, always "response.web_search_call.completed" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallCompleted - description: >- - Streaming event for completed web search calls. 
- "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. properties: item_id: + title: Item Id type: string - description: Unique identifier of the web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.in_progress default: response.web_search_call.in_progress - description: >- - Event type identifier, always "response.web_search_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallInProgress - description: >- - Streaming event for web search calls in progress. - "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: properties: item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.web_search_call.searching default: response.web_search_call.searching - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallSearching - OpenAIDeleteResponseObject: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching type: object + OpenAIDeleteResponseObject: properties: id: type: string - description: >- - Unique identifier of the deleted response + title: Id object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" deleted: type: boolean + title: Deleted default: true - description: Deletion confirmation flag, always True - additionalProperties: false - required: - - id - - object - - deleted - title: OpenAIDeleteResponseObject - description: >- - Response object confirming deletion of an OpenAI response. - ListOpenAIResponseInputItem: type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. 
+ ListOpenAIResponseInputItem: properties: data: - type: array items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: List of input items + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Data object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - object - title: ListOpenAIResponseInputItem - description: >- - List container for OpenAI response input items. - VectorStoreFileCounts: type: object + required: + - data + title: ListOpenAIResponseInputItem + description: List container for OpenAI response input items. 
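For reference, a deletion confirmation conforming to the OpenAIDeleteResponseObject schema above; the id is invented, while `object` and `deleted` carry their const/default values:
# hypothetical deletion confirmation
id: resp_abc123
object: response
deleted: true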
+ RunShieldRequest: + properties: + shield_id: + type: string + title: Shield Id + messages: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) + type: array + title: Messages + params: + additionalProperties: true + type: object + title: Params + type: object + required: + - shield_id + - messages + - params + title: RunShieldRequest + RunShieldResponse: + properties: + violation: + anyOf: + - $ref: '#/components/schemas/SafetyViolation' + title: SafetyViolation + - type: 'null' + title: SafetyViolation + type: object + title: RunShieldResponse + description: Response from running a safety shield. + SafetyViolation: + properties: + violation_level: + $ref: '#/components/schemas/ViolationLevel' + user_message: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + type: object + required: + - violation_level + title: SafetyViolation + description: Details of a safety violation detected by content moderation. + ViolationLevel: + type: string + enum: + - info + - warn + - error + title: ViolationLevel + description: Severity level of a safety violation. + AggregationFunctionType: + type: string + enum: + - average + - weighted_average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: Types of aggregation functions for scoring results. + ArrayType: + properties: + type: + type: string + const: array + title: Type + default: array + type: object + title: ArrayType + description: Parameter type for array values. + BasicScoringFnParams: + properties: + type: + type: string + const: basic + title: Type + default: basic + aggregation_functions: + items: + $ref: '#/components/schemas/AggregationFunctionType' + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row + type: object + title: BasicScoringFnParams + description: Parameters for basic scoring function configuration. + BooleanType: + properties: + type: + type: string + const: boolean + title: Type + default: boolean + type: object + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + properties: + type: + type: string + const: chat_completion_input + title: Type + default: chat_completion_input + type: object + title: ChatCompletionInputType + description: Parameter type for chat completion input. + CompletionInputType: + properties: + type: + type: string + const: completion_input + title: Type + default: completion_input + type: object + title: CompletionInputType + description: Parameter type for completion input. 
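The safety schemas above fit together as a request/response pair. A hypothetical run_shield exchange, wrapped in request/response keys for readability — the shield id, message shape, and text are invented:
# hypothetical run_shield exchange
request:                           # RunShieldRequest
  shield_id: llama-guard
  messages:
    - role: user                   # OpenAIUserMessageParam (assumed shape)
      content: "Tell me something unsafe."
  params: {}
response:                          # RunShieldResponse; violation omitted when content passes
  violation:                       # SafetyViolation
    violation_level: error         # ViolationLevel: info | warn | error
    user_message: "I can't help with that request."
    metadata: {}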
+ JsonType: + properties: + type: + type: string + const: json + title: Type + default: json + type: object + title: JsonType + description: Parameter type for JSON values. + LLMAsJudgeScoringFnParams: + properties: + type: + type: string + const: llm_as_judge + title: Type + default: llm_as_judge + judge_model: + type: string + title: Judge Model + prompt_template: + anyOf: + - type: string + - type: 'null' + judge_score_regexes: + items: + type: string + type: array + title: Judge Score Regexes + description: Regexes to extract the answer from generated response + aggregation_functions: + items: + $ref: '#/components/schemas/AggregationFunctionType' + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row + type: object + required: + - judge_model + title: LLMAsJudgeScoringFnParams + description: Parameters for LLM-as-judge scoring function configuration. + NumberType: + properties: + type: + type: string + const: number + title: Type + default: number + type: object + title: NumberType + description: Parameter type for numeric values. + ObjectType: + properties: + type: + type: string + const: object + title: Type + default: object + type: object + title: ObjectType + description: Parameter type for object values. + RegexParserScoringFnParams: + properties: + type: + type: string + const: regex_parser + title: Type + default: regex_parser + parsing_regexes: + items: + type: string + type: array + title: Parsing Regexes + description: Regexes to extract the answer from generated response + aggregation_functions: + items: + $ref: '#/components/schemas/AggregationFunctionType' + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row + type: object + title: RegexParserScoringFnParams + description: Parameters for regex parser scoring function configuration.
+ ScoringFn: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: scoring_function + title: Type + default: scoring_function + description: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this definition + return_type: + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + description: The return type of the deterministic function + discriminator: + propertyName: type + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + params: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + description: The parameters for the scoring function for benchmark eval; these can be overridden for app eval + type: object + required: + - identifier + - provider_id + - return_type + title: ScoringFn + description: A scoring function resource for evaluating model outputs. + ScoringFnParams: + discriminator: + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + ScoringFnParamsType: + description: Types of scoring function parameter configurations. + enum: + - llm_as_judge + - regex_parser + - basic + title: ScoringFnParamsType + type: string + StringType: + properties: + type: + type: string + const: string + title: Type + default: string + type: object + title: StringType + description: Parameter type for string values. + UnionType: + properties: + type: + type: string + const: union + title: Type + default: union + type: object + title: UnionType + description: Parameter type for union values.
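A hypothetical ScoringFn resource tying these pieces together; the identifier, provider id, and judge model are invented. `return_type` selects one of the nine parameter-type schemas and `params` one of the three param variants:
# hypothetical ScoringFn resource
identifier: my-eval::relevance
provider_id: llm-as-judge
type: scoring_function
return_type:                       # NumberType
  type: number
params:                            # LLMAsJudgeScoringFnParams
  type: llm_as_judge
  judge_model: example-judge-model
  judge_score_regexes:
    - 'Score: (\d+)'
  aggregation_functions:
    - average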
+ ListScoringFunctionsResponse: + properties: + data: + items: + $ref: '#/components/schemas/ScoringFn' + type: array + title: Data + type: object + required: + - data + title: ListScoringFunctionsResponse + ScoreRequest: + properties: + input_rows: + items: + additionalProperties: true + type: object + type: array + title: Input Rows + scoring_functions: + additionalProperties: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions + type: object + required: + - input_rows + - scoring_functions + title: ScoreRequest + ScoreResponse: + properties: + results: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Results + type: object + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoringResult: + properties: + score_rows: + items: + additionalProperties: true + type: object + type: array + title: Score Rows + aggregated_results: + additionalProperties: true + type: object + title: Aggregated Results + type: object + required: + - score_rows + - aggregated_results + title: ScoringResult + description: A scoring result for a single row. + ScoreBatchRequest: + properties: + dataset_id: + type: string + title: Dataset Id + scoring_functions: + additionalProperties: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions + save_results_dataset: + type: boolean + title: Save Results Dataset + default: false + type: object + required: + - dataset_id + - scoring_functions + title: ScoreBatchRequest + ScoreBatchResponse: + properties: + dataset_id: + anyOf: + - type: string + - type: 'null' + results: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Results + type: object + required: + - results + title: ScoreBatchResponse + description: Response from batch scoring operations on datasets. 
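A hypothetical score round-trip under these schemas, wrapped in request/response keys for readability; the scoring-function id and rows are invented, and a `null` params entry means the function's stored params apply:
# hypothetical score exchange
request:                           # ScoreRequest
  input_rows:
    - question: "What is 2 + 2?"
      generated_answer: "4"
      expected_answer: "4"
  scoring_functions:
    "basic::equality": null
response:                          # ScoreResponse
  results:
    "basic::equality":             # ScoringResult
      score_rows:
        - score: 1.0
      aggregated_results:
        accuracy: 1.0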
+ Shield: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: shield + title: Type + default: shield + params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - identifier + - provider_id + title: Shield + description: A safety shield resource that can be used to check content. + ListShieldsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Shield' + type: array + title: Data + type: object + required: + - data + title: ListShieldsResponse + InvokeToolRequest: + properties: + tool_name: + type: string + title: Tool Name + kwargs: + additionalProperties: true + type: object + title: Kwargs + authorization: + anyOf: + - type: string + - type: 'null' + type: object + required: + - tool_name + - kwargs + title: InvokeToolRequest + ImageContentItem: + description: An image content item + properties: + type: + const: image + default: image + title: Type + type: string + image: + $ref: '#/components/schemas/_URLOrData' + required: + - image + title: ImageContentItem + type: object + InterleavedContent: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + InterleavedContentItem: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + TextContentItem: + properties: + type: + type: string + const: text + title: Type + default: text + text: + type: string + title: Text + type: object + required: + - text + title: TextContentItem + description: A text content item + ToolInvocationResult: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator:
+ propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem-Output | TextContentItem] + error_message: + anyOf: + - type: string + - type: 'null' + error_code: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: ToolInvocationResult + description: Result of a tool invocation. + URL: + properties: + uri: + type: string + title: Uri + type: object + required: + - uri + title: URL + description: A URL reference to external content. + ToolDef: + properties: + toolgroup_id: + anyOf: + - type: string + - type: 'null' + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + input_schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + output_schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - name + title: ToolDef + description: Tool definition used in runtime contexts. + ListToolDefsResponse: + properties: + data: + items: + $ref: '#/components/schemas/ToolDef' + type: array + title: Data + type: object + required: + - data + title: ListToolDefsResponse + description: Response containing a list of tool definitions. + ToolGroup: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: tool_group + title: Type + default: tool_group + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - identifier + - provider_id + title: ToolGroup + description: A group of related tools managed together. + ListToolGroupsResponse: + properties: + data: + items: + $ref: '#/components/schemas/ToolGroup' + type: array + title: Data + type: object + required: + - data + title: ListToolGroupsResponse + description: Response containing a list of tool groups. + Chunk: + description: A chunk of content that can be inserted into a vector database. 
+      properties:
+        content:
+          anyOf:
+          - type: string
+          - discriminator:
+              mapping:
+                image: '#/components/schemas/ImageContentItem'
+                text: '#/components/schemas/TextContentItem'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/ImageContentItem'
+              title: ImageContentItem
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          - items:
+              discriminator:
+                mapping:
+                  image: '#/components/schemas/ImageContentItem'
+                  text: '#/components/schemas/TextContentItem'
+                propertyName: type
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem'
+                title: ImageContentItem
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              title: ImageContentItem | TextContentItem
+            type: array
+            title: list[ImageContentItem | TextContentItem]
+          title: string | list[ImageContentItem | TextContentItem]
+        chunk_id:
+          title: Chunk Id
+          type: string
+        metadata:
+          additionalProperties: true
+          title: Metadata
+          type: object
+        embedding:
+          anyOf:
+          - items:
+              type: number
+            type: array
+          - type: 'null'
+          nullable: true
+        chunk_metadata:
+          anyOf:
+          - $ref: '#/components/schemas/ChunkMetadata'
+            title: ChunkMetadata
+          - type: 'null'
+          nullable: true
+          title: ChunkMetadata
+      required:
+      - content
+      - chunk_id
+      title: Chunk
+      type: object
+    ChunkMetadata:
+      properties:
+        chunk_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        document_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        source:
+          anyOf:
+          - type: string
+          - type: 'null'
+        created_timestamp:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        updated_timestamp:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        chunk_window:
+          anyOf:
+          - type: string
+          - type: 'null'
+        chunk_tokenizer:
+          anyOf:
+          - type: string
+          - type: 'null'
+        chunk_embedding_model:
+          anyOf:
+          - type: string
+          - type: 'null'
+        chunk_embedding_dimension:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        content_token_count:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata_token_count:
+          anyOf:
+          - type: integer
+          - type: 'null'
+      type: object
+      title: ChunkMetadata
+      description: |-
+        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
+        will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
+        is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after.
+        Use `Chunk.metadata` for metadata that will be used in the context during inference.
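To make the `Chunk`/`ChunkMetadata` split concrete, here is a minimal payload sketch that should satisfy the schema above; only `content` and `chunk_id` are required, and all field values are invented for illustration:

```python
# Illustrative Chunk payload per the schema above; ids and text are made up.
chunk = {
    "chunk_id": "chunk-0001",                       # required
    "content": "Llama Stack is a unified AI API.",  # required; plain-string InterleavedContent
    "metadata": {"document_id": "doc-42"},          # surfaced in inference context
    "embedding": None,                              # nullable list[number]
    "chunk_metadata": {                             # backend-only ChunkMetadata
        "document_id": "doc-42",
        "source": "docs/intro.md",
        "content_token_count": 8,
    },
}
```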
+ InsertChunksRequest: + properties: + vector_store_id: + type: string + title: Vector Store Id + chunks: + items: + $ref: '#/components/schemas/Chunk-Input' + type: array + title: Chunks + ttl_seconds: + anyOf: + - type: integer + - type: 'null' + type: object + required: + - vector_store_id + - chunks + title: InsertChunksRequest + QueryChunksRequest: + properties: + vector_store_id: + type: string + title: Vector Store Id + query: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - vector_store_id + - query + title: QueryChunksRequest + QueryChunksResponse: + properties: + chunks: + items: + $ref: '#/components/schemas/Chunk-Output' + type: array + title: Chunks + scores: + items: + type: number + type: array + title: Scores + type: object + required: + - chunks + - scores + title: QueryChunksResponse + description: Response from querying chunks in a vector database. + VectorStoreFileCounts: properties: completed: type: integer - description: >- - Number of files that have been successfully processed + title: Completed cancelled: type: integer - description: >- - Number of files that had their processing cancelled + title: Cancelled failed: type: integer - description: Number of files that failed to process + title: Failed in_progress: type: integer - description: >- - Number of files currently being processed + title: In Progress total: type: integer - description: >- - Total number of files in the vector store - additionalProperties: false - required: - - completed - - cancelled - - failed - - in_progress - - total - title: VectorStoreFileCounts - description: >- - File processing status counts for a vector store. - VectorStoreListResponse: + title: Total type: object + required: + - completed + - cancelled + - failed + - in_progress + - total + title: VectorStoreFileCounts + description: File processing status counts for a vector store. 
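Going back to the chunk APIs above, the two request shapes pair up roughly like this; the vector store id, query text, and the key inside `params` are placeholders:

```python
# Hypothetical request bodies for InsertChunksRequest and QueryChunksRequest.
insert_request = {
    "vector_store_id": "vs_123",                        # required
    "chunks": [{"chunk_id": "chunk-0001",               # required Chunk fields only
                "content": "Llama Stack overview."}],
    "ttl_seconds": 3600,                                # nullable; null means no expiry
}

query_request = {
    "vector_store_id": "vs_123",                        # required
    "query": "What is Llama Stack?",                    # string arm of the content union
    "params": {"max_chunks": 5},                        # nullable free-form provider params
}
```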
+ VectorStoreListResponse: properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreObject' - description: List of vector store objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first vector store in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last vector store in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more vector stores available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreListResponse description: Response from listing vector stores. VectorStoreObject: - type: object properties: id: type: string - description: Unique identifier for the vector store + title: Id object: type: string + title: Object default: vector_store - description: >- - Object type identifier, always "vector_store" created_at: type: integer - description: >- - Timestamp when the vector store was created + title: Created At name: - type: string - description: (Optional) Name of the vector store + anyOf: + - type: string + - type: 'null' usage_bytes: type: integer + title: Usage Bytes default: 0 - description: >- - Storage space used by the vector store in bytes file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the vector store status: type: string + title: Status default: completed - description: Current status of the vector store expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store + anyOf: + - additionalProperties: true + type: object + - type: 'null' expires_at: - type: integer - description: >- - (Optional) Timestamp when the vector store will expire + anyOf: + - type: integer + - type: 'null' last_active_at: - type: integer - description: >- - (Optional) Timestamp of last activity on the vector store + anyOf: + - type: integer + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false + title: Metadata + type: object required: - - id - - object - - created_at - - usage_bytes - - file_counts - - status - - metadata + - id + - created_at + - file_counts title: VectorStoreObject description: OpenAI Vector Store object. 
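One consequence of the relaxed `required` list worth noting: under the new schema a `VectorStoreObject` is valid with only `id`, `created_at`, and `file_counts`, since `object`, `status`, and `usage_bytes` now carry defaults. A sketch, with placeholder values:

```python
# Minimal valid VectorStoreObject under the new schema.
vector_store = {
    "id": "vs_abc123",
    "created_at": 1730000000,      # unix timestamp
    "file_counts": {               # VectorStoreFileCounts: all five fields required
        "completed": 2, "cancelled": 0, "failed": 0,
        "in_progress": 1, "total": 3,
    },
    # "object": "vector_store", "status": "completed", "usage_bytes": 0  <- defaults
}
```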
- "OpenAICreateVectorStoreRequestWithExtraBody": - type: object - properties: - name: - type: string - description: (Optional) A name for the vector store - file_ids: - type: array - items: - type: string - description: >- - List of file IDs to include in the vector store - expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store - chunking_strategy: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Strategy for splitting files into chunks - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false - title: >- - OpenAICreateVectorStoreRequestWithExtraBody - description: >- - Request to create a vector store with extra_body support. - OpenaiUpdateVectorStoreRequest: - type: object - properties: - name: - type: string - description: The name of the vector store. - expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The expiration policy for a vector store. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of 16 key-value pairs that can be attached to an object. - additionalProperties: false - title: OpenaiUpdateVectorStoreRequest - VectorStoreDeleteResponse: - type: object - properties: - id: - type: string - description: >- - Unique identifier of the deleted vector store - object: - type: string - default: vector_store.deleted - description: >- - Object type identifier for the deletion response - deleted: - type: boolean - default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreDeleteResponse - description: Response from deleting a vector store. VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' discriminator: - propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + propertyName: type + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic VectorStoreChunkingStrategyAuto: - type: object properties: type: type: string const: auto + title: Type default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. 
- VectorStoreChunkingStrategyStatic: type: object + title: VectorStoreChunkingStrategyAuto + description: Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: properties: type: type: string const: static + title: Type default: static - description: >- - Strategy type, always "static" for static chunking static: $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. - VectorStoreChunkingStrategyStaticConfig: type: object + required: + - static + title: VectorStoreChunkingStrategyStatic + description: Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: properties: chunk_overlap_tokens: type: integer + title: Chunk Overlap Tokens default: 400 - description: >- - Number of tokens to overlap between adjacent chunks max_chunk_size_tokens: type: integer + maximum: 4096.0 + minimum: 100.0 + title: Max Chunk Size Tokens default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens + type: object title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. - "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": - type: object + description: Configuration for static chunking strategy. + OpenAICreateVectorStoreRequestWithExtraBody: properties: + name: + anyOf: + - type: string + - type: 'null' file_ids: - type: array - items: - type: string - description: >- - A list of File IDs that the vector store should use - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes to store with the files + anyOf: + - items: + type: string + type: array + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto - additionalProperties: false - required: - - file_ids - title: >- - OpenAICreateVectorStoreFileBatchRequestWithExtraBody - description: >- - Request to create a vector store file batch with extra_body support. - VectorStoreFileBatchObject: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + additionalProperties: true type: object + title: OpenAICreateVectorStoreRequestWithExtraBody + description: Request to create a vector store with extra_body support. 
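A sketch of a create request using the static chunking strategy; the schema bounds `max_chunk_size_tokens` to [100, 4096], and because `additionalProperties: true` the body may carry extra keys (the `embedding_model` key below is a hypothetical example of such an extra field, not part of the schema):

```python
# Hypothetical OpenAICreateVectorStoreRequestWithExtraBody body.
create_request = {
    "name": "docs-index",
    "file_ids": ["file_1", "file_2"],
    "chunking_strategy": {
        "type": "static",                  # discriminator selects the static variant
        "static": {
            "chunk_overlap_tokens": 400,   # default
            "max_chunk_size_tokens": 800,  # default; must be in 100..4096
        },
    },
    "metadata": {"team": "search"},
    "embedding_model": "all-MiniLM-L6-v2",  # hypothetical extra_body key
}
```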
+ OpenaiUpdateVectorStoreRequest: + properties: + name: + anyOf: + - type: string + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: OpenaiUpdateVectorStoreRequest + VectorStoreDeleteResponse: properties: id: type: string - description: Unique identifier for the file batch + title: Id object: type: string + title: Object + default: vector_store.deleted + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: VectorStoreDeleteResponse + description: Response from deleting a vector store. + OpenAICreateVectorStoreFileBatchRequestWithExtraBody: + properties: + file_ids: + items: + type: string + type: array + title: File Ids + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + chunking_strategy: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + additionalProperties: true + type: object + required: + - file_ids + title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: Request to create a vector store file batch with extra_body support. + VectorStoreFileBatchObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object default: vector_store.file_batch - description: >- - Object type identifier, always "vector_store.file_batch" created_at: type: integer - description: >- - Timestamp when the file batch was created + title: Created At vector_store_id: type: string - description: >- - ID of the vector store containing the file batch + title: Vector Store Id status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: >- - Current processing status of the file batch + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the batch - additionalProperties: false + type: object required: - - id - - object - - created_at - - vector_store_id - - status - - file_counts + - id + - created_at + - vector_store_id + - status + - file_counts title: VectorStoreFileBatchObject description: OpenAI Vector Store File Batch object. 
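For orientation, here is how a file-batch create request and the object it returns line up under the schemas above; all ids and counts are placeholders:

```python
# Hypothetical file batch request/response payloads.
batch_request = {
    "file_ids": ["file_1", "file_2"],       # required
    "attributes": {"source": "wiki"},       # nullable key-value attributes
    "chunking_strategy": {"type": "auto"},  # defaults to auto when omitted
}

batch_object = {
    "id": "vsfb_001",
    "object": "vector_store.file_batch",    # default
    "created_at": 1730000000,
    "vector_store_id": "vs_abc123",
    "status": "in_progress",                # enum: completed|in_progress|cancelled|failed
    "file_counts": {"completed": 0, "cancelled": 0, "failed": 0,
                    "in_progress": 2, "total": 2},
}
```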
VectorStoreFileStatus: - oneOf: - - type: string - const: completed - - type: string - const: in_progress - - type: string - const: cancelled - - type: string - const: failed + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed VectorStoreFileLastError: - type: object properties: code: - oneOf: - - type: string - const: server_error - - type: string - const: rate_limit_exceeded - description: >- - Error code indicating the type of failure + title: Code + type: string + enum: + - server_error + - rate_limit_exceeded + default: server_error message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: VectorStoreFileLastError - description: >- - Error information for failed vector store file processing. - VectorStoreFileObject: + title: Message type: object + required: + - code + - message + title: VectorStoreFileLastError + description: Error information for failed vector store file processing. + VectorStoreFileObject: properties: id: type: string - description: Unique identifier for the file + title: Id object: type: string + title: Object default: vector_store.file - description: >- - Object type identifier, always "vector_store.file" attributes: - type: object additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 + type: object + maxProperties: 16 + title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. 
+ x-oaiTypeLabel: map chunking_strategy: oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic discriminator: propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - description: >- - Strategy used for splitting the file into chunks created_at: type: integer - description: >- - Timestamp when the file was added to the vector store + title: Created At last_error: - $ref: '#/components/schemas/VectorStoreFileLastError' - description: >- - (Optional) Error information if file processing failed + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: Current processing status of the file + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed usage_bytes: type: integer + title: Usage Bytes default: 0 - description: Storage space used by this file in bytes vector_store_id: type: string - description: >- - ID of the vector store containing this file - additionalProperties: false + title: Vector Store Id + type: object required: - - id - - object - - attributes - - chunking_strategy - - created_at - - status - - usage_bytes - - vector_store_id + - id + - chunking_strategy + - created_at + - status + - vector_store_id title: VectorStoreFileObject description: OpenAI Vector Store File object. VectorStoreFilesListInBatchResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: >- - List of vector store file objects in the batch + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreFilesListInBatchResponse - description: >- - Response from listing files in a vector store file batch. + description: Response from listing files in a vector store file batch. 
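The `attributes` map on `VectorStoreFileObject` above is now tightly constrained (at most 16 keys, keys up to 64 characters, values either strings up to 512 characters, numbers, or booleans). A minimal client-side check mirroring those constraints might look like this; it is a sketch, not part of the library:

```python
# Sketch of client-side validation for the constrained `attributes` map.
def validate_attributes(attributes: dict) -> None:
    if len(attributes) > 16:
        raise ValueError("attributes allows at most 16 keys")
    for key, value in attributes.items():
        if not isinstance(key, str) or len(key) > 64:
            raise ValueError(f"key {key!r} must be a string of <= 64 chars")
        if isinstance(value, str):
            if len(value) > 512:
                raise ValueError(f"string value for {key!r} exceeds 512 chars")
        elif not isinstance(value, (bool, int, float)):
            raise ValueError(f"value for {key!r} must be string, number, or boolean")

validate_attributes({"source": "wiki", "page": 3, "draft": False})
```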
VectorStoreListFilesResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: List of vector store file objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreListFilesResponse - description: >- - Response from listing files in a vector store. - OpenaiAttachFileToVectorStoreRequest: type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. + OpenaiAttachFileToVectorStoreRequest: properties: file_id: type: string - description: >- - The ID of the file to attach to the vector store. + title: File Id attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The key-value attributes stored with the file, which can be used for filtering. + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - The chunking strategy to use for the file. - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + type: object required: - - file_id + - file_id title: OpenaiAttachFileToVectorStoreRequest OpenaiUpdateVectorStoreFileRequest: - type: object properties: attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The updated key-value attributes to store with the file. - additionalProperties: false + title: Attributes + type: object required: - - attributes + - attributes title: OpenaiUpdateVectorStoreFileRequest VectorStoreFileDeleteResponse: - type: object properties: id: type: string - description: Unique identifier of the deleted file + title: Id object: type: string + title: Object default: vector_store.file.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreFileDeleteResponse - description: >- - Response from deleting a vector store file. 
- VectorStoreContent: type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. + VectorStoreContent: properties: type: type: string const: text - description: >- - Content type, currently only "text" is supported + title: Type text: type: string - description: The actual text content - additionalProperties: false - required: - - type - - text - title: VectorStoreContent - description: >- - Content item from a vector store file or search result. - VectorStoreFileContentsResponse: - type: object - properties: - file_id: - type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file - content: - type: array - items: - $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file - additionalProperties: false - required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse - description: >- - Response from retrieving the contents of a vector store file. - OpenaiSearchVectorStoreRequest: - type: object - properties: - query: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - The query string or array for performing the search. - filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Filters based on file attributes to narrow the search results. - max_num_results: - type: integer - description: >- - Maximum number of results to return (1 to 50 inclusive, default 10). - ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: + title: Text + embedding: + anyOf: + - items: type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - Ranking options for fine-tuning the search results. 
- rewrite_query: - type: boolean - description: >- - Whether to rewrite the natural language query for vector search (default - false) - search_mode: - type: string - description: >- - The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - additionalProperties: false - required: - - query - title: OpenaiSearchVectorStoreRequest - VectorStoreSearchResponse: + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object - properties: - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: Relevance score for this search result - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: number - - type: boolean - description: >- - (Optional) Key-value attributes associated with the file - content: - type: array - items: - $ref: '#/components/schemas/VectorStoreContent' - description: >- - List of content items matching the search query - additionalProperties: false required: - - file_id - - filename - - score - - content - title: VectorStoreSearchResponse - description: Response from searching a vector store. - VectorStoreSearchResponsePage: - type: object + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. + VectorStoreFileContentResponse: properties: object: type: string - default: vector_store.search_results.page - description: >- - Object type identifier for the search results page - search_query: - type: string - description: >- - The original search query that was executed + const: vector_store.file_content.page + title: Object + default: vector_store.file_content.page data: - type: array items: - $ref: '#/components/schemas/VectorStoreSearchResponse' - description: List of search result objects + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more results available beyond this page next_page: - type: string - description: >- - (Optional) Token for retrieving the next page of results - additionalProperties: false - required: - - object - - search_query - - data - - has_more - title: VectorStoreSearchResponsePage - description: >- - Paginated response from searching a vector store. - Checkpoint: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. 
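An illustrative `VectorStoreFileContentResponse`: a page of `VectorStoreContent` items where only `data` is required and `object` carries the page constant; the text is a placeholder:

```python
# Sketch of a file-content page per the schema above.
file_content_page = {
    "object": "vector_store.file_content.page",  # const/default
    "data": [
        {
            "type": "text",                      # const; only text content is defined
            "text": "First parsed chunk of the file.",
            # embedding / chunk_metadata / metadata are nullable extras
        },
    ],
    "has_more": False,
    "next_page": None,
}
```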
+ OpenaiSearchVectorStoreRequest: + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions + rewrite_query: + anyOf: + - type: boolean + - type: 'null' + default: false + search_mode: + anyOf: + - type: string + - type: 'null' + default: vector + type: object + required: + - query + title: OpenaiSearchVectorStoreRequest + VectorStoreSearchResponse: + properties: + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: number + - type: boolean + title: string | number | boolean + type: object + - type: 'null' + content: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Content + type: object + required: + - file_id + - filename + - score + - content + title: VectorStoreSearchResponse + description: Response from searching a vector store. + VectorStoreSearchResponsePage: + properties: + object: + type: string + title: Object + default: vector_store.search_results.page + search_query: + items: + type: string + type: array + title: Search Query + data: + items: + $ref: '#/components/schemas/VectorStoreSearchResponse' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: + properties: + version: + type: string + title: Version + type: object + required: + - version + title: VersionInfo + description: Version information for the service. + AppendRowsRequest: + properties: + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: AppendRowsRequest + PaginatedResponse: + properties: + data: + items: + additionalProperties: true + type: object + type: array + title: Data + has_more: + type: boolean + title: Has More + url: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. 
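A sketch of a search round-trip under these schemas. The defaults mirror the spec (`max_num_results` 10, `rewrite_query` false, `search_mode` "vector"), and note that the response page now models `search_query` as a list of strings; all ids, filenames, and scores are placeholders:

```python
# Hypothetical search request and result page.
search_request = {
    "query": "how do I rotate API keys?",  # string or list[string]
    "filters": {"source": "handbook"},     # nullable attribute filters
    "max_num_results": 5,
}

search_page = {
    "object": "vector_store.search_results.page",
    "search_query": ["how do I rotate API keys?"],  # now a list of strings
    "data": [
        {
            "file_id": "file_1",
            "filename": "handbook.md",
            "score": 0.87,
            "content": [{"type": "text", "text": "Rotate keys every 90 days."}],
        },
    ],
    "has_more": False,
    "next_page": None,
}
```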
+ Dataset: properties: identifier: type: string - description: Unique identifier for the checkpoint + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: dataset + title: Type + default: dataset + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this dataset + type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + properties: + type: + type: string + const: rows + title: Type + default: rows + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + properties: + type: + type: string + const: uri + title: Type + default: uri + uri: + type: string + title: Uri + type: object + required: + - uri + title: URIDataSource + description: A dataset that can be obtained from a URI. + ListDatasetsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Dataset' + type: array + title: Data + type: object + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + Benchmark: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: benchmark + title: Type + default: benchmark + dataset_id: + type: string + title: Dataset Id + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + metadata: + additionalProperties: true + type: object + title: Metadata + description: Metadata for this evaluation task + type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. 
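Two illustrative `Dataset` payloads showing the discriminated `source` union: `type: uri` selects `URIDataSource`, `type: rows` selects `RowsDataSource`. The `DatasetPurpose` enum is not shown in this hunk, so the purpose string below is an assumed example value; ids and URIs are placeholders:

```python
# Sketch of Dataset payloads; purpose value is assumed, not taken from this spec.
uri_dataset = {
    "identifier": "eval-set",
    "provider_id": "localfs",
    "type": "dataset",
    "purpose": "eval/messages-answer",  # hypothetical DatasetPurpose value
    "source": {"type": "uri", "uri": "https://example.com/data.jsonl"},
    "metadata": {},
}

rows_dataset = {
    "identifier": "inline-set",
    "provider_id": "localfs",
    "type": "dataset",
    "purpose": "eval/messages-answer",
    "source": {"type": "rows", "rows": [{"question": "2+2?", "answer": "4"}]},
    "metadata": {},
}
```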
+ ListBenchmarksResponse: + properties: + data: + items: + $ref: '#/components/schemas/Benchmark' + type: array + title: Data + type: object + required: + - data + title: ListBenchmarksResponse + BenchmarkConfig: + properties: + eval_candidate: + $ref: '#/components/schemas/ModelCandidate' + scoring_params: + additionalProperties: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + type: object + title: Scoring Params + description: Map between scoring function id and parameters for each scoring function you want to run + num_examples: + anyOf: + - type: integer + - type: 'null' + description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated + type: object + required: + - eval_candidate + title: BenchmarkConfig + description: A benchmark configuration for evaluation. + GreedySamplingStrategy: + properties: + type: + type: string + const: greedy + title: Type + default: greedy + type: object + title: GreedySamplingStrategy + description: Greedy sampling strategy that selects the highest probability token at each step. + ModelCandidate: + properties: + type: + type: string + const: model + title: Type + default: model + model: + type: string + title: Model + sampling_params: + $ref: '#/components/schemas/SamplingParams' + system_message: + anyOf: + - $ref: '#/components/schemas/SystemMessage' + title: SystemMessage + - type: 'null' + title: SystemMessage + type: object + required: + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. + SamplingParams: + properties: + strategy: + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + max_tokens: + anyOf: + - type: integer + - type: 'null' + repetition_penalty: + anyOf: + - type: number + - type: 'null' + default: 1.0 + stop: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: SamplingParams + description: Sampling parameters. 
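Tying these together, a `BenchmarkConfig` wraps a `ModelCandidate` whose `SamplingParams.strategy` picks one variant of the discriminated union; greedy needs no extra fields. The model id and stop token below are placeholders:

```python
# Sketch of a BenchmarkConfig with a greedy-sampling model candidate.
model_candidate = {
    "type": "model",
    "model": "llama3.2:3b",              # placeholder model id
    "sampling_params": {
        "strategy": {"type": "greedy"},  # discriminator selects the variant
        "max_tokens": 256,
        "repetition_penalty": 1.0,       # default
        "stop": ["</answer>"],
    },
}

benchmark_config = {
    "eval_candidate": model_candidate,   # required
    "scoring_params": {},                # map of scoring-fn id -> params
    "num_examples": 10,                  # evaluate a subset while testing
}
```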
+ SystemMessage: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. + TopKSamplingStrategy: + properties: + type: + type: string + const: top_k + title: Type + default: top_k + top_k: + type: integer + minimum: 1.0 + title: Top K + type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + properties: + type: + type: string + const: top_p + title: Type + default: top_p + temperature: + anyOf: + - type: number + minimum: 0.0 + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + default: 0.95 + type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateRowsRequest: + properties: + input_rows: + items: + additionalProperties: true + type: object + type: array + title: Input Rows + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - input_rows + - scoring_functions + - benchmark_config + title: EvaluateRowsRequest + EvaluateResponse: + properties: + generations: + items: + additionalProperties: true + type: object + type: array + title: Generations + scores: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Scores + type: object + required: + - generations + - scores + title: EvaluateResponse + description: The response from an evaluation. + RunEvalRequest: + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - benchmark_config + title: RunEvalRequest + Job: + properties: + job_id: + type: string + title: Job Id + status: + $ref: '#/components/schemas/JobStatus' + type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. 
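One quirk worth flagging: per the schema above, `TopPSamplingStrategy` lists `temperature` as required even though it is nullable, while `top_p` itself is optional with a 0.95 default; `TopKSamplingStrategy` requires `top_k` with a minimum of 1. In payload form:

```python
# Sampling strategy variants per the schemas above.
top_p_strategy = {
    "type": "top_p",
    "temperature": 0.7,  # required key; minimum 0.0 when set
    "top_p": 0.9,        # optional; defaults to 0.95
}

top_k_strategy = {
    "type": "top_k",
    "top_k": 40,         # required; minimum 1
}
```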
+ RerankRequest: + properties: + model: + type: string + title: Model + query: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + items: + items: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + type: array + title: Items + max_num_results: + anyOf: + - type: integer + - type: 'null' + type: object + required: + - model + - query + - items + title: RerankRequest + RerankData: + properties: + index: + type: integer + title: Index + relevance_score: + type: number + title: Relevance Score + type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. + RerankResponse: + properties: + data: + items: + $ref: '#/components/schemas/RerankData' + type: array + title: Data + type: object + required: + - data + title: RerankResponse + description: Response from a reranking request. + Checkpoint: + properties: + identifier: + type: string + title: Identifier created_at: type: string format: date-time - description: >- - Timestamp when the checkpoint was created + title: Created At epoch: type: integer - description: >- - Training epoch when the checkpoint was saved + title: Epoch post_training_job_id: type: string - description: >- - Identifier of the training job that created this checkpoint + title: Post Training Job Id path: type: string - description: >- - File system path where the checkpoint is stored + title: Path training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object required: - - identifier - - created_at - - epoch - - post_training_job_id - - path + - identifier + - created_at + - epoch + - post_training_job_id + - path title: Checkpoint description: Checkpoint created during training runs. PostTrainingJobArtifactsResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - checkpoints + - job_uuid title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. 
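An illustrative rerank exchange under these schemas: each candidate in `items` is scored against `query`, and results come back as `(index, relevance_score)` entries pointing back into the request; the model id and texts are placeholders:

```python
# Hypothetical RerankRequest/RerankResponse pair.
rerank_request = {
    "model": "reranker-model",  # placeholder identifier
    "query": "refund policy",
    "items": [
        "Refunds are processed within 14 days.",
        "Our office is closed on holidays.",
    ],
    "max_num_results": 1,
}

rerank_response = {
    "data": [
        {"index": 0, "relevance_score": 0.93},  # index refers into `items`
    ],
}
```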
PostTrainingMetric: - type: object properties: epoch: type: integer - description: Training epoch number + title: Epoch train_loss: type: number - description: Loss value on the training dataset + title: Train Loss validation_loss: type: number - description: Loss value on the validation dataset + title: Validation Loss perplexity: type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - CancelTrainingJobRequest: + title: Perplexity type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + CancelTrainingJobRequest: properties: job_uuid: type: string - description: The UUID of the job to cancel. - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: CancelTrainingJobRequest PostTrainingJobStatusResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current status of the training job + $ref: '#/components/schemas/JobStatus' scheduled_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job was scheduled + anyOf: + - type: string + format: date-time + - type: 'null' started_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job execution began + anyOf: + - type: string + format: date-time + - type: 'null' completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job finished, if completed + anyOf: + - type: string + format: date-time + - type: 'null' resources_allocated: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Information about computational resources allocated to the - job + anyOf: + - additionalProperties: true + type: object + - type: 'null' checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - status - - checkpoints + - job_uuid + - status title: PostTrainingJobStatusResponse description: Status of a finetuning job. ListPostTrainingJobsResponse: - type: object properties: data: - type: array items: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - additionalProperties: false + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object required: - - data + - data title: ListPostTrainingJobsResponse DPOAlignmentConfig: - type: object properties: beta: type: number - description: Temperature parameter for the DPO loss + title: Beta loss_type: $ref: '#/components/schemas/DPOLossType' default: sigmoid - description: The type of loss function to use for DPO - additionalProperties: false + type: object required: - - beta - - loss_type + - beta title: DPOAlignmentConfig - description: >- - Configuration for Direct Preference Optimization (DPO) alignment. 
+ description: Configuration for Direct Preference Optimization (DPO) alignment. DPOLossType: type: string enum: - - sigmoid - - hinge - - ipo - - kto_pair + - sigmoid + - hinge + - ipo + - kto_pair title: DPOLossType DataConfig: - type: object properties: dataset_id: type: string - description: >- - Unique identifier for the training dataset + title: Dataset Id batch_size: type: integer - description: Number of samples per training batch + title: Batch Size shuffle: type: boolean - description: >- - Whether to shuffle the dataset during training + title: Shuffle data_format: $ref: '#/components/schemas/DatasetFormat' - description: >- - Format of the dataset (instruct or dialog) validation_dataset_id: - type: string - description: >- - (Optional) Unique identifier for the validation dataset + anyOf: + - type: string + - type: 'null' packed: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to pack multiple samples into a single sequence for - efficiency train_on_input: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to compute loss on input tokens as well as output tokens - additionalProperties: false + type: object required: - - dataset_id - - batch_size - - shuffle - - data_format + - dataset_id + - batch_size + - shuffle + - data_format title: DataConfig - description: >- - Configuration for training data and data loading. + description: Configuration for training data and data loading. DatasetFormat: type: string enum: - - instruct - - dialog + - instruct + - dialog title: DatasetFormat description: Format of the training dataset. EfficiencyConfig: - type: object properties: enable_activation_checkpointing: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use activation checkpointing to reduce memory usage enable_activation_offloading: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload activations to CPU to save GPU memory memory_efficient_fsdp_wrap: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use memory-efficient FSDP wrapping fsdp_cpu_offload: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload FSDP parameters to CPU - additionalProperties: false - title: EfficiencyConfig - description: >- - Configuration for memory and compute efficiency optimizations. - OptimizerConfig: type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: properties: optimizer_type: $ref: '#/components/schemas/OptimizerType' - description: >- - Type of optimizer to use (adam, adamw, or sgd) lr: type: number - description: Learning rate for the optimizer + title: Lr weight_decay: type: number - description: >- - Weight decay coefficient for regularization + title: Weight Decay num_warmup_steps: type: integer - description: Number of steps for learning rate warmup - additionalProperties: false + title: Num Warmup Steps + type: object required: - - optimizer_type - - lr - - weight_decay - - num_warmup_steps + - optimizer_type + - lr + - weight_decay + - num_warmup_steps title: OptimizerConfig - description: >- - Configuration parameters for the optimization algorithm. 
+ description: Configuration parameters for the optimization algorithm. OptimizerType: type: string enum: - - adam - - adamw - - sgd + - adam + - adamw + - sgd title: OptimizerType - description: >- - Available optimizer algorithms for training. + description: Available optimizer algorithms for training. TrainingConfig: - type: object properties: n_epochs: type: integer - description: Number of training epochs to run + title: N Epochs max_steps_per_epoch: type: integer + title: Max Steps Per Epoch default: 1 - description: Maximum number of steps to run per epoch gradient_accumulation_steps: type: integer + title: Gradient Accumulation Steps default: 1 - description: >- - Number of steps to accumulate gradients before updating max_validation_steps: - type: integer + anyOf: + - type: integer + - type: 'null' default: 1 - description: >- - (Optional) Maximum number of validation steps per epoch data_config: - $ref: '#/components/schemas/DataConfig' - description: >- - (Optional) Configuration for data loading and formatting + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - description: >- - (Optional) Configuration for the optimization algorithm + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig efficiency_config: - $ref: '#/components/schemas/EfficiencyConfig' - description: >- - (Optional) Configuration for memory and compute optimizations + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig dtype: - type: string + anyOf: + - type: string + - type: 'null' default: bf16 - description: >- - (Optional) Data type for model parameters (bf16, fp16, fp32) - additionalProperties: false - required: - - n_epochs - - max_steps_per_epoch - - gradient_accumulation_steps - title: TrainingConfig - description: >- - Comprehensive configuration for the training process. - PreferenceOptimizeRequest: type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PreferenceOptimizeRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid finetuned_model: type: string - description: The model to fine-tune. + title: Finetuned Model algorithm_config: $ref: '#/components/schemas/DPOAlignmentConfig' - description: The algorithm configuration. training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. 
- additionalProperties: false + title: Logger Config + type: object required: - - job_uuid - - finetuned_model - - algorithm_config - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config title: PreferenceOptimizeRequest PostTrainingJob: - type: object properties: job_uuid: type: string - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: PostTrainingJob AlgorithmConfig: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QATFinetuningConfig' discriminator: - propertyName: type mapping: LoRA: '#/components/schemas/LoraFinetuningConfig' QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig LoraFinetuningConfig: - type: object properties: type: type: string const: LoRA + title: Type default: LoRA - description: Algorithm type identifier, always "LoRA" lora_attn_modules: - type: array items: type: string - description: >- - List of attention module names to apply LoRA to + type: array + title: Lora Attn Modules apply_lora_to_mlp: type: boolean - description: Whether to apply LoRA to MLP layers + title: Apply Lora To Mlp apply_lora_to_output: type: boolean - description: >- - Whether to apply LoRA to output projection layers + title: Apply Lora To Output rank: type: integer - description: >- - Rank of the LoRA adaptation (lower rank = fewer parameters) + title: Rank alpha: type: integer - description: >- - LoRA scaling parameter that controls adaptation strength + title: Alpha use_dora: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) quantize_base: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to quantize the base model weights - additionalProperties: false - required: - - type - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - title: LoraFinetuningConfig - description: >- - Configuration for Low-Rank Adaptation (LoRA) fine-tuning. - QATFinetuningConfig: type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: properties: type: type: string const: QAT + title: Type default: QAT - description: Algorithm type identifier, always "QAT" quantizer_name: type: string - description: >- - Name of the quantization algorithm to use + title: Quantizer Name group_size: type: integer - description: Size of groups for grouped quantization - additionalProperties: false - required: - - type - - quantizer_name - - group_size - title: QATFinetuningConfig - description: >- - Configuration for Quantization-Aware Training (QAT) fine-tuning. - SupervisedFineTuneRequest: + title: Group Size type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. 
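To make the regenerated shapes above concrete, here is a minimal sketch of payloads that satisfy the new LoraFinetuningConfig and TrainingConfig schemas. This is an editorial illustration, not part of the diff: plain Python dicts, no client library assumed, and the module names and dataset id are hypothetical.

```python
# Minimal sketch of payloads matching the regenerated schemas above.
# These are plain dicts for illustration, not a real client API.

lora_config = {
    "type": "LoRA",  # discriminator value in the AlgorithmConfig oneOf
    "lora_attn_modules": ["q_proj", "v_proj"],  # hypothetical module names
    "apply_lora_to_mlp": False,
    "apply_lora_to_output": False,
    "rank": 8,
    "alpha": 16,
    # use_dora / quantize_base are now `anyOf: [boolean, 'null']`:
    # they may be omitted, given a value, or set to null explicitly.
    "use_dora": None,
}

training_config = {
    "n_epochs": 1,  # the only required field after this change
    # max_steps_per_epoch and gradient_accumulation_steps default to 1
    "data_config": {  # DataConfig, now nullable as a whole
        "dataset_id": "my-dataset",  # hypothetical id
        "batch_size": 8,
        "shuffle": True,
        "data_format": "instruct",  # DatasetFormat enum: instruct | dialog
    },
}
```

Note also that the regenerated schemas drop `additionalProperties: false` throughout, so extra keys in these objects are no longer rejected at the schema level.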
+ SupervisedFineTuneRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. + title: Logger Config model: - type: string - description: The model to fine-tune. + anyOf: + - type: string + - type: 'null' + description: Model descriptor for training if not in provider config checkpoint_dir: - type: string - description: The directory to save checkpoint(s) to. + anyOf: + - type: string + - type: 'null' algorithm_config: - $ref: '#/components/schemas/AlgorithmConfig' - description: The algorithm configuration. - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + title: LoraFinetuningConfig | QATFinetuningConfig + - type: 'null' + title: Algorithm Config + type: object required: - - job_uuid - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - training_config + - hyperparam_search_config + - logger_config title: SupervisedFineTuneRequest + RegisterModelRequest: + properties: + model_id: + type: string + title: Model Id + provider_model_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + model_type: + anyOf: + - $ref: '#/components/schemas/ModelType' + title: ModelType + - type: 'null' + title: ModelType + type: object + required: + - model_id + title: RegisterModelRequest + ParamType: + discriminator: + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + propertyName: type + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ...
(9 variants) + RegisterScoringFunctionRequest: + properties: + scoring_fn_id: + type: string + title: Scoring Fn Id + description: + type: string + title: Description + return_type: + anyOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + provider_scoring_fn_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + params: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + type: object + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + properties: + shield_id: + type: string + title: Shield Id + provider_shield_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + properties: + toolgroup_id: + type: string + title: Toolgroup Id + provider_id: + type: string + title: Provider Id + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest + DataSource: + discriminator: + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + propertyName: type + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + RegisterDatasetRequest: + properties: + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + anyOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + dataset_id: + anyOf: + - type: string + - type: 'null' + type: object + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterBenchmarkRequest: + properties: + benchmark_id: + type: string + title: Benchmark Id + dataset_id: + type: string + title: Dataset Id + 
scoring_functions: + items: + type: string + type: array + title: Scoring Functions + provider_benchmark_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + AllowedToolsFilter: + properties: + tool_names: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: AllowedToolsFilter + description: Filter configuration for restricting which MCP tools can be used. + ApprovalFilter: + properties: + always: + anyOf: + - items: + type: string + type: array + - type: 'null' + never: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: ApprovalFilter + description: Filter configuration for MCP tool approval requirements. + BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Body_openai_upload_file_v1_files_post: + properties: + file: + type: string + format: binary + title: File + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after: + anyOf: + - $ref: '#/components/schemas/ExpiresAfter' + title: ExpiresAfter + - type: 'null' + title: ExpiresAfter + type: object + required: + - file + - purpose + title: Body_openai_upload_file_v1_files_post + Chunk-Input: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - 
items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. + Chunk-Output: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + title: string | list[ImageContentItem-Output | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. + ConversationItemInclude: + type: string + enum: + - web_search_call.action.sources + - code_interpreter_call.outputs + - computer_call_output.output.image_url + - file_search_call.results + - message.input_image.image_url + - message.output_text.logprobs + - reasoning.encrypted_content + title: ConversationItemInclude + description: Specify additional output data to include in the model response. + DatasetPurpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + title: DatasetPurpose + description: Purpose of the dataset. Each purpose has a required input data schema. 
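As a worked example of the registration schemas above, here is a hedged sketch of a RegisterDatasetRequest body. The `purpose` values come from the DatasetPurpose enum just above; the `{"type": "uri", "uri": ...}` shape of URIDataSource is an assumption, since that component schema is not part of this hunk.

```python
# Sketch of a RegisterDatasetRequest body (see the schema earlier in this
# hunk). dataset_id and metadata are nullable and may be omitted.

register_dataset_request = {
    "purpose": "eval/question-answer",  # DatasetPurpose enum value
    "source": {
        "type": "uri",                          # assumed discriminator value
        "uri": "https://example.com/qa.jsonl",  # hypothetical dataset URL
    },
    "metadata": {"owner": "docs-example"},      # optional free-form object
}
```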
+ Errors: + properties: + data: + anyOf: + - items: + $ref: '#/components/schemas/BatchError' + type: array + - type: 'null' + object: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: Errors + HealthStatus: + type: string + enum: + - OK + - Error + - Not Implemented + title: HealthStatus + ImageContentItem-Input: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: An image content item + ImageContentItem-Output: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: An image content item + InputTokensDetails: + properties: + cached_tokens: + type: integer + title: Cached Tokens + additionalProperties: true + type: object + required: + - cached_tokens + title: InputTokensDetails + JobStatus: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + title: JobStatus + description: Status of a job execution. + MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage.
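The `-Input`/`-Output` suffixes above appear to be how the regenerated spec names the request-side and response-side variants of the same server model; both keep the title OpenAIAssistantMessageParam. A minimal, editorial sketch of a conforming assistant message:

```python
# Sketch: an assistant turn satisfying OpenAIAssistantMessageParam.
# Every field is nullable or defaulted in the regenerated schema, so a
# turn may omit content entirely (e.g. when it only carries tool_calls).

assistant_message = {
    "role": "assistant",  # const with a default, so it may also be omitted
    "content": "The capital of France is Paris.",
    # content may instead be a list of
    # OpenAIChatCompletionContentPartTextParam items, or null.
}
```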
+ OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. + OpenAIResponseMessage-Input: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
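The docstring above is the key to reading this union-heavy schema: every Responses-API message carries `type == "message"`, so consumers must branch on `role` and on the content-part discriminators instead. A sketch follows; the `text` field names inside the content parts are assumptions, since those component schemas are not shown in this hunk.

```python
# Sketch: telling apart Responses-API messages that all share type == "message".

def is_model_output(message: dict) -> bool:
    """Branch on role, since `type` cannot disambiguate these messages."""
    return message["type"] == "message" and message["role"] == "assistant"

user_turn = {
    "type": "message",
    "role": "user",
    "content": [{"type": "input_text", "text": "Summarize this file."}],  # assumed field name
}

model_turn = {
    "type": "message",
    "role": "assistant",
    "status": "completed",  # nullable
    "content": [{"type": "output_text", "text": "Here is a summary ..."}],  # assumed field name
}

assert not is_model_output(user_turn) and is_model_output(model_turn)
```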
+ OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + OpenAIResponseOutputMessageFileSearchToolCallResults: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. 
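OpenAIResponseTextFormat above drives structured output in the Responses API: only `type` has a default ("text"), and `name`, `schema`, and `strict` are relevant only for `json_schema`. A hedged sketch with a hypothetical schema name:

```python
# Sketch of an OpenAIResponseTextFormat asking for strict JSON-schema output.

text_format = {
    "type": "json_schema",  # enum: text | json_schema | json_object
    "name": "weather_report",  # hypothetical schema name
    "schema": {  # free-form object (additionalProperties: true)
        "type": "object",
        "properties": {"temperature_c": {"type": "number"}},
        "required": ["temperature_c"],
    },
    "strict": True,  # nullable; set here for the strict-validation case
}
```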
+ OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. + OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. 
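Rounding out the message params: a user message may mix content-part types, discriminated by `type` with the mapping shown above (file / image_url / text). In this sketch the inner shapes of the text and image parts are assumptions, since those component schemas are not part of this hunk.

```python
# Sketch: a multimodal user message per OpenAIUserMessageParam
# (only `content` is required; `role` is const with a default).

user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is shown in this picture?"},  # assumed shape
        {   # assumed inner shape for OpenAIChatCompletionContentPartImageParam
            "type": "image_url",
            "image_url": {"url": "https://example.com/cat.png"},
        },
    ],
}
```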
+ OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. + _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. + properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + MetricInResponse: + description: A metric value included in API responses. 
+ properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. + properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. + properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: 
UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. + properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. + properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings + type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: + items: + type: string + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + additionalProperties: true + title: Metadata + type: object + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. 
+ properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean + required: + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. + properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. + properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. 
+ items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. + properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. 
+ enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. 
+ properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -10140,8 +10034,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -10149,11 +10042,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -10161,91 +10052,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Agents - description: > - APIs for creating and interacting with agentic systems. +- description: APIs for creating and interacting with agentic systems. + name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + This API provides the following extensions: + - idempotent batch creation - ## Deprecated APIs + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. 
+ name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. - - > **⚠️ DEPRECATED**: These APIs are provided for migration reference and will - be removed in future versions. Not recommended for new projects. - - - ### Migration Guidance - - - If you are using deprecated versions of the Agents or Responses APIs, please - migrate to: - - - - **Responses API**: Use the stable v1 Responses API endpoints - x-displayName: Agents - - name: Benchmarks - description: '' - - name: DatasetIO - description: '' - - name: Datasets - description: '' - - name: Eval - description: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. - x-displayName: Evaluations - - name: Files - description: >- - This API is used to upload documents that can be used with other Llama Stack - APIs. - x-displayName: Files - - name: Inference - description: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. - - - This API provides the raw interface to the underlying models. Two kinds of models - are supported: - - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - - Embedding models: these models generate embeddings to be used for semantic - search. - x-displayName: Inference - - name: Models - description: '' - - name: PostTraining (Coming Soon) - description: '' - - name: Safety - description: OpenAI-compatible Moderations API. - x-displayName: Safety - - name: VectorIO - description: '' + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. 
+ name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Agents - - Benchmarks - - DatasetIO - - Datasets - - Eval - - Files - - Inference - - Models - - PostTraining (Coming Soon) - - Safety - - VectorIO +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html deleted file mode 100644 index ab474180e..000000000 --- a/docs/static/experimental-llama-stack-spec.html +++ /dev/null @@ -1,5553 +0,0 @@ [5,553 deleted lines of HTML page markup elided by extraction; the only recoverable text was the page title, "OpenAPI specification"] diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index dd9e43cc5..4d5a43693 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -1,53 +1,53 @@ openapi: 3.1.0 info: - title: >- - Llama Stack Specification - Experimental APIs - version: v1 - description: >- + title: Llama Stack Specification - Experimental APIs + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before - becoming stable. + **🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before + becoming stable. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: /v1beta/datasetio/append-rows/{dataset_id}: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - DatasetIO - summary: Append rows to a dataset. + - Datasetio + summary: Append Rows description: Append rows to a dataset. + operationId: append_rows_v1beta_datasetio_append_rows__dataset_id__post parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to append the rows to.
- required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/AppendRowsRequest' required: true - deprecated: false /v1beta/datasetio/iterrows/{dataset_id}: get: responses: @@ -59,55 +59,53 @@ paths: $ref: '#/components/schemas/PaginatedResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - DatasetIO - summary: >- - Get a paginated list of rows from a dataset. - description: >- + - Datasetio + summary: Iterrows + description: |- Get a paginated list of rows from a dataset. Uses offset-based pagination where: - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - The response includes: - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. + operationId: iterrows_v1beta_datasetio_iterrows__dataset_id__get parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to get the rows from. - required: true - schema: - type: string - - name: start_index - in: query - description: >- - Index into dataset for the first row to get. Get all rows if None. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of rows to get. - required: false - schema: - type: integer - deprecated: false + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: start_index + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Start Index + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' /v1beta/datasets: get: responses: @@ -118,51 +116,22 @@ paths: schema: $ref: '#/components/schemas/ListDatasetsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: List all datasets. + - Datasets + summary: List Datasets description: List all datasets. - parameters: [] - deprecated: false - post: - responses: - '200': - description: A Dataset. - content: - application/json: - schema: - $ref: '#/components/schemas/Dataset' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Datasets - summary: Register a new dataset. - description: Register a new dataset. 
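Taken together, the append-rows and iterrows endpoints above form the dataset I/O write/read loop: a POST that returns 204 on success, then offset-based paging with `start_index` and `limit`. A minimal sketch with `requests`, assuming a locally running stack (the base URL and dataset ID are placeholders; the `rows`, `data`, and `has_more` field names come from the schemas in this spec):

```python
import requests

BASE = "http://localhost:8321"  # assumed local deployment; adjust to your server
dataset_id = "my-dataset"       # placeholder dataset ID

# Append rows: POST /v1beta/datasetio/append-rows/{dataset_id} -> 204 on success
rows = [{"messages": [{"role": "user", "content": "Hello, world!"},
                      {"role": "assistant", "content": "Hello, world!"}]}]
resp = requests.post(f"{BASE}/v1beta/datasetio/append-rows/{dataset_id}",
                     json={"rows": rows})
assert resp.status_code == 204

# Read pages back: GET /v1beta/datasetio/iterrows/{dataset_id}
# Offset-based pagination, exactly as documented above.
start = 0
while True:
    page = requests.get(f"{BASE}/v1beta/datasetio/iterrows/{dataset_id}",
                        params={"start_index": start, "limit": 100}).json()
    for row in page["data"]:
        print(row)
    if not page["has_more"] or not page["data"]:
        break
    start += len(page["data"])
```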
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterDatasetRequest' - required: true - deprecated: false + operationId: list_datasets_v1beta_datasets_get /v1beta/datasets/{dataset_id}: get: responses: @@ -173,550 +142,29 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Get a dataset by its ID. + - Datasets + summary: Get Dataset description: Get a dataset by its ID. + operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: - - name: dataset_id - in: path - description: The ID of the dataset to get. - required: true - schema: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Datasets - summary: Unregister a dataset by its ID. - description: Unregister a dataset by its ID. - parameters: - - name: dataset_id - in: path - description: The ID of the dataset to unregister. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all agents. - description: List all agents. - parameters: - - name: start_index - in: query - description: The index to start the pagination from. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of agents to return. - required: false - schema: - type: integer - deprecated: false - post: - responses: - '200': - description: >- - An AgentCreateResponse with the agent ID. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentCreateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Create an agent with the given configuration. - description: >- - Create an agent with the given configuration. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentRequest' + - name: dataset_id + in: path required: true - deprecated: false - /v1alpha/agents/{agent_id}: - get: - responses: - '200': - description: An Agent of the agent. 
- content: - application/json: - schema: - $ref: '#/components/schemas/Agent' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Describe an agent by its ID. - description: Describe an agent by its ID. - parameters: - - name: agent_id - in: path - description: ID of the agent. - required: true - schema: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Delete an agent by its ID and its associated sessions and turns. - description: >- - Delete an agent by its ID and its associated sessions and turns. - parameters: - - name: agent_id - in: path - description: The ID of the agent to delete. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session: - post: - responses: - '200': - description: An AgentSessionCreateResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentSessionCreateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new session for an agent. - description: Create a new session for an agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to create the session for. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentSessionRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}: - get: - responses: - '200': - description: A Session. - content: - application/json: - schema: - $ref: '#/components/schemas/Session' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent session by its ID. - description: Retrieve an agent session by its ID. - parameters: - - name: session_id - in: path - description: The ID of the session to get. - required: true - schema: - type: string - - name: agent_id - in: path - description: >- - The ID of the agent to get the session for. - required: true - schema: - type: string - - name: turn_ids - in: query - description: >- - (Optional) List of turn IDs to filter the session by. - required: false - schema: - type: array - items: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Delete an agent session by its ID and its associated turns. - description: >- - Delete an agent session by its ID and its associated turns. 
- parameters: - - name: session_id - in: path - description: The ID of the session to delete. - required: true - schema: - type: string - - name: agent_id - in: path - description: >- - The ID of the agent to delete the session for. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn: - post: - responses: - '200': - description: >- - If stream=False, returns a Turn object. If stream=True, returns an SSE - event stream of AgentTurnResponseStreamChunk. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new turn for an agent. - description: Create a new turn for an agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to create the turn for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to create the turn for. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentTurnRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}: - get: - responses: - '200': - description: A Turn. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent turn by its ID. - description: Retrieve an agent turn by its ID. - parameters: - - name: agent_id - in: path - description: The ID of the agent to get the turn for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to get the turn for. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume: - post: - responses: - '200': - description: >- - A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk - objects. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Resume an agent turn with executed tool call responses. - description: >- - Resume an agent turn with executed tool call responses. - - When a Turn has the status `awaiting_input` due to pending input from client - side tool calls, this endpoint can be used to submit the outputs from the - tool calls once they are ready. - parameters: - - name: agent_id - in: path - description: The ID of the agent to resume. 
- required: true - schema: - type: string - - name: session_id - in: path - description: The ID of the session to resume. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to resume. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ResumeAgentTurnRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: - get: - responses: - '200': - description: An AgentStepResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentStepResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent step by its ID. - description: Retrieve an agent step by its ID. - parameters: - - name: agent_id - in: path - description: The ID of the agent to get the step for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to get the step for. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to get the step for. - required: true - schema: - type: string - - name: step_id - in: path - description: The ID of the step to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/sessions: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all session(s) of a given agent. - description: List all session(s) of a given agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to list sessions for. - required: true - schema: - type: string - - name: start_index - in: query - description: The index to start the pagination from. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of sessions to return. - required: false - schema: - type: integer - deprecated: false + schema: + type: string + description: 'Path parameter: dataset_id' /v1alpha/eval/benchmarks: get: responses: @@ -727,47 +175,22 @@ paths: schema: $ref: '#/components/schemas/ListBenchmarksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: List all benchmarks. + - Benchmarks + summary: List Benchmarks description: List all benchmarks. 
- parameters: [] - deprecated: false - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Benchmarks - summary: Register a benchmark. - description: Register a benchmark. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterBenchmarkRequest' - required: true - deprecated: false + operationId: list_benchmarks_v1alpha_eval_benchmarks_get /v1alpha/eval/benchmarks/{benchmark_id}: get: responses: @@ -778,131 +201,107 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: Get a benchmark by its ID. + - Benchmarks + summary: Get Benchmark description: Get a benchmark by its ID. + operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to get. - required: true - schema: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Benchmarks - summary: Unregister a benchmark. - description: Unregister a benchmark. - parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to unregister. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: '200': - description: >- - EvaluateResponse object containing generations and scores. + description: EvaluateResponse object containing generations and scores. content: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Evaluate a list of rows on a benchmark. + - Eval + summary: Evaluate Rows description: Evaluate a list of rows on a benchmark. + operationId: evaluate_rows_v1alpha_eval_benchmarks__benchmark_id__evaluations_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. 
- required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/EvaluateRowsRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs: post: responses: '200': - description: >- - The job that was created to run the evaluation. + description: The job that was created to run the evaluation. content: application/json: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Run an evaluation on a benchmark. + - Eval + summary: Run Eval description: Run an evaluation on a benchmark. + operationId: run_eval_v1alpha_eval_benchmarks__benchmark_id__jobs_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/RunEvalRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -913,67 +312,69 @@ paths: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the status of a job. + - Eval + summary: Job Status description: Get the status of a job. + operationId: job_status_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. 
- required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Eval - summary: Cancel a job. + - Eval + summary: Job Cancel description: Cancel a job. + operationId: job_cancel_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__delete parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: get: responses: @@ -984,68 +385,67 @@ paths: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the result of a job. + - Eval + summary: Job Result description: Get the result of a job. + operationId: job_result_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__result_get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/inference/rerank: post: responses: '200': - description: >- - RerankResponse with indices sorted by relevance score (descending). + description: RerankResponse with indices sorted by relevance score (descending). 
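The jobs endpoints above define a submit/poll/fetch/cancel lifecycle for benchmark runs. A hedged sketch follows; the RunEvalRequest body, the `job_id` field, and the `in_progress` status value are assumptions, since neither the request schema nor the Job schema is expanded in this hunk:

```python
import time
import requests

BASE = "http://localhost:8321"  # assumed local deployment
benchmark_id = "my-benchmark"   # placeholder benchmark ID
jobs = f"{BASE}/v1alpha/eval/benchmarks/{benchmark_id}/jobs"

# Submit a run: POST .../jobs returns a Job.
run_request = {}  # fill in per the RunEvalRequest schema (not shown in this hunk)
job = requests.post(jobs, json=run_request).json()
job_id = job["job_id"]  # assumed field name on the Job object

# Poll: GET .../jobs/{job_id} returns the Job with its current status.
while requests.get(f"{jobs}/{job_id}").json().get("status") == "in_progress":  # assumed status value
    time.sleep(5)

# Fetch scores: GET .../jobs/{job_id}/result returns an EvaluateResponse.
result = requests.get(f"{jobs}/{job_id}/result").json()
print(result)

# Or abandon the run: DELETE .../jobs/{job_id} returns 204 on success.
# requests.delete(f"{jobs}/{job_id}")
```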
content: application/json: schema: $ref: '#/components/schemas/RerankResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: >- - Rerank a list of documents based on their relevance to a query. - description: >- - Rerank a list of documents based on their relevance to a query. - parameters: [] + - Inference + summary: Rerank + description: Rerank a list of documents based on their relevance to a query. + operationId: rerank_v1alpha_inference_rerank_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RerankRequest' required: true - deprecated: false /v1alpha/post-training/job/artifacts: get: responses: @@ -1057,54 +457,56 @@ paths: $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the artifacts of a training job. + - Post Training + summary: Get Training Job Artifacts description: Get the artifacts of a training job. + operationId: get_training_job_artifacts_v1alpha_post_training_job_artifacts_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the artifacts of. - required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/job/cancel: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - PostTraining (Coming Soon) - summary: Cancel a training job. + - Post Training + summary: Cancel Training Job description: Cancel a training job. 
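For the rerank endpoint just above, the response description is the key contract: indices come back sorted by relevance score, descending. The RerankRequest fields are not reproduced in this hunk, so the `model`, `query`, and `items` keys below are purely illustrative placeholders:

```python
import requests

BASE = "http://localhost:8321"  # assumed local deployment

# POST /v1alpha/inference/rerank -> RerankResponse
payload = {                      # hypothetical RerankRequest shape; check the
    "model": "my-rerank-model",  # RerankRequest schema in the full spec
    "query": "What is the capital of France?",
    "items": ["Berlin is a capital.",
              "Paris is the capital of France.",
              "Madrid is a capital."],
}
resp = requests.post(f"{BASE}/v1alpha/inference/rerank", json=payload).json()
# Per the response description above, indices arrive sorted by relevance
# score in descending order.
print(resp)
```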
- parameters: [] + operationId: cancel_training_job_v1alpha_post_training_job_cancel_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CancelTrainingJobRequest' required: true - deprecated: false /v1alpha/post-training/job/status: get: responses: @@ -1116,27 +518,28 @@ paths: $ref: '#/components/schemas/PostTrainingJobStatusResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the status of a training job. + - Post Training + summary: Get Training Job Status description: Get the status of a training job. + operationId: get_training_job_status_v1alpha_post_training_job_status_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the status of. - required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/jobs: get: responses: @@ -1147,21 +550,22 @@ paths: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Get all training jobs. + - Post Training + summary: Get Training Jobs description: Get all training jobs. - parameters: [] - deprecated: false + operationId: get_training_jobs_v1alpha_post_training_jobs_get /v1alpha/post-training/preference-optimize: post: responses: @@ -1172,27 +576,28 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run preference optimization of a model. + - Post Training + summary: Preference Optimize description: Run preference optimization of a model. 
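The post-training job endpoints above follow a similar pattern, keyed by a `job_uuid` query parameter. A small sketch (base URL and UUID are placeholders; the cancel body's field name is an assumption, since CancelTrainingJobRequest is not expanded in this hunk):

```python
import requests

BASE = "http://localhost:8321"  # assumed local deployment

# List training jobs: GET /v1alpha/post-training/jobs
jobs = requests.get(f"{BASE}/v1alpha/post-training/jobs").json()

# Check one job: GET /v1alpha/post-training/job/status?job_uuid=...
job_uuid = "1234-abcd"  # placeholder UUID
status = requests.get(f"{BASE}/v1alpha/post-training/job/status",
                      params={"job_uuid": job_uuid}).json()
print(status)

# Cancel it: POST /v1alpha/post-training/job/cancel -> 204 on success.
requests.post(f"{BASE}/v1alpha/post-training/job/cancel",
              json={"job_uuid": job_uuid})  # assumed request field name
```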
- parameters: [] + operationId: preference_optimize_v1alpha_post_training_preference_optimize_post requestBody: content: application/json: schema: $ref: '#/components/schemas/PreferenceOptimizeRequest' required: true - deprecated: false /v1alpha/post-training/supervised-fine-tune: post: responses: @@ -1203,2820 +608,8186 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run supervised fine-tuning of a model. + - Post Training + summary: Supervised Fine Tune description: Run supervised fine-tuning of a model. - parameters: [] + operationId: supervised_fine_tune_v1alpha_post_training_supervised_fine_tune_post requestBody: content: application/json: schema: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true - deprecated: false -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. - AppendRowsRequest: - type: object - properties: - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to append to the dataset. 
- additionalProperties: false - required: - - rows - title: AppendRowsRequest - PaginatedResponse: type: object + ListBatchesResponse: properties: + object: + type: string + const: list + title: Object + default: list data: - type: array items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The list of items for the current page + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects + first_id: + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list has_more: type: boolean - description: >- - Whether there are more items available after this set - url: - type: string - description: The URL for accessing this list - additionalProperties: false - required: - - data - - has_more - title: PaginatedResponse - description: >- - A generic paginated response that follows a simple format. - Dataset: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: dataset - default: dataset - description: >- - Type of resource, always 'dataset' for datasets - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use - source: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - description: >- - Data source configuration for the dataset - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false - required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. - RowsDataSource: - type: object - properties: - type: - type: string - const: rows - default: rows - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}]} ] - additionalProperties: false - required: - - type - - rows - title: RowsDataSource - description: A dataset stored in rows. - URIDataSource: - type: object - properties: - type: - type: string - const: uri - default: uri - uri: - type: string - description: >- - The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" - additionalProperties: false - required: - - type - - uri - title: URIDataSource - description: >- - A dataset that can be obtained from a URI. 
- ListDatasetsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Dataset' - description: List of datasets - additionalProperties: false - required: - - data - title: ListDatasetsResponse - description: Response from listing datasets. - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - RegisterDatasetRequest: - type: object - properties: - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - The purpose of the dataset. One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", - "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset - contains a question column and an answer column for evaluation. { "question": - "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": - The dataset contains a messages column with list of messages and an answer - column for evaluation. { "messages": [ {"role": "user", "content": "Hello, - my name is John Doe."}, {"role": "assistant", "content": "Hello, John - Doe. How can I help you today?"}, {"role": "user", "content": "What's - my name?"}, ], "answer": "John Doe" } - source: - $ref: '#/components/schemas/DataSource' - description: >- - The data source of the dataset. Ensure that the data source schema is - compatible with the purpose of the dataset. Examples: - { "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" - } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" - } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] - } ] } - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The metadata for the dataset. - E.g. {"description": "My dataset"}. - dataset_id: - type: string - description: >- - The ID of the dataset. If not provided, an ID will be generated. - additionalProperties: false - required: - - purpose - - source - title: RegisterDatasetRequest - AgentConfig: - type: object - properties: - sampling_params: - $ref: '#/components/schemas/SamplingParams' - input_shields: - type: array - items: - type: string - output_shields: - type: array - items: - type: string - toolgroups: - type: array - items: - $ref: '#/components/schemas/AgentTool' - client_tools: - type: array - items: - $ref: '#/components/schemas/ToolDef' - tool_choice: - type: string - enum: - - auto - - required - - none - title: ToolChoice - description: >- - Whether tool use is required or automatic. This is a hint to the model - which may not be followed. It depends on the Instruction Following capabilities - of the model. - deprecated: true - tool_prompt_format: - type: string - enum: - - json - - function_tag - - python_list - title: ToolPromptFormat - description: >- - Prompt format for calling custom / zero shot tools. 
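The removed RegisterDatasetRequest schema above documents its own `purpose` and `source` examples; assembled into a single payload, the request it used to describe looks like this (the dataset ID is a placeholder):

```python
# Register-dataset payload per the (removed) schema's own examples.
register_dataset_request = {
    "purpose": "eval/messages-answer",
    "source": {
        "type": "uri",
        "uri": "huggingface://llamastack/simpleqa?split=train",
    },
    # or inline data instead of a URI:
    # "source": {"type": "rows", "rows": [{"messages": [
    #     {"role": "user", "content": "Hello, world!"},
    #     {"role": "assistant", "content": "Hello, world!"}]}]},
    "metadata": {"description": "My dataset"},
    "dataset_id": "my-dataset",  # optional; an ID is generated when omitted
}
```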
- deprecated: true - tool_config: - $ref: '#/components/schemas/ToolConfig' - max_infer_iters: - type: integer - default: 10 - model: - type: string - description: >- - The model identifier to use for the agent - instructions: - type: string - description: The system instructions for the agent - name: - type: string - description: >- - Optional name for the agent, used in telemetry and identification - enable_session_persistence: - type: boolean + title: Has More + description: Whether there are more batches available default: false - description: >- - Optional flag indicating whether session data has to be persisted - response_format: - $ref: '#/components/schemas/ResponseFormat' - description: Optional response format configuration - additionalProperties: false + type: object required: - - model - - instructions - title: AgentConfig - description: Configuration for an agent. - AgentTool: - oneOf: - - type: string - - type: object - properties: - name: - type: string - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - - args - title: AgentToolGroupWithArgs - GrammarResponseFormat: - type: object + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. + Batch: properties: - type: + id: type: string - enum: - - json_schema - - grammar - description: >- - Must be "grammar" to identify this format type - const: grammar - default: grammar - bnf: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The BNF grammar specification the response should conform to - additionalProperties: false - required: - - type - - bnf - title: GrammarResponseFormat - description: >- - Configuration for grammar-guided response generation. - GreedySamplingStrategy: - type: object - properties: - type: + title: Id + completion_window: type: string - const: greedy - default: greedy - description: >- - Must be "greedy" to identify this sampling strategy - additionalProperties: false - required: - - type - title: GreedySamplingStrategy - description: >- - Greedy sampling strategy that selects the highest probability token at each - step. - JsonSchemaResponseFormat: - type: object - properties: - type: - type: string - enum: - - json_schema - - grammar - description: >- - Must be "json_schema" to identify this format type - const: json_schema - default: json_schema - json_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The JSON schema the response should conform to. In a Python SDK, this - is often a `pydantic` model. - additionalProperties: false - required: - - type - - json_schema - title: JsonSchemaResponseFormat - description: >- - Configuration for JSON schema-guided response generation. 
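Interleaved with the removals above, the new ListBatchesResponse schema carries OpenAI-style cursors: `first_id`, `last_id`, and `has_more` alongside `data`. A minimal pager sketch; the list endpoint's path and its `after` parameter are assumptions, since only the response schema appears in this diff:

```python
import requests

BASE = "http://localhost:8321"  # assumed local deployment

def iter_batches():
    """Yield Batch objects page by page, resuming after the last ID seen."""
    after = None
    while True:
        params = {"after": after} if after else {}
        # Assumed path; the batches endpoints live in the stable spec, not here.
        page = requests.get(f"{BASE}/v1/batches", params=params).json()
        yield from page["data"]          # list of Batch objects
        if not page.get("has_more"):
            break
        after = page["last_id"]          # cursor: resume after this batch
```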
- ResponseFormat: - oneOf: - - $ref: '#/components/schemas/JsonSchemaResponseFormat' - - $ref: '#/components/schemas/GrammarResponseFormat' - discriminator: - propertyName: type - mapping: - json_schema: '#/components/schemas/JsonSchemaResponseFormat' - grammar: '#/components/schemas/GrammarResponseFormat' - SamplingParams: - type: object - properties: - strategy: - oneOf: - - $ref: '#/components/schemas/GreedySamplingStrategy' - - $ref: '#/components/schemas/TopPSamplingStrategy' - - $ref: '#/components/schemas/TopKSamplingStrategy' - discriminator: - propertyName: type - mapping: - greedy: '#/components/schemas/GreedySamplingStrategy' - top_p: '#/components/schemas/TopPSamplingStrategy' - top_k: '#/components/schemas/TopKSamplingStrategy' - description: The sampling strategy. - max_tokens: - type: integer - default: 0 - description: >- - The maximum number of tokens that can be generated in the completion. - The token count of your prompt plus max_tokens cannot exceed the model's - context length. - repetition_penalty: - type: number - default: 1.0 - description: >- - Number between -2.0 and 2.0. Positive values penalize new tokens based - on whether they appear in the text so far, increasing the model's likelihood - to talk about new topics. - stop: - type: array - items: - type: string - description: >- - Up to 4 sequences where the API will stop generating further tokens. The - returned text will not contain the stop sequence. - additionalProperties: false - required: - - strategy - title: SamplingParams - description: Sampling parameters. - ToolConfig: - type: object - properties: - tool_choice: - oneOf: - - type: string - enum: - - auto - - required - - none - title: ToolChoice - description: >- - Whether tool use is required or automatic. This is a hint to the model - which may not be followed. It depends on the Instruction Following - capabilities of the model. - - type: string - default: auto - description: >- - (Optional) Whether tool use is automatic, required, or none. Can also - specify a tool name to use a specific tool. Defaults to ToolChoice.auto. - tool_prompt_format: - type: string - enum: - - json - - function_tag - - python_list - description: >- - (Optional) Instructs the model how to format tool calls. By default, Llama - Stack will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python - syntax -- a list of function calls. - system_message_behavior: - type: string - enum: - - append - - replace - description: >- - (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: - Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: - Replaces the default system prompt with the provided system message. The - system message can include the string '{{function_definitions}}' to indicate - where the function definitions should be inserted. - default: append - additionalProperties: false - title: ToolConfig - description: Configuration for tool use. 
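The SamplingParams schema above is a discriminated union on `strategy.type` plus generation limits. Written out as payloads, with all field names and defaults taken from the schema:

```python
# Greedy: always pick the highest-probability token.
greedy = {"strategy": {"type": "greedy"}}

# Top-p (nucleus) sampling with the documented knobs.
nucleus = {
    "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
    "max_tokens": 512,
    "repetition_penalty": 1.0,
    "stop": ["</answer>"],  # up to 4 stop sequences
}

# Top-k sampling; top_k must be at least 1 per the schema.
top_k = {"strategy": {"type": "top_k", "top_k": 40}}
```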
- ToolDef: - type: object - properties: - toolgroup_id: - type: string - description: >- - (Optional) ID of the tool group this tool belongs to - name: - type: string - description: Name of the tool - description: - type: string - description: >- - (Optional) Human-readable description of what the tool does - input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool inputs (MCP inputSchema) - output_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool outputs (MCP outputSchema) - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool - additionalProperties: false - required: - - name - title: ToolDef - description: >- - Tool definition used in runtime contexts. - TopKSamplingStrategy: - type: object - properties: - type: - type: string - const: top_k - default: top_k - description: >- - Must be "top_k" to identify this sampling strategy - top_k: - type: integer - description: >- - Number of top tokens to consider for sampling. Must be at least 1 - additionalProperties: false - required: - - type - - top_k - title: TopKSamplingStrategy - description: >- - Top-k sampling strategy that restricts sampling to the k most likely tokens. - TopPSamplingStrategy: - type: object - properties: - type: - type: string - const: top_p - default: top_p - description: >- - Must be "top_p" to identify this sampling strategy - temperature: - type: number - description: >- - Controls randomness in sampling. Higher values increase randomness - top_p: - type: number - default: 0.95 - description: >- - Cumulative probability threshold for nucleus sampling. Defaults to 0.95 - additionalProperties: false - required: - - type - title: TopPSamplingStrategy - description: >- - Top-p (nucleus) sampling strategy that samples from the smallest set of tokens - with cumulative probability >= p. - CreateAgentRequest: - type: object - properties: - agent_config: - $ref: '#/components/schemas/AgentConfig' - description: The configuration for the agent. - additionalProperties: false - required: - - agent_config - title: CreateAgentRequest - AgentCreateResponse: - type: object - properties: - agent_id: - type: string - description: Unique identifier for the created agent - additionalProperties: false - required: - - agent_id - title: AgentCreateResponse - description: >- - Response returned when creating a new agent. - Agent: - type: object - properties: - agent_id: - type: string - description: Unique identifier for the agent - agent_config: - $ref: '#/components/schemas/AgentConfig' - description: Configuration settings for the agent + title: Completion Window created_at: - type: string - format: date-time - description: Timestamp when the agent was created - additionalProperties: false - required: - - agent_id - - agent_config - - created_at - title: Agent - description: >- - An agent instance with configuration and metadata. - CreateAgentSessionRequest: - type: object - properties: - session_name: - type: string - description: The name of the session to create. 
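A concrete ToolDef per the schema above: only `name` is required, and `input_schema`/`output_schema` follow MCP's inputSchema/outputSchema convention. The weather tool itself is a made-up example:

```python
get_weather_tool = {
    "name": "get_weather",
    "description": "Look up the current weather for a city",
    "input_schema": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
    "output_schema": {
        "type": "object",
        "properties": {"temp_c": {"type": "number"}},
    },
    "metadata": {"source": "example"},  # optional free-form metadata
}
```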
- additionalProperties: false - required: - - session_name - title: CreateAgentSessionRequest - AgentSessionCreateResponse: - type: object - properties: - session_id: - type: string - description: >- - Unique identifier for the created session - additionalProperties: false - required: - - session_id - title: AgentSessionCreateResponse - description: >- - Response returned when creating a new agent session. - CompletionMessage: - type: object - properties: - role: - type: string - const: assistant - default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - $ref: '#/components/schemas/InterleavedContent' - description: The content of the model's response - stop_reason: - type: string - enum: - - end_of_turn - - end_of_message - - out_of_tokens - description: >- - Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: - The model finished generating the entire response. - `StopReason.end_of_message`: - The model finished generating but generated a partial response -- usually, - a tool call. The user may call the tool and continue the conversation - with the tool's response. - `StopReason.out_of_tokens`: The model ran - out of token budget. - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolCall' - description: >- - List of tool calls. Each tool call is a ToolCall object. - additionalProperties: false - required: - - role - - content - - stop_reason - title: CompletionMessage - description: >- - A message containing the model's (assistant) response in a chat conversation. - ImageContentItem: - type: object - properties: - type: - type: string - const: image - default: image - description: >- - Discriminator type of the content item. Always "image" - image: - type: object - properties: - url: - $ref: '#/components/schemas/URL' - description: >- - A URL of the image or data URL in the format of data:image/{type};base64,{data}. - Note that URL could have length limits. - data: - type: string - contentEncoding: base64 - description: base64 encoded image data as string - additionalProperties: false - description: >- - Image as a base64 encoded string or an URL - additionalProperties: false - required: - - type - - image - title: ImageContentItem - description: A image content item - InferenceStep: - type: object - properties: - turn_id: - type: string - description: The ID of the turn. - step_id: - type: string - description: The ID of the step. - started_at: - type: string - format: date-time - description: The time the step started. - completed_at: - type: string - format: date-time - description: The time the step completed. - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - title: StepType - description: Type of the step in an agent turn. - const: inference - default: inference - model_response: - $ref: '#/components/schemas/CompletionMessage' - description: The response from the LLM. - additionalProperties: false - required: - - turn_id - - step_id - - step_type - - model_response - title: InferenceStep - description: An inference step in an agent turn. 
- InterleavedContent: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - InterleavedContentItem: - oneOf: - - $ref: '#/components/schemas/ImageContentItem' - - $ref: '#/components/schemas/TextContentItem' - discriminator: - propertyName: type - mapping: - image: '#/components/schemas/ImageContentItem' - text: '#/components/schemas/TextContentItem' - MemoryRetrievalStep: - type: object - properties: - turn_id: - type: string - description: The ID of the turn. - step_id: - type: string - description: The ID of the step. - started_at: - type: string - format: date-time - description: The time the step started. - completed_at: - type: string - format: date-time - description: The time the step completed. - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - title: StepType - description: Type of the step in an agent turn. - const: memory_retrieval - default: memory_retrieval - vector_db_ids: - type: string - description: >- - The IDs of the vector databases to retrieve context from. - inserted_context: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The context retrieved from the vector databases. - additionalProperties: false - required: - - turn_id - - step_id - - step_type - - vector_db_ids - - inserted_context - title: MemoryRetrievalStep - description: >- - A memory retrieval step in an agent turn. - SafetyViolation: - type: object - properties: - violation_level: - $ref: '#/components/schemas/ViolationLevel' - description: Severity level of the violation - user_message: - type: string - description: >- - (Optional) Message to convey to the user about the violation - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata including specific violation codes for debugging and - telemetry - additionalProperties: false - required: - - violation_level - - metadata - title: SafetyViolation - description: >- - Details of a safety violation detected by content moderation. - Session: - type: object - properties: - session_id: - type: string - description: >- - Unique identifier for the conversation session - session_name: - type: string - description: Human-readable name for the session - turns: - type: array - items: - $ref: '#/components/schemas/Turn' - description: >- - List of all turns that have occurred in this session - started_at: - type: string - format: date-time - description: Timestamp when the session was created - additionalProperties: false - required: - - session_id - - session_name - - turns - - started_at - title: Session - description: >- - A single session of an interaction with an Agentic System. - ShieldCallStep: - type: object - properties: - turn_id: - type: string - description: The ID of the turn. - step_id: - type: string - description: The ID of the step. - started_at: - type: string - format: date-time - description: The time the step started. - completed_at: - type: string - format: date-time - description: The time the step completed. - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - title: StepType - description: Type of the step in an agent turn. 
- const: shield_call - default: shield_call - violation: - $ref: '#/components/schemas/SafetyViolation' - description: The violation from the shield call. - additionalProperties: false - required: - - turn_id - - step_id - - step_type - title: ShieldCallStep - description: A shield call step in an agent turn. - TextContentItem: - type: object - properties: - type: - type: string - const: text - default: text - description: >- - Discriminator type of the content item. Always "text" - text: - type: string - description: Text content - additionalProperties: false - required: - - type - - text - title: TextContentItem - description: A text content item - ToolCall: - type: object - properties: - call_id: - type: string - tool_name: - oneOf: - - type: string - enum: - - brave_search - - wolfram_alpha - - photogen - - code_interpreter - title: BuiltinTool - - type: string - arguments: - type: string - additionalProperties: false - required: - - call_id - - tool_name - - arguments - title: ToolCall - ToolExecutionStep: - type: object - properties: - turn_id: - type: string - description: The ID of the turn. - step_id: - type: string - description: The ID of the step. - started_at: - type: string - format: date-time - description: The time the step started. - completed_at: - type: string - format: date-time - description: The time the step completed. - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - title: StepType - description: Type of the step in an agent turn. - const: tool_execution - default: tool_execution - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolCall' - description: The tool calls to execute. - tool_responses: - type: array - items: - $ref: '#/components/schemas/ToolResponse' - description: The tool responses from the tool calls. - additionalProperties: false - required: - - turn_id - - step_id - - step_type - - tool_calls - - tool_responses - title: ToolExecutionStep - description: A tool execution step in an agent turn. - ToolResponse: - type: object - properties: - call_id: - type: string - description: >- - Unique identifier for the tool call this response is for - tool_name: - oneOf: - - type: string - enum: - - brave_search - - wolfram_alpha - - photogen - - code_interpreter - title: BuiltinTool - - type: string - description: Name of the tool that was invoked - content: - $ref: '#/components/schemas/InterleavedContent' - description: The response content from the tool - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool response - additionalProperties: false - required: - - call_id - - tool_name - - content - title: ToolResponse - description: Response from a tool invocation. - ToolResponseMessage: - type: object - properties: - role: - type: string - const: tool - default: tool - description: >- - Must be "tool" to identify this as a tool response - call_id: - type: string - description: >- - Unique identifier for the tool call this response is for - content: - $ref: '#/components/schemas/InterleavedContent' - description: The response content from the tool - additionalProperties: false - required: - - role - - call_id - - content - title: ToolResponseMessage - description: >- - A message representing the result of a tool invocation. 
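For illustration (not part of the patch): the removed ToolCall/ToolResponseMessage pair round-tripped as in this hypothetical sketch — arguments travel as a JSON-encoded string, and the tool's reply echoes the call_id:

  # ToolCall emitted by the model
  call_id: call_001
  tool_name: brave_search
  arguments: '{"query": "llama stack"}'

  # ToolResponseMessage sent back by the caller
  role: tool
  call_id: call_001
  content: "search results go here"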
- Turn: - type: object - properties: - turn_id: - type: string - description: >- - Unique identifier for the turn within a session - session_id: - type: string - description: >- - Unique identifier for the conversation session - input_messages: - type: array - items: - oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - description: >- - List of messages that initiated this turn - steps: - type: array - items: - oneOf: - - $ref: '#/components/schemas/InferenceStep' - - $ref: '#/components/schemas/ToolExecutionStep' - - $ref: '#/components/schemas/ShieldCallStep' - - $ref: '#/components/schemas/MemoryRetrievalStep' - discriminator: - propertyName: step_type - mapping: - inference: '#/components/schemas/InferenceStep' - tool_execution: '#/components/schemas/ToolExecutionStep' - shield_call: '#/components/schemas/ShieldCallStep' - memory_retrieval: '#/components/schemas/MemoryRetrievalStep' - description: >- - Ordered list of processing steps executed during this turn - output_message: - $ref: '#/components/schemas/CompletionMessage' - description: >- - The model's generated response containing content and metadata - output_attachments: - type: array - items: - type: object - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the attachment. - mime_type: - type: string - description: The MIME type of the attachment. - additionalProperties: false - required: - - content - - mime_type - title: Attachment - description: An attachment to an agent turn. - description: >- - (Optional) Files or media attached to the agent's response - started_at: - type: string - format: date-time - description: Timestamp when the turn began - completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the turn finished, if completed - additionalProperties: false - required: - - turn_id - - session_id - - input_messages - - steps - - output_message - - started_at - title: Turn - description: >- - A single turn in an interaction with an Agentic System. - URL: - type: object - properties: - uri: - type: string - description: The URL string pointing to the resource - additionalProperties: false - required: - - uri - title: URL - description: A URL reference to external content. - UserMessage: - type: object - properties: - role: - type: string - const: user - default: user - description: >- - Must be "user" to identify this as a user message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the message, which can include text and other media - context: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) This field is used internally by Llama Stack to pass RAG context. - This field may be removed in the API in the future. - additionalProperties: false - required: - - role - - content - title: UserMessage - description: >- - A message from the user in a chat conversation. - ViolationLevel: - type: string - enum: - - info - - warn - - error - title: ViolationLevel - description: Severity level of a safety violation. - CreateAgentTurnRequest: - type: object - properties: - messages: - type: array - items: - oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - description: List of messages to start the turn with. 
- stream: - type: boolean - description: >- - (Optional) If True, generate an SSE event stream of the response. Defaults - to False. - documents: - type: array - items: - type: object - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - additionalProperties: false - required: - - content - - mime_type - title: Document - description: A document to be used by an agent. - description: >- - (Optional) List of documents to create the turn with. - toolgroups: - type: array - items: - $ref: '#/components/schemas/AgentTool' - description: >- - (Optional) List of toolgroups to create the turn with, will be used in - addition to the agent's config toolgroups for the request. - tool_config: - $ref: '#/components/schemas/ToolConfig' - description: >- - (Optional) The tool configuration to create the turn with, will be used - to override the agent's tool_config. - additionalProperties: false - required: - - messages - title: CreateAgentTurnRequest - AgentTurnResponseEvent: - type: object - properties: - payload: - oneOf: - - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload' - - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload' - - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' - discriminator: - propertyName: event_type - mapping: - step_start: '#/components/schemas/AgentTurnResponseStepStartPayload' - step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload' - step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' - turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' - turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' - description: >- - Event-specific payload containing event data - additionalProperties: false - required: - - payload - title: AgentTurnResponseEvent - description: >- - An event in an agent turn response stream. 
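For illustration (not part of the patch): a minimal CreateAgentTurnRequest under the schema being removed here — only messages is required; stream, documents, toolgroups, and tool_config are optional (values hypothetical):

  messages:
    - role: user
      content: "What is the weather in Tokyo?"
  stream: true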
- AgentTurnResponseStepCompletePayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: step_complete - default: step_complete - description: Type of event being reported - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - description: Type of step being executed - step_id: - type: string - description: >- - Unique identifier for the step within a turn - step_details: - oneOf: - - $ref: '#/components/schemas/InferenceStep' - - $ref: '#/components/schemas/ToolExecutionStep' - - $ref: '#/components/schemas/ShieldCallStep' - - $ref: '#/components/schemas/MemoryRetrievalStep' - discriminator: - propertyName: step_type - mapping: - inference: '#/components/schemas/InferenceStep' - tool_execution: '#/components/schemas/ToolExecutionStep' - shield_call: '#/components/schemas/ShieldCallStep' - memory_retrieval: '#/components/schemas/MemoryRetrievalStep' - description: Complete details of the executed step - additionalProperties: false - required: - - event_type - - step_type - - step_id - - step_details - title: AgentTurnResponseStepCompletePayload - description: >- - Payload for step completion events in agent turn responses. - AgentTurnResponseStepProgressPayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: step_progress - default: step_progress - description: Type of event being reported - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - description: Type of step being executed - step_id: - type: string - description: >- - Unique identifier for the step within a turn - delta: - oneOf: - - $ref: '#/components/schemas/TextDelta' - - $ref: '#/components/schemas/ImageDelta' - - $ref: '#/components/schemas/ToolCallDelta' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/TextDelta' - image: '#/components/schemas/ImageDelta' - tool_call: '#/components/schemas/ToolCallDelta' - description: >- - Incremental content changes during step execution - additionalProperties: false - required: - - event_type - - step_type - - step_id - - delta - title: AgentTurnResponseStepProgressPayload - description: >- - Payload for step progress events in agent turn responses. - AgentTurnResponseStepStartPayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: step_start - default: step_start - description: Type of event being reported - step_type: - type: string - enum: - - inference - - tool_execution - - shield_call - - memory_retrieval - description: Type of step being executed - step_id: - type: string - description: >- - Unique identifier for the step within a turn - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata for the step - additionalProperties: false - required: - - event_type - - step_type - - step_id - title: AgentTurnResponseStepStartPayload - description: >- - Payload for step start events in agent turn responses. 
- AgentTurnResponseStreamChunk: - type: object - properties: - event: - $ref: '#/components/schemas/AgentTurnResponseEvent' - description: >- - Individual event in the agent turn response stream - additionalProperties: false - required: - - event - title: AgentTurnResponseStreamChunk - description: Streamed agent turn completion response. - "AgentTurnResponseTurnAwaitingInputPayload": - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: turn_awaiting_input - default: turn_awaiting_input - description: Type of event being reported - turn: - $ref: '#/components/schemas/Turn' - description: >- - Turn data when waiting for external tool responses - additionalProperties: false - required: - - event_type - - turn - title: >- - AgentTurnResponseTurnAwaitingInputPayload - description: >- - Payload for turn awaiting input events in agent turn responses. - AgentTurnResponseTurnCompletePayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: turn_complete - default: turn_complete - description: Type of event being reported - turn: - $ref: '#/components/schemas/Turn' - description: >- - Complete turn data including all steps and results - additionalProperties: false - required: - - event_type - - turn - title: AgentTurnResponseTurnCompletePayload - description: >- - Payload for turn completion events in agent turn responses. - AgentTurnResponseTurnStartPayload: - type: object - properties: - event_type: - type: string - enum: - - step_start - - step_complete - - step_progress - - turn_start - - turn_complete - - turn_awaiting_input - const: turn_start - default: turn_start - description: Type of event being reported - turn_id: - type: string - description: >- - Unique identifier for the turn within a session - additionalProperties: false - required: - - event_type - - turn_id - title: AgentTurnResponseTurnStartPayload - description: >- - Payload for turn start events in agent turn responses. - ImageDelta: - type: object - properties: - type: - type: string - const: image - default: image - description: >- - Discriminator type of the delta. Always "image" - image: - type: string - contentEncoding: base64 - description: The incremental image data as bytes - additionalProperties: false - required: - - type - - image - title: ImageDelta - description: >- - An image content delta for streaming responses. - TextDelta: - type: object - properties: - type: - type: string - const: text - default: text - description: >- - Discriminator type of the delta. Always "text" - text: - type: string - description: The incremental text content - additionalProperties: false - required: - - type - - text - title: TextDelta - description: >- - A text content delta for streaming responses. - ToolCallDelta: - type: object - properties: - type: - type: string - const: tool_call - default: tool_call - description: >- - Discriminator type of the delta. 
Always "tool_call" - tool_call: - oneOf: - - type: string - - $ref: '#/components/schemas/ToolCall' - description: >- - Either an in-progress tool call string or the final parsed tool call - parse_status: - type: string - enum: - - started - - in_progress - - failed - - succeeded - description: Current parsing status of the tool call - additionalProperties: false - required: - - type - - tool_call - - parse_status - title: ToolCallDelta - description: >- - A tool call content delta for streaming responses. - ResumeAgentTurnRequest: - type: object - properties: - tool_responses: - type: array - items: - $ref: '#/components/schemas/ToolResponse' - description: >- - The tool call responses to resume the turn with. - stream: - type: boolean - description: Whether to stream the response. - additionalProperties: false - required: - - tool_responses - title: ResumeAgentTurnRequest - AgentStepResponse: - type: object - properties: - step: - oneOf: - - $ref: '#/components/schemas/InferenceStep' - - $ref: '#/components/schemas/ToolExecutionStep' - - $ref: '#/components/schemas/ShieldCallStep' - - $ref: '#/components/schemas/MemoryRetrievalStep' - discriminator: - propertyName: step_type - mapping: - inference: '#/components/schemas/InferenceStep' - tool_execution: '#/components/schemas/ToolExecutionStep' - shield_call: '#/components/schemas/ShieldCallStep' - memory_retrieval: '#/components/schemas/MemoryRetrievalStep' - description: >- - The complete step data and execution details - additionalProperties: false - required: - - step - title: AgentStepResponse - description: >- - Response containing details of a specific agent step. - Benchmark: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: benchmark - default: benchmark - description: The resource type, always benchmark - dataset_id: - type: string - description: >- - Identifier of the dataset to use for the benchmark evaluation - scoring_functions: - type: array - items: - type: string - description: >- - List of scoring function identifiers to apply during evaluation - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Metadata for this evaluation task - additionalProperties: false - required: - - identifier - - provider_id - - type - - dataset_id - - scoring_functions - - metadata - title: Benchmark - description: >- - A benchmark resource for evaluating model performance. - ListBenchmarksResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Benchmark' - additionalProperties: false - required: - - data - title: ListBenchmarksResponse - RegisterBenchmarkRequest: - type: object - properties: - benchmark_id: - type: string - description: The ID of the benchmark to register. - dataset_id: - type: string - description: >- - The ID of the dataset to use for the benchmark. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the benchmark. - provider_benchmark_id: - type: string - description: >- - The ID of the provider benchmark to use for the benchmark. - provider_id: - type: string - description: >- - The ID of the provider to use for the benchmark. 
- metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The metadata to use for the benchmark. - additionalProperties: false - required: - - benchmark_id - - dataset_id - - scoring_functions - title: RegisterBenchmarkRequest - AgentCandidate: - type: object - properties: - type: - type: string - const: agent - default: agent - config: - $ref: '#/components/schemas/AgentConfig' - description: >- - The configuration for the agent candidate. - additionalProperties: false - required: - - type - - config - title: AgentCandidate - description: An agent candidate for evaluation. - AggregationFunctionType: - type: string - enum: - - average - - weighted_average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: >- - Types of aggregation functions for scoring results. - BasicScoringFnParams: - type: object - properties: - type: - $ref: '#/components/schemas/ScoringFnParamsType' - const: basic - default: basic - description: >- - The type of scoring function parameters, always basic - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - aggregation_functions - title: BasicScoringFnParams - description: >- - Parameters for basic scoring function configuration. - BenchmarkConfig: - type: object - properties: - eval_candidate: - oneOf: - - $ref: '#/components/schemas/ModelCandidate' - - $ref: '#/components/schemas/AgentCandidate' - discriminator: - propertyName: type - mapping: - model: '#/components/schemas/ModelCandidate' - agent: '#/components/schemas/AgentCandidate' - description: The candidate to evaluate. - scoring_params: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - Map between scoring function id and parameters for each scoring function - you want to run - num_examples: type: integer - description: >- - (Optional) The number of examples to evaluate. If not provided, all examples - in the dataset will be evaluated - additionalProperties: false - required: - - eval_candidate - - scoring_params - title: BenchmarkConfig - description: >- - A benchmark configuration for evaluation. - LLMAsJudgeScoringFnParams: - type: object - properties: - type: - $ref: '#/components/schemas/ScoringFnParamsType' - const: llm_as_judge - default: llm_as_judge - description: >- - The type of scoring function parameters, always llm_as_judge - judge_model: + title: Created At + endpoint: type: string - description: >- - Identifier of the LLM model to use as a judge for scoring - prompt_template: + title: Endpoint + input_file_id: type: string - description: >- - (Optional) Custom prompt template for the judge model - judge_score_regexes: - type: array - items: - type: string - description: >- - Regexes to extract the answer from generated response - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - judge_model - - judge_score_regexes - - aggregation_functions - title: LLMAsJudgeScoringFnParams - description: >- - Parameters for LLM-as-judge scoring function configuration. 
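For illustration (not part of the patch): the removed LLMAsJudgeScoringFnParams required type, judge_model, judge_score_regexes, and aggregation_functions; a sketch with an invented model id and regex:

  type: llm_as_judge
  judge_model: example-judge-model
  prompt_template: "Rate the answer from 1 to 5."
  judge_score_regexes:
    - "Score: (\\d)"
  aggregation_functions:
    - average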
- ModelCandidate: - type: object - properties: - type: + title: Input File Id + object: type: string - const: model - default: model - model: - type: string - description: The model ID to evaluate. - sampling_params: - $ref: '#/components/schemas/SamplingParams' - description: The sampling parameters for the model. - system_message: - $ref: '#/components/schemas/SystemMessage' - description: >- - (Optional) The system message providing instructions or context to the - model. - additionalProperties: false - required: - - type - - model - - sampling_params - title: ModelCandidate - description: A model candidate for evaluation. - RegexParserScoringFnParams: - type: object - properties: - type: - $ref: '#/components/schemas/ScoringFnParamsType' - const: regex_parser - default: regex_parser - description: >- - The type of scoring function parameters, always regex_parser - parsing_regexes: - type: array - items: - type: string - description: >- - Regex to extract the answer from generated response - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - parsing_regexes - - aggregation_functions - title: RegexParserScoringFnParams - description: >- - Parameters for regex parser scoring function configuration. - ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' - discriminator: - propertyName: type - mapping: - llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' - regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' - ScoringFnParamsType: - type: string - enum: - - llm_as_judge - - regex_parser - - basic - title: ScoringFnParamsType - description: >- - Types of scoring function parameter configurations. - SystemMessage: - type: object - properties: - role: - type: string - const: system - default: system - description: >- - Must be "system" to identify this as a system message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). - additionalProperties: false - required: - - role - - content - title: SystemMessage - description: >- - A system message providing instructions or context to the model. - EvaluateRowsRequest: - type: object - properties: - input_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the evaluation. - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. 
- additionalProperties: false - required: - - input_rows - - scoring_functions - - benchmark_config - title: EvaluateRowsRequest - EvaluateResponse: - type: object - properties: - generations: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. - scores: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false - required: - - generations - - scores - title: EvaluateResponse - description: The response from an evaluation. - ScoringResult: - type: object - properties: - score_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. - aggregated_results: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false - required: - - score_rows - - aggregated_results - title: ScoringResult - description: A scoring result for a single row. - RunEvalRequest: - type: object - properties: - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false - required: - - benchmark_config - title: RunEvalRequest - Job: - type: object - properties: - job_id: - type: string - description: Unique identifier for the job + const: batch + title: Object status: type: string enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. 
- "OpenAIChatCompletionContentPartImageParam": + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status + cancelled_at: + anyOf: + - type: integer + - type: 'null' + cancelling_at: + anyOf: + - type: integer + - type: 'null' + completed_at: + anyOf: + - type: integer + - type: 'null' + error_file_id: + anyOf: + - type: string + - type: 'null' + errors: + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' + title: Errors + expired_at: + anyOf: + - type: integer + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + failed_at: + anyOf: + - type: integer + - type: 'null' + finalizing_at: + anyOf: + - type: integer + - type: 'null' + in_progress_at: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + model: + anyOf: + - type: string + - type: 'null' + output_file_id: + anyOf: + - type: string + - type: 'null' + request_counts: + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' + title: BatchRequestCounts + usage: + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' + title: BatchUsage + additionalProperties: true type: object + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + ListOpenAIChatCompletionResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + properties: + role: + const: assistant + default: assistant + title: Role + type: string + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true + title: OpenAIAssistantMessageParam + type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. 
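For illustration (not part of the patch): the new Batch schema closely follows the OpenAI batch object; a minimal instance satisfying its required list (id, completion_window, created_at, endpoint, input_file_id, object, status), with hypothetical values:

  id: batch_abc123
  object: batch
  endpoint: /v1/chat/completions
  input_file_id: file_123
  completion_window: "24h"
  created_at: 1712345678
  status: in_progress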
+ OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIImageURL: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionToolCall: + properties: + index: + anyOf: + - type: integer + - type: 'null' + id: + anyOf: + - type: string + - type: 'null' + type: + type: string + const: function + title: Type + default: function + function: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction + type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: + properties: + name: + anyOf: + - type: string + - type: 'null' + arguments: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: + properties: + prompt_tokens: + type: integer + title: Prompt Tokens + completion_tokens: + type: integer + title: Completion Tokens + total_tokens: + type: integer + title: Total Tokens + prompt_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails + completion_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails + type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. 
+ OpenAIChoice: + properties: + message: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... (5 variants) + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + finish_reason: + type: string + title: Finish Reason + index: + type: integer + title: Index + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: + properties: + content: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' + refusal: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' + type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: + properties: + role: + type: string + const: developer + title: Role + default: developer + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: + properties: + type: + type: string + const: file + title: Type + default: file + file: + $ref: '#/components/schemas/OpenAIFileFile' + type: object + required: + - file + title: OpenAIFile + OpenAIFileFile: + properties: + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIFileFile + OpenAIImageURL: properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - RerankRequest: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. 
+ OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: + properties: + token: + type: string + title: Token + bytes: + anyOf: + - items: + type: integer + type: array + - type: 'null' + logprob: + type: number + title: Logprob + top_logprobs: + items: + $ref: '#/components/schemas/OpenAITopLogProb' + type: array + title: Top Logprobs + type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: + properties: + role: + type: string + const: tool + title: Role + default: tool + tool_call_id: + type: string + title: Tool Call Id + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] + type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: + properties: + token: + type: string + title: Token + bytes: + anyOf: + - items: + type: integer + type: array + - type: 'null' + logprob: + type: number + title: Logprob + type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. 
+ properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - content + title: OpenAIUserMessageParam + type: object + OpenAIJSONSchema: + properties: + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: + properties: + type: + type: string + const: json_object + title: Type + default: json_object + type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: + properties: + type: + type: string + const: json_schema + title: Type + default: json_schema + json_schema: + $ref: '#/components/schemas/OpenAIJSONSchema' + type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: + properties: + type: + type: string + const: text + title: Type + default: text + type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the reranking model to use. - query: - oneOf: - - type: string - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - The search query to rank items against. 
Can be a string, text content - part, or image content part. The input must not exceed the model's max - input token length. - items: - type: array + title: Model + messages: items: oneOf: - - type: string - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - List of items to rerank. Each item can be a string, text content part, - or image content part. Each input must not exceed the model's max input - token length. - max_num_results: - type: integer - description: >- - (Optional) Maximum number of results to return. Default: returns all. - additionalProperties: false - required: - - model - - query - - items - title: RerankRequest - RerankData: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) + type: array + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + 
temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + top_logprobs: + anyOf: + - type: integer + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. + OpenAIChatCompletion: properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAIChoice' + type: array + title: Choices + object: + type: string + const: chat.completion + title: Object + default: chat.completion + created: + type: integer + title: Created + model: + type: string + title: Model + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage + type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. + properties: + id: + title: Id + type: string + choices: + items: + $ref: '#/components/schemas/OpenAIChunkChoice' + title: Choices + type: array + object: + const: chat.completion.chunk + default: chat.completion.chunk + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model + type: string + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage + required: + - id + - choices + - created + - model + title: OpenAIChatCompletionChunk + type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. + properties: + content: + anyOf: + - type: string + - type: 'null' + nullable: true + refusal: + anyOf: + - type: string + - type: 'null' + nullable: true + role: + anyOf: + - type: string + - type: 'null' + nullable: true + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true + reasoning_content: + anyOf: + - type: string + - type: 'null' + nullable: true + title: OpenAIChoiceDelta + type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. 
+ properties: + delta: + $ref: '#/components/schemas/OpenAIChoiceDelta' + finish_reason: + title: Finish Reason + type: string + index: + title: Index + type: integer + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs + required: + - delta + - finish_reason + - index + title: OpenAIChunkChoice + type: object + OpenAICompletionWithInputMessages: + properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAIChoice' + type: array + title: Choices + object: + type: string + const: chat.completion + title: Object + default: chat.completion + created: + type: integer + title: Created + model: + type: string + title: Model + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage + input_messages: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object + required: + - id + - choices + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + OpenAICompletionRequestWithExtraBody: + properties: + model: + type: string + title: Model + prompt: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: + items: + type: integer + type: array + type: array + title: list[array] + title: string | ... 
(4 variants) + best_of: + anyOf: + - type: integer + - type: 'null' + echo: + anyOf: + - type: boolean + - type: 'null' + frequency_penalty: + anyOf: + - type: number + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + suffix: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: + properties: + id: + type: string + title: Id + choices: + items: + $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices + created: + type: integer + title: Created + model: + type: string + title: Model + object: + type: string + const: text_completion + title: Object + default: text_completion + type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. + + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: + properties: + finish_reason: + type: string + title: Finish Reason + text: + type: string + title: Text index: type: integer - description: >- - The original index of the document in the input list - relevance_score: - type: number - description: >- - The relevance score from the model output. Values are inverted when applicable - so that higher scores indicate greater relevance. - additionalProperties: false - required: - - index - - relevance_score - title: RerankData - description: >- - A single rerank result from a reranking response. - RerankResponse: + title: Index + logprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - finish_reason + - text + - index + title: OpenAICompletionChoice + description: |- + A choice from an OpenAI-compatible completion response. 
+
+        :finish_reason: The reason the model stopped generating
+        :text: The text of the choice
+        :index: The index of the choice
+        :logprobs: (Optional) The log probabilities for the tokens in the choice
+    ConversationItem:
+      discriminator:
+        mapping:
+          file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+          function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+          mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+          mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+          mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+          message: '#/components/schemas/OpenAIResponseMessage'
+          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        propertyName: type
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseMessage'
+          title: OpenAIResponseMessage
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+          title: OpenAIResponseOutputMessageWebSearchToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+          title: OpenAIResponseOutputMessageFileSearchToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          title: OpenAIResponseOutputMessageFunctionToolCall
+        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+          title: OpenAIResponseInputFunctionToolCallOutput
+        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+          title: OpenAIResponseMCPApprovalRequest
+        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+          title: OpenAIResponseMCPApprovalResponse
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+          title: OpenAIResponseOutputMessageMCPCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+          title: OpenAIResponseOutputMessageMCPListTools
+      title: OpenAIResponseMessage | ... (9 variants)
+    OpenAIResponseAnnotationCitation:
+      properties:
+        type:
+          type: string
+          const: url_citation
+          title: Type
+          default: url_citation
+        end_index:
+          type: integer
+          title: End Index
+        start_index:
+          type: integer
+          title: Start Index
+        title:
+          type: string
+          title: Title
+        url:
+          type: string
+          title: Url
+      type: object
+      required:
+        - end_index
+        - start_index
+        - title
+        - url
+      title: OpenAIResponseAnnotationCitation
+      description: URL citation annotation for referencing external web resources.
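+    # Illustrative only (values assumed, not emitted by the schema generator): a
+    # JSON payload matching OpenAIResponseAnnotationCitation might look like:
+    #   {"type": "url_citation", "start_index": 10, "end_index": 42,
+    #    "title": "Example page", "url": "https://example.com/article"}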
+ OpenAIResponseAnnotationContainerFileCitation: + properties: + type: + type: string + const: container_file_citation + title: Type + default: container_file_citation + container_id: + type: string + title: Container Id + end_index: + type: integer + title: End Index + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + start_index: + type: integer + title: Start Index + type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + properties: + type: + type: string + const: file_citation + title: Type + default: file_citation + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + index: + type: integer + title: Index + type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. + OpenAIResponseAnnotationFilePath: + properties: + type: + type: string + const: file_path + title: Type + default: file_path + file_id: + type: string + title: File Id + index: + type: integer + title: Index + type: object + required: + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + OpenAIResponseContentPartRefusal: + properties: + type: + type: string + const: refusal + title: Type + default: refusal + refusal: + type: string + title: Refusal + type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: + properties: + call_id: + type: string + title: Call Id + output: + type: string + title: Output + type: + type: string + const: function_call_output + title: Type + default: function_call_output + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. 
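+    # Illustrative only (call_id and output values are hypothetical): a
+    # function_call_output item passed back to the model per
+    # OpenAIResponseInputFunctionToolCallOutput might look like:
+    #   {"type": "function_call_output", "call_id": "call_abc123",
+    #    "output": "{\"temperature\": 72}"}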
+ OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: + properties: + type: + type: string + const: input_file + title: Type + default: input_file + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + file_url: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: + properties: + detail: + title: Detail + default: auto + type: string + enum: + - low + - high + - auto + type: + type: string + const: input_image + title: Type + default: input_image + file_id: + anyOf: + - type: string + - type: 'null' + image_url: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: + properties: + text: + type: string + title: Text + type: + type: string + const: input_text + title: Type + default: input_text + type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: + properties: + arguments: + type: string + title: Arguments + id: + type: string + title: Id + name: + type: string + title: Name + server_label: + type: string + title: Server Label + type: + type: string + const: mcp_approval_request + title: Type + default: mcp_approval_request + type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: + properties: + approval_request_id: + type: string + title: Approval Request Id + approve: + type: boolean + title: Approve + type: + type: string + const: mcp_approval_response + title: Type + default: mcp_approval_response + id: + anyOf: + - type: string + - type: 'null' + reason: + anyOf: + - type: string + - type: 'null' + type: object + required: + - approval_request_id + - approve + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. + OpenAIResponseMessage: + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
+ properties: + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + const: message + default: message + title: Type + type: string + id: + anyOf: + - type: string + - type: 'null' + nullable: true + status: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - content + - role + title: OpenAIResponseMessage + type: object + OpenAIResponseOutputMessageContent: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: + properties: + text: + type: string + title: Text + type: + type: string + const: output_text + title: Type + default: output_text + annotations: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: 
type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + type: array + title: Annotations + type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: + properties: + id: + type: string + title: Id + queries: + items: + type: string + type: array + title: Queries + status: + type: string + title: Status + type: + type: string + const: file_search_call + title: Type + default: file_search_call + results: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' + type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: + properties: + call_id: + type: string + title: Call Id + name: + type: string + title: Name + arguments: + type: string + title: Arguments + type: + type: string + const: function_call + title: Type + default: function_call + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPCall: + properties: + id: + type: string + title: Id + type: + type: string + const: mcp_call + title: Type + default: mcp_call + arguments: + type: string + title: Arguments + name: + type: string + title: Name + server_label: + type: string + title: Server Label + error: + anyOf: + - type: string + - type: 'null' + output: + anyOf: + - type: string + - type: 'null' + type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: + properties: + id: + type: string + title: Id + type: + type: string + const: mcp_list_tools + title: Type + default: mcp_list_tools + server_label: + type: string + title: Server Label + tools: + items: + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools + type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. + OpenAIResponseOutputMessageWebSearchToolCall: + properties: + id: + type: string + title: Id + status: + type: string + title: Status + type: + type: string + const: web_search_call + title: Type + default: web_search_call + type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + Conversation: + properties: + id: + type: string + title: Id + description: The unique ID of the conversation. + object: + type: string + const: conversation + title: Object + description: The object type, which is always conversation. 
+ default: conversation + created_at: + type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object + required: + - id + - created_at + title: Conversation + description: OpenAI-compatible conversation object. + ConversationDeletedResource: + properties: + id: + type: string + title: Id + description: The deleted conversation identifier + object: + type: string + title: Object + description: Object type + default: conversation.deleted + deleted: + type: boolean + title: Deleted + description: Whether the object was deleted + default: true + type: object + required: + - id + title: ConversationDeletedResource + description: Response for deleted conversation. + ConversationItemList: + properties: + object: + type: string + title: Object + description: Object type + default: list + data: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(9 variants) + type: array + title: Data + description: List of conversation items + first_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list + has_more: + type: boolean + title: Has More + description: Whether there are more items available + default: false + type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + ConversationItemDeletedResource: + properties: + id: + type: string + title: Id + description: The deleted item identifier + object: + type: string + title: Object + description: Object type + default: conversation.item.deleted + deleted: + type: boolean + title: Deleted + description: Whether the object was deleted + default: true + type: object + required: + - id + title: ConversationItemDeletedResource + description: Response for deleted conversation item. + OpenAIEmbeddingsRequestWithExtraBody: + properties: + model: + type: string + title: Model + input: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + encoding_format: + anyOf: + - type: string + - type: 'null' + default: float + dimensions: + anyOf: + - type: integer + - type: 'null' + user: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: + properties: + object: + type: string + const: embedding + title: Object + default: embedding + embedding: + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string + index: + type: integer + title: Index + type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: + properties: + prompt_tokens: + type: integer + title: Prompt Tokens + total_tokens: + type: integer + title: Total Tokens + type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: + properties: + object: + type: string + const: list + title: Object + default: list + data: + items: + $ref: '#/components/schemas/OpenAIEmbeddingData' + type: array + title: Data + model: + type: string + title: Model + usage: + $ref: '#/components/schemas/OpenAIEmbeddingUsage' + type: object + required: + - data + - model + - usage + title: OpenAIEmbeddingsResponse + description: Response from an OpenAI-compatible embeddings request. + OpenAIFilePurpose: + type: string + enum: + - assistants + - batch + title: OpenAIFilePurpose + description: Valid purpose values for OpenAI Files API. + ListOpenAIFileResponse: properties: data: - type: array items: - $ref: '#/components/schemas/RerankData' - description: >- - List of rerank result objects, sorted by relevance score (descending) - additionalProperties: false - required: - - data - title: RerankResponse - description: Response from a reranking request. 
- Checkpoint: + $ref: '#/components/schemas/OpenAIFileObject' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: + properties: + object: + type: string + const: file + title: Object + default: file + id: + type: string + title: Id + bytes: + type: integer + title: Bytes + created_at: + type: integer + title: Created At + expires_at: + type: integer + title: Expires At + filename: + type: string + title: Filename + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: + properties: + anchor: + type: string + const: created_at + title: Anchor + seconds: + type: integer + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object + required: + - anchor + - seconds + title: ExpiresAfter + description: |- + Control expiration of uploaded files. + + Params: + - anchor, must be "created_at" + - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) + OpenAIFileDeleteResponse: + properties: + id: + type: string + title: Id + object: + type: string + const: file + title: Object + default: file + deleted: + type: boolean + title: Deleted + type: object + required: + - id + - deleted + title: OpenAIFileDeleteResponse + description: Response for deleting a file in OpenAI Files API. + HealthInfo: + properties: + status: + $ref: '#/components/schemas/HealthStatus' + type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: + properties: + route: + type: string + title: Route + method: + type: string + title: Method + provider_types: + items: + type: string + type: array + title: Provider Types + type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: + properties: + data: + items: + $ref: '#/components/schemas/RouteInfo' + type: array + title: Data + type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: + properties: + id: + type: string + title: Id + object: + type: string + const: model + title: Object + default: model + created: + type: integer + title: Created + owned_by: + type: string + title: Owned By + custom_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIModel' + type: array + title: Data + type: object + required: + - data + title: OpenAIListModelsResponse + Model: properties: identifier: type: string - description: Unique identifier for the checkpoint + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: model + title: Type + default: model + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + type: object + required: + - identifier + - provider_id + title: Model + description: A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: Enumeration of supported model types in Llama Stack. + ModerationObject: + properties: + id: + type: string + title: Id + model: + type: string + title: Model + results: + items: + $ref: '#/components/schemas/ModerationObjectResults' + type: array + title: Results + type: object + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + properties: + flagged: + type: boolean + title: Flagged + categories: + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' + category_applied_input_types: + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' + category_scores: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + user_message: + anyOf: + - type: string + - type: 'null' + metadata: + additionalProperties: true + type: object + title: Metadata + type: object + required: + - flagged + title: ModerationObjectResults + description: A moderation object. + Prompt: + properties: + prompt: + anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders + version: + type: integer + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) + prompt_id: + type: string + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' + variables: + items: + type: string + type: array + title: Variables + description: List of variable names that can be used in the prompt template + is_default: + type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version + default: false + type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. 
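+    # Illustrative only (field values and template syntax are assumed): a stored
+    # Prompt resource matching this schema could serialize as:
+    #   {"prompt_id": "pmpt_<48-digit-hash>", "version": 1, "is_default": false,
+    #    "prompt": "Summarize {document}", "variables": ["document"]}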
+ ListPromptsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Prompt' + type: array + title: Data + type: object + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. + ProviderInfo: + properties: + api: + type: string + title: Api + provider_id: + type: string + title: Provider Id + provider_type: + type: string + title: Provider Type + config: + additionalProperties: true + type: object + title: Config + health: + additionalProperties: true + type: object + title: Health + type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: + properties: + data: + items: + $ref: '#/components/schemas/ProviderInfo' + type: array + title: Data + type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. + ListOpenAIResponseObject: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: + properties: + code: + type: string + title: Code + message: + type: string + title: Message + type: object + required: + - code + - message + title: OpenAIResponseError + description: Error details for failed OpenAI response requests. + OpenAIResponseInput: + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage + OpenAIResponseInputToolFileSearch: + properties: + type: + type: string + const: file_search + title: Type + default: file_search + vector_store_ids: + items: + type: string + type: array + title: Vector Store Ids + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions + type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: + properties: + type: + type: string + const: function + title: Type + default: function + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + parameters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + required: + - name + - parameters + title: OpenAIResponseInputToolFunction + description: Function tool configuration for OpenAI response inputs. + OpenAIResponseInputToolWebSearch: + properties: + type: + title: Type + default: web_search + type: string + enum: + - web_search + - web_search_preview + - web_search_preview_2025_03_11 + - web_search_2025_08_26 + search_context_size: + anyOf: + - type: string + pattern: ^low|medium|high$ + - type: 'null' + default: medium + type: object + title: OpenAIResponseInputToolWebSearch + description: Web search tool configuration for OpenAI response inputs. 
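+    # Illustrative only: a request-side web search tool entry matching
+    # OpenAIResponseInputToolWebSearch could be as simple as:
+    #   {"type": "web_search", "search_context_size": "medium"}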
+ OpenAIResponseObjectWithInput: + properties: + created_at: + type: integer + title: Created At + error: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError + id: + type: string + title: Id + model: + type: string + title: Model + object: + type: string + const: response + title: Object + default: response + output: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + type: array + title: Output + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + default: true + previous_response_id: + anyOf: + - type: string + - type: 'null' + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt + status: + type: string + title: Status + temperature: + anyOf: + - type: number + - type: 'null' + text: + $ref: '#/components/schemas/OpenAIResponseText' + default: + format: + type: text + top_p: + anyOf: + - type: number + - type: 'null' + tools: + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' + truncation: + anyOf: + - type: string + - type: 'null' + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage + instructions: + anyOf: + - type: string + - type: 'null' + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + input: + items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Input + type: object + required: + - created_at + - id + - model + - output + - status + - input + title: OpenAIResponseObjectWithInput + description: OpenAI response object extended with input context information. 
+ OpenAIResponseOutput: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) + OpenAIResponsePrompt: + properties: + id: + type: string + title: Id + variables: + anyOf: + - additionalProperties: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: object + - type: 'null' + version: + anyOf: + - type: string + - type: 'null' + type: object + required: + - id + title: OpenAIResponsePrompt + description: OpenAI compatible Prompt object that is used in OpenAI responses. + OpenAIResponseText: + properties: + format: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseTextFormat' + title: OpenAIResponseTextFormat + - type: 'null' + title: OpenAIResponseTextFormat + type: object + title: OpenAIResponseText + description: Text response configuration for OpenAI responses. 
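+    # Illustrative only: a text response configuration matching OpenAIResponseText,
+    # using the default format shown elsewhere in this spec, could be:
+    #   {"format": {"type": "text"}}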
+ OpenAIResponseTool: + discriminator: + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + OpenAIResponseToolMCP: + properties: + type: + type: string + const: mcp + title: Type + default: mcp + server_label: + type: string + title: Server Label + allowed_tools: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter + type: object + required: + - server_label + title: OpenAIResponseToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response object. + OpenAIResponseUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + output_tokens: + type: integer + title: Output Tokens + total_tokens: + type: integer + title: Total Tokens + input_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails' + title: OpenAIResponseUsageInputTokensDetails + - type: 'null' + title: OpenAIResponseUsageInputTokensDetails + output_tokens_details: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails' + title: OpenAIResponseUsageOutputTokensDetails + - type: 'null' + title: OpenAIResponseUsageOutputTokensDetails + type: object + required: + - input_tokens + - output_tokens + - total_tokens + title: OpenAIResponseUsage + description: Usage information for OpenAI response. + ResponseGuardrailSpec: + description: Specification for a guardrail to apply during response generation. 
+      properties:
+        type:
+          title: Type
+          type: string
+      required:
+        - type
+      title: ResponseGuardrailSpec
+      type: object
+    OpenAIResponseInputTool:
+      discriminator:
+        mapping:
+          file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+          function: '#/components/schemas/OpenAIResponseInputToolFunction'
+          mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        propertyName: type
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          title: OpenAIResponseInputToolWebSearch
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+          title: OpenAIResponseInputToolFileSearch
+        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+          title: OpenAIResponseInputToolFunction
+        - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+          title: OpenAIResponseInputToolMCP
+      title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+    OpenAIResponseInputToolMCP:
+      properties:
+        type:
+          type: string
+          const: mcp
+          title: Type
+          default: mcp
+        server_label:
+          type: string
+          title: Server Label
+        server_url:
+          type: string
+          title: Server Url
+        headers:
+          anyOf:
+            - additionalProperties: true
+              type: object
+            - type: 'null'
+        authorization:
+          anyOf:
+            - type: string
+            - type: 'null'
+        require_approval:
+          anyOf:
+            - type: string
+              const: always
+            - type: string
+              const: never
+            - $ref: '#/components/schemas/ApprovalFilter'
+              title: ApprovalFilter
+          title: string | ApprovalFilter
+          default: never
+        allowed_tools:
+          anyOf:
+            - items:
+                type: string
+              type: array
+              title: list[string]
+            - $ref: '#/components/schemas/AllowedToolsFilter'
+              title: AllowedToolsFilter
+            - type: 'null'
+          title: list[string] | AllowedToolsFilter
+      type: object
+      required:
+        - server_label
+        - server_url
+      title: OpenAIResponseInputToolMCP
+      description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
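+    # Illustrative only (server_label, server_url, and tool names are hypothetical):
+    # an MCP tool entry matching OpenAIResponseInputToolMCP could be:
+    #   {"type": "mcp", "server_label": "docs",
+    #    "server_url": "https://example.com/mcp",
+    #    "require_approval": "never", "allowed_tools": ["search"]}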
+ OpenAIResponseObject: + properties: + created_at: + type: integer + title: Created At + error: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError + id: + type: string + title: Id + model: + type: string + title: Model + object: + type: string + const: response + title: Object + default: response + output: + items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + type: array + title: Output + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + default: true + previous_response_id: + anyOf: + - type: string + - type: 'null' + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt + status: + type: string + title: Status + temperature: + anyOf: + - type: number + - type: 'null' + text: + $ref: '#/components/schemas/OpenAIResponseText' + default: + format: + type: text + top_p: + anyOf: + - type: number + - type: 'null' + tools: + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' + truncation: + anyOf: + - type: string + - type: 'null' + usage: + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage + instructions: + anyOf: + - type: string + - type: 'null' + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object + required: + - created_at + - id + - model + - output + - status + title: OpenAIResponseObject + description: Complete OpenAI response object containing generation results and metadata. + OpenAIResponseContentPartOutputText: + description: Text content within a streamed response part. + properties: + type: + const: output_text + default: output_text + title: Type + type: string + text: + title: Text + type: string + annotations: + items: + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + title: Annotations + type: array + logprobs: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + nullable: true + required: + - text + title: OpenAIResponseContentPartOutputText + type: object + OpenAIResponseContentPartReasoningSummary: + description: Reasoning summary part in a streamed response. + properties: + type: + const: summary_text + default: summary_text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: OpenAIResponseContentPartReasoningSummary + type: object + OpenAIResponseContentPartReasoningText: + description: Reasoning text emitted as part of a streamed response. 
+      properties:
+        type:
+          const: reasoning_text
+          default: reasoning_text
+          title: Type
+          type: string
+        text:
+          title: Text
+          type: string
+      required:
+      - text
+      title: OpenAIResponseContentPartReasoningText
+      type: object
+    OpenAIResponseObjectStream:
+      discriminator:
+        mapping:
+          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+          response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
+          response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
+          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+          response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+          response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+          response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+          response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+          response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+          response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+          response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+          response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+          response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+          response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+          response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+          response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+          response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+          response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+          response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+          response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+          response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+          response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+          response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
+          response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
+          response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
+          response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
+          response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
+          response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
+          response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
+          response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
+          response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
+          response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+          response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+          response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+        title: OpenAIResponseObjectStreamResponseCreated
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+        title: OpenAIResponseObjectStreamResponseInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+        title: OpenAIResponseObjectStreamResponseOutputItemAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+        title: OpenAIResponseObjectStreamResponseOutputItemDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+        title: OpenAIResponseObjectStreamResponseOutputTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
+        title: OpenAIResponseObjectStreamResponseOutputTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+        title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+        title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallSearching
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+        title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+        title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+        title: OpenAIResponseObjectStreamResponseMcpCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+        title: OpenAIResponseObjectStreamResponseMcpCallFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+        title: OpenAIResponseObjectStreamResponseMcpCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
+        title: OpenAIResponseObjectStreamResponseContentPartAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
+        title: OpenAIResponseObjectStreamResponseContentPartDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
+        title: OpenAIResponseObjectStreamResponseReasoningTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
+        title: OpenAIResponseObjectStreamResponseReasoningTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
+        title: OpenAIResponseObjectStreamResponseRefusalDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
+        title: OpenAIResponseObjectStreamResponseRefusalDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+        title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+        title: OpenAIResponseObjectStreamResponseIncomplete
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+        title: OpenAIResponseObjectStreamResponseFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+        title: OpenAIResponseObjectStreamResponseCompleted
+      title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
+    OpenAIResponseObjectStreamResponseCompleted:
+      description: Streaming event indicating a response has been completed.
+      properties:
+        response:
+          $ref: '#/components/schemas/OpenAIResponseObject'
+        type:
+          const: response.completed
+          default: response.completed
+          title: Type
+          type: string
+      required:
+      - response
+      title: OpenAIResponseObjectStreamResponseCompleted
+      type: object
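+    # Editor's note (illustrative sketch, not generated from the models): a client
+    # reading an OpenAIResponseObjectStream receives one discriminated event per SSE
+    # chunk, typically ordered like:
+    #   response.created -> response.in_progress -> response.output_item.added
+    #   -> response.output_text.delta (repeated) -> response.output_text.done
+    #   -> response.output_item.done -> response.completed
+    # A single delta event (fields from the response.output_text.delta variant
+    # defined below; the item id is hypothetical) might look like:
+    #   type: response.output_text.delta
+    #   item_id: msg_abc123
+    #   output_index: 0
+    #   content_index: 0
+    #   delta: "Hello"
+    #   sequence_number: 7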
+    OpenAIResponseObjectStreamResponseContentPartAdded:
+      description: Streaming event for when a new content part is added to a response item.
+      properties:
+        content_index:
+          title: Content Index
+          type: integer
+        response_id:
+          title: Response Id
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        part:
+          discriminator:
+            mapping:
+              output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
+              reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            title: OpenAIResponseContentPartOutputText
+          - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            title: OpenAIResponseContentPartRefusal
+          - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+            title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.content_part.added
+          default: response.content_part.added
+          title: Type
+          type: string
+      required:
+      - content_index
+      - response_id
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartAdded
+      type: object
+    OpenAIResponseObjectStreamResponseContentPartDone:
+      description: Streaming event for when a content part is completed.
+      properties:
+        content_index:
+          title: Content Index
+          type: integer
+        response_id:
+          title: Response Id
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        part:
+          discriminator:
+            mapping:
+              output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
+              reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            title: OpenAIResponseContentPartOutputText
+          - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            title: OpenAIResponseContentPartRefusal
+          - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+            title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.content_part.done
+          default: response.content_part.done
+          title: Type
+          type: string
+      required:
+      - content_index
+      - response_id
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartDone
+      type: object
+    OpenAIResponseObjectStreamResponseCreated:
+      description: Streaming event indicating a new response has been created.
+      properties:
+        response:
+          $ref: '#/components/schemas/OpenAIResponseObject'
+        type:
+          const: response.created
+          default: response.created
+          title: Type
+          type: string
+      required:
+      - response
+      title: OpenAIResponseObjectStreamResponseCreated
+      type: object
+    OpenAIResponseObjectStreamResponseFailed:
+      description: Streaming event emitted when a response fails.
+      properties:
+        response:
+          $ref: '#/components/schemas/OpenAIResponseObject'
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.failed
+          default: response.failed
+          title: Type
+          type: string
+      required:
+      - response
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFailed
+      type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted:
+      description: Streaming event for completed file search calls.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.file_search_call.completed
+          default: response.file_search_call.completed
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
+      type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress:
+      description: Streaming event for file search calls in progress.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.file_search_call.in_progress
+          default: response.file_search_call.in_progress
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
+      type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching:
+      description: Streaming event for file search currently searching.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.file_search_call.searching
+          default: response.file_search_call.searching
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
+      type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta:
+      description: Streaming event for incremental function call argument updates.
+      properties:
+        delta:
+          title: Delta
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.function_call_arguments.delta
+          default: response.function_call_arguments.delta
+          title: Type
+          type: string
+      required:
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
+      type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone:
+      description: Streaming event for when function call arguments are completed.
+      properties:
+        arguments:
+          title: Arguments
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.function_call_arguments.done
+          default: response.function_call_arguments.done
+          title: Type
+          type: string
+      required:
+      - arguments
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
+      type: object
+    OpenAIResponseObjectStreamResponseInProgress:
+      description: Streaming event indicating the response remains in progress.
+      properties:
+        response:
+          $ref: '#/components/schemas/OpenAIResponseObject'
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.in_progress
+          default: response.in_progress
+          title: Type
+          type: string
+      required:
+      - response
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseInProgress
+      type: object
+    OpenAIResponseObjectStreamResponseIncomplete:
+      description: Streaming event emitted when a response ends in an incomplete state.
+      properties:
+        response:
+          $ref: '#/components/schemas/OpenAIResponseObject'
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.incomplete
+          default: response.incomplete
+          title: Type
+          type: string
+      required:
+      - response
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseIncomplete
+      type: object
+    OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta:
+      properties:
+        delta:
+          title: Delta
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.mcp_call.arguments.delta
+          default: response.mcp_call.arguments.delta
+          title: Type
+          type: string
+      required:
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
+      type: object
+    OpenAIResponseObjectStreamResponseMcpCallArgumentsDone:
+      properties:
+        arguments:
+          title: Arguments
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.mcp_call.arguments.done
+          default: response.mcp_call.arguments.done
+          title: Type
+          type: string
+      required:
+      - arguments
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
+      type: object
+    OpenAIResponseObjectStreamResponseMcpCallCompleted:
+      description: Streaming event for completed MCP calls.
+      properties:
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.mcp_call.completed
+          default: response.mcp_call.completed
+          title: Type
+          type: string
+      required:
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallCompleted
+      type: object
+    OpenAIResponseObjectStreamResponseMcpCallFailed:
+      description: Streaming event for failed MCP calls.
+      properties:
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.mcp_call.failed
+          default: response.mcp_call.failed
+          title: Type
+          type: string
+      required:
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallFailed
+      type: object
+    OpenAIResponseObjectStreamResponseMcpCallInProgress:
+      description: Streaming event for MCP calls in progress.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.mcp_call.in_progress
+          default: response.mcp_call.in_progress
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallInProgress
+      type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsCompleted:
+      properties:
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.mcp_list_tools.completed
+          default: response.mcp_list_tools.completed
+          title: Type
+          type: string
+      required:
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
+      type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsFailed:
+      properties:
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.mcp_list_tools.failed
+          default: response.mcp_list_tools.failed
+          title: Type
+          type: string
+      required:
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
+      type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsInProgress:
+      properties:
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.mcp_list_tools.in_progress
+          default: response.mcp_list_tools.in_progress
+          title: Type
+          type: string
+      required:
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
+      type: object
+    OpenAIResponseObjectStreamResponseOutputItemAdded:
+      description: Streaming event for when a new output item is added to the response.
+      properties:
+        response_id:
+          title: Response Id
+          type: string
+        item:
+          discriminator:
+            mapping:
+              file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseMessage'
+            title: OpenAIResponseMessage
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseOutputMessageWebSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            title: OpenAIResponseOutputMessageFileSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            title: OpenAIResponseOutputMessageFunctionToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            title: OpenAIResponseOutputMessageMCPCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            title: OpenAIResponseOutputMessageMCPListTools
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.output_item.added
+          default: response.output_item.added
+          title: Type
+          type: string
+      required:
+      - response_id
+      - item
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemAdded
+      type: object
+    OpenAIResponseObjectStreamResponseOutputItemDone:
+      description: Streaming event for when an output item is completed.
+      properties:
+        response_id:
+          title: Response Id
+          type: string
+        item:
+          discriminator:
+            mapping:
+              file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseMessage'
+            title: OpenAIResponseMessage
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseOutputMessageWebSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            title: OpenAIResponseOutputMessageFileSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            title: OpenAIResponseOutputMessageFunctionToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            title: OpenAIResponseOutputMessageMCPCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            title: OpenAIResponseOutputMessageMCPListTools
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.output_item.done
+          default: response.output_item.done
+          title: Type
+          type: string
+      required:
+      - response_id
+      - item
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemDone
+      type: object
+    OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded:
+      description: Streaming event for when an annotation is added to output text.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        content_index:
+          title: Content Index
+          type: integer
+        annotation_index:
+          title: Annotation Index
+          type: integer
+        annotation:
+          discriminator:
+            mapping:
+              container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+              file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+            title: OpenAIResponseAnnotationFileCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            title: OpenAIResponseAnnotationCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+            title: OpenAIResponseAnnotationContainerFileCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+            title: OpenAIResponseAnnotationFilePath
+          title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.output_text.annotation.added
+          default: response.output_text.annotation.added
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - content_index
+      - annotation_index
+      - annotation
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
+      type: object
+    OpenAIResponseObjectStreamResponseOutputTextDelta:
+      description: Streaming event for incremental text content updates.
+      properties:
+        content_index:
+          title: Content Index
+          type: integer
+        delta:
+          title: Delta
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.output_text.delta
+          default: response.output_text.delta
+          title: Type
+          type: string
+      required:
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDelta
+      type: object
+    OpenAIResponseObjectStreamResponseOutputTextDone:
+      description: Streaming event for when text output is completed.
+      properties:
+        content_index:
+          title: Content Index
+          type: integer
+        text:
+          title: Text
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.output_text.done
+          default: response.output_text.done
+          title: Type
+          type: string
+      required:
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDone
+      type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded:
+      description: Streaming event for when a new reasoning summary part is added.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        part:
+          $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        summary_index:
+          title: Summary Index
+          type: integer
+        type:
+          const: response.reasoning_summary_part.added
+          default: response.reasoning_summary_part.added
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
+      type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartDone:
+      description: Streaming event for when a reasoning summary part is completed.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        part:
+          $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        summary_index:
+          title: Summary Index
+          type: integer
+        type:
+          const: response.reasoning_summary_part.done
+          default: response.reasoning_summary_part.done
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
+      type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta:
+      description: Streaming event for incremental reasoning summary text updates.
+      properties:
+        delta:
+          title: Delta
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        summary_index:
+          title: Summary Index
+          type: integer
+        type:
+          const: response.reasoning_summary_text.delta
+          default: response.reasoning_summary_text.delta
+          title: Type
+          type: string
+      required:
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
+      type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDone:
+      description: Streaming event for when reasoning summary text is completed.
+      properties:
+        text:
+          title: Text
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        summary_index:
+          title: Summary Index
+          type: integer
+        type:
+          const: response.reasoning_summary_text.done
+          default: response.reasoning_summary_text.done
+          title: Type
+          type: string
+      required:
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
+      type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDelta:
+      description: Streaming event for incremental reasoning text updates.
+      properties:
+        content_index:
+          title: Content Index
+          type: integer
+        delta:
+          title: Delta
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.reasoning_text.delta
+          default: response.reasoning_text.delta
+          title: Type
+          type: string
+      required:
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDelta
+      type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDone:
+      description: Streaming event for when reasoning text is completed.
+      properties:
+        content_index:
+          title: Content Index
+          type: integer
+        text:
+          title: Text
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.reasoning_text.done
+          default: response.reasoning_text.done
+          title: Type
+          type: string
+      required:
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDone
+      type: object
+    OpenAIResponseObjectStreamResponseRefusalDelta:
+      description: Streaming event for incremental refusal text updates.
+      properties:
+        content_index:
+          title: Content Index
+          type: integer
+        delta:
+          title: Delta
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.refusal.delta
+          default: response.refusal.delta
+          title: Type
+          type: string
+      required:
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDelta
+      type: object
+    OpenAIResponseObjectStreamResponseRefusalDone:
+      description: Streaming event for when refusal text is completed.
+      properties:
+        content_index:
+          title: Content Index
+          type: integer
+        refusal:
+          title: Refusal
+          type: string
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.refusal.done
+          default: response.refusal.done
+          title: Type
+          type: string
+      required:
+      - content_index
+      - refusal
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDone
+      type: object
+    OpenAIResponseObjectStreamResponseWebSearchCallCompleted:
+      description: Streaming event for completed web search calls.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.web_search_call.completed
+          default: response.web_search_call.completed
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted
+      type: object
+    OpenAIResponseObjectStreamResponseWebSearchCallInProgress:
+      description: Streaming event for web search calls in progress.
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.web_search_call.in_progress
+          default: response.web_search_call.in_progress
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress
+      type: object
+    OpenAIResponseObjectStreamResponseWebSearchCallSearching:
+      properties:
+        item_id:
+          title: Item Id
+          type: string
+        output_index:
+          title: Output Index
+          type: integer
+        sequence_number:
+          title: Sequence Number
+          type: integer
+        type:
+          const: response.web_search_call.searching
+          default: response.web_search_call.searching
+          title: Type
+          type: string
+      required:
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseWebSearchCallSearching
+      type: object
+    OpenAIDeleteResponseObject:
+      properties:
+        id:
+          type: string
+          title: Id
+        object:
+          type: string
+          const: response
+          title: Object
+          default: response
+        deleted:
+          type: boolean
+          title: Deleted
+          default: true
+      type: object
+      required:
+      - id
+      title: OpenAIDeleteResponseObject
+      description: Response object confirming deletion of an OpenAI response.
+    ListOpenAIResponseInputItem:
+      properties:
+        data:
+          items:
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseOutputMessageWebSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                title: OpenAIResponseOutputMessageFileSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                title: OpenAIResponseOutputMessageFunctionToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                title: OpenAIResponseOutputMessageMCPCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                title: OpenAIResponseOutputMessageMCPListTools
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                title: OpenAIResponseMCPApprovalRequest
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                  function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                  mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                  mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                  mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                  message: '#/components/schemas/OpenAIResponseMessage-Output'
+                  web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseMessage-Output | ... (7 variants)
+            - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+              title: OpenAIResponseInputFunctionToolCallOutput
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+              title: OpenAIResponseMCPApprovalResponse
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output
+          type: array
+          title: Data
+        object:
+          type: string
+          const: list
+          title: Object
+          default: list
+      type: object
+      required:
+      - data
+      title: ListOpenAIResponseInputItem
+      description: List container for OpenAI response input items.
+    RunShieldResponse:
+      properties:
+        violation:
+          anyOf:
+          - $ref: '#/components/schemas/SafetyViolation'
+            title: SafetyViolation
+          - type: 'null'
+          title: SafetyViolation
+      type: object
+      title: RunShieldResponse
+      description: Response from running a safety shield.
+    SafetyViolation:
+      properties:
+        violation_level:
+          $ref: '#/components/schemas/ViolationLevel'
+        user_message:
+          anyOf:
+          - type: string
+          - type: 'null'
+        metadata:
+          additionalProperties: true
+          type: object
+          title: Metadata
+      type: object
+      required:
+      - violation_level
+      title: SafetyViolation
+      description: Details of a safety violation detected by content moderation.
+    ViolationLevel:
+      type: string
+      enum:
+      - info
+      - warn
+      - error
+      title: ViolationLevel
+      description: Severity level of a safety violation.
+    AggregationFunctionType:
+      type: string
+      enum:
+      - average
+      - weighted_average
+      - median
+      - categorical_count
+      - accuracy
+      title: AggregationFunctionType
+      description: Types of aggregation functions for scoring results.
+    ArrayType:
+      properties:
+        type:
+          type: string
+          const: array
+          title: Type
+          default: array
+      type: object
+      title: ArrayType
+      description: Parameter type for array values.
+    BasicScoringFnParams:
+      properties:
+        type:
+          type: string
+          const: basic
+          title: Type
+          default: basic
+        aggregation_functions:
+          items:
+            $ref: '#/components/schemas/AggregationFunctionType'
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
+      type: object
+      title: BasicScoringFnParams
+      description: Parameters for basic scoring function configuration.
+    BooleanType:
+      properties:
+        type:
+          type: string
+          const: boolean
+          title: Type
+          default: boolean
+      type: object
+      title: BooleanType
+      description: Parameter type for boolean values.
+    ChatCompletionInputType:
+      properties:
+        type:
+          type: string
+          const: chat_completion_input
+          title: Type
+          default: chat_completion_input
+      type: object
+      title: ChatCompletionInputType
+      description: Parameter type for chat completion input.
+    CompletionInputType:
+      properties:
+        type:
+          type: string
+          const: completion_input
+          title: Type
+          default: completion_input
+      type: object
+      title: CompletionInputType
+      description: Parameter type for completion input.
+    JsonType:
+      properties:
+        type:
+          type: string
+          const: json
+          title: Type
+          default: json
+      type: object
+      title: JsonType
+      description: Parameter type for JSON values.
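+    # Editor's note (illustrative, not part of the generated spec): a concrete
+    # ScoringFnParams value using the basic variant defined above, with
+    # aggregation_functions entries drawn from AggregationFunctionType:
+    #   type: basic
+    #   aggregation_functions:
+    #   - average
+    #   - categorical_count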
+    LLMAsJudgeScoringFnParams:
+      properties:
+        type:
+          type: string
+          const: llm_as_judge
+          title: Type
+          default: llm_as_judge
+        judge_model:
+          type: string
+          title: Judge Model
+        prompt_template:
+          anyOf:
+          - type: string
+          - type: 'null'
+        judge_score_regexes:
+          items:
+            type: string
+          type: array
+          title: Judge Score Regexes
+          description: Regexes to extract the answer from generated response
+        aggregation_functions:
+          items:
+            $ref: '#/components/schemas/AggregationFunctionType'
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
+      type: object
+      required:
+      - judge_model
+      title: LLMAsJudgeScoringFnParams
+      description: Parameters for LLM-as-judge scoring function configuration.
+    NumberType:
+      properties:
+        type:
+          type: string
+          const: number
+          title: Type
+          default: number
+      type: object
+      title: NumberType
+      description: Parameter type for numeric values.
+    ObjectType:
+      properties:
+        type:
+          type: string
+          const: object
+          title: Type
+          default: object
+      type: object
+      title: ObjectType
+      description: Parameter type for object values.
+    RegexParserScoringFnParams:
+      properties:
+        type:
+          type: string
+          const: regex_parser
+          title: Type
+          default: regex_parser
+        parsing_regexes:
+          items:
+            type: string
+          type: array
+          title: Parsing Regexes
+          description: Regexes to extract the answer from generated response
+        aggregation_functions:
+          items:
+            $ref: '#/components/schemas/AggregationFunctionType'
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
+      type: object
+      title: RegexParserScoringFnParams
+      description: Parameters for regex parser scoring function configuration.
+    ScoringFn:
+      properties:
+        identifier:
+          type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
+        provider_resource_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
+        provider_id:
+          type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
+        type:
+          type: string
+          const: scoring_function
+          title: Type
+          default: scoring_function
+        description:
+          anyOf:
+          - type: string
+          - type: 'null'
+        metadata:
+          additionalProperties: true
+          type: object
+          title: Metadata
+          description: Any additional metadata for this definition
+        return_type:
+          oneOf:
+          - $ref: '#/components/schemas/StringType'
+            title: StringType
+          - $ref: '#/components/schemas/NumberType'
+            title: NumberType
+          - $ref: '#/components/schemas/BooleanType'
+            title: BooleanType
+          - $ref: '#/components/schemas/ArrayType'
+            title: ArrayType
+          - $ref: '#/components/schemas/ObjectType'
+            title: ObjectType
+          - $ref: '#/components/schemas/JsonType'
+            title: JsonType
+          - $ref: '#/components/schemas/UnionType'
+            title: UnionType
+          - $ref: '#/components/schemas/ChatCompletionInputType'
+            title: ChatCompletionInputType
+          - $ref: '#/components/schemas/CompletionInputType'
+            title: CompletionInputType
+          title: StringType | ... (9 variants)
+          description: The return type of the deterministic function
+          discriminator:
+            propertyName: type
+            mapping:
+              array: '#/components/schemas/ArrayType'
+              boolean: '#/components/schemas/BooleanType'
+              chat_completion_input: '#/components/schemas/ChatCompletionInputType'
+              completion_input: '#/components/schemas/CompletionInputType'
+              json: '#/components/schemas/JsonType'
+              number: '#/components/schemas/NumberType'
+              object: '#/components/schemas/ObjectType'
+              string: '#/components/schemas/StringType'
+              union: '#/components/schemas/UnionType'
+        params:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              title: LLMAsJudgeScoringFnParams
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+              title: RegexParserScoringFnParams
+            - $ref: '#/components/schemas/BasicScoringFnParams'
+              title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          - type: 'null'
+          title: Params
+          description: The parameters for the scoring function for benchmark eval; these can be overridden for app eval
+      type: object
+      required:
+      - identifier
+      - provider_id
+      - return_type
+      title: ScoringFn
+      description: A scoring function resource for evaluating model outputs.
+    ScoringFnParams:
+      discriminator:
+        mapping:
+          basic: '#/components/schemas/BasicScoringFnParams'
+          llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+          regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+        title: LLMAsJudgeScoringFnParams
+      - $ref: '#/components/schemas/RegexParserScoringFnParams'
+        title: RegexParserScoringFnParams
+      - $ref: '#/components/schemas/BasicScoringFnParams'
+        title: BasicScoringFnParams
+      title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+    ScoringFnParamsType:
+      description: Types of scoring function parameter configurations.
+      enum:
+      - llm_as_judge
+      - regex_parser
+      - basic
+      title: ScoringFnParamsType
+      type: string
+    StringType:
+      properties:
+        type:
+          type: string
+          const: string
+          title: Type
+          default: string
+      type: object
+      title: StringType
+      description: Parameter type for string values.
+    UnionType:
+      properties:
+        type:
+          type: string
+          const: union
+          title: Type
+          default: union
+      type: object
+      title: UnionType
+      description: Parameter type for union values.
+    ListScoringFunctionsResponse:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/ScoringFn'
+          type: array
+          title: Data
+      type: object
+      required:
+      - data
+      title: ListScoringFunctionsResponse
+    ScoreResponse:
+      properties:
+        results:
+          additionalProperties:
+            $ref: '#/components/schemas/ScoringResult'
+          type: object
+          title: Results
+      type: object
+      required:
+      - results
+      title: ScoreResponse
+      description: The response from scoring.
+    ScoringResult:
+      properties:
+        score_rows:
+          items:
+            additionalProperties: true
+            type: object
+          type: array
+          title: Score Rows
+        aggregated_results:
+          additionalProperties: true
+          type: object
+          title: Aggregated Results
+      type: object
+      required:
+      - score_rows
+      - aggregated_results
+      title: ScoringResult
+      description: A scoring result for a single row.
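+    # Editor's note (illustrative sketch): a ScoreResponse maps scoring function
+    # ids to ScoringResult objects; the id and values below are hypothetical.
+    #   results:
+    #     basic::equality:
+    #       score_rows:
+    #       - score: 1.0
+    #       aggregated_results:
+    #         accuracy: 1.0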
+    ScoreBatchResponse:
+      properties:
+        dataset_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        results:
+          additionalProperties:
+            $ref: '#/components/schemas/ScoringResult'
+          type: object
+          title: Results
+      type: object
+      required:
+      - results
+      title: ScoreBatchResponse
+      description: Response from batch scoring operations on datasets.
+    Shield:
+      properties:
+        identifier:
+          type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
+        provider_resource_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
+        provider_id:
+          type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
+        type:
+          type: string
+          const: shield
+          title: Type
+          default: shield
+        params:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
+      required:
+      - identifier
+      - provider_id
+      title: Shield
+      description: A safety shield resource that can be used to check content.
+    ListShieldsResponse:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/Shield'
+          type: array
+          title: Data
+      type: object
+      required:
+      - data
+      title: ListShieldsResponse
+    ImageContentItem:
+      description: An image content item
+      properties:
+        type:
+          const: image
+          default: image
+          title: Type
+          type: string
+        image:
+          $ref: '#/components/schemas/_URLOrData'
+      required:
+      - image
+      title: ImageContentItem
+      type: object
+    InterleavedContent:
+      anyOf:
+      - type: string
+      - discriminator:
+          mapping:
+            image: '#/components/schemas/ImageContentItem'
+            text: '#/components/schemas/TextContentItem'
+          propertyName: type
+        oneOf:
+        - $ref: '#/components/schemas/ImageContentItem'
+          title: ImageContentItem
+        - $ref: '#/components/schemas/TextContentItem'
+          title: TextContentItem
+        title: ImageContentItem | TextContentItem
+      - items:
+          discriminator:
+            mapping:
+              image: '#/components/schemas/ImageContentItem'
+              text: '#/components/schemas/TextContentItem'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/ImageContentItem'
+            title: ImageContentItem
+          - $ref: '#/components/schemas/TextContentItem'
+            title: TextContentItem
+          title: ImageContentItem | TextContentItem
+        type: array
+        title: list[ImageContentItem | TextContentItem]
+      title: string | list[ImageContentItem | TextContentItem]
+    InterleavedContentItem:
+      discriminator:
+        mapping:
+          image: '#/components/schemas/ImageContentItem'
+          text: '#/components/schemas/TextContentItem'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/ImageContentItem'
+        title: ImageContentItem
+      - $ref: '#/components/schemas/TextContentItem'
+        title: TextContentItem
+      title: ImageContentItem | TextContentItem
+    TextContentItem:
+      properties:
+        type:
+          type: string
+          const: text
+          title: Type
+          default: text
+        text:
+          type: string
+          title: Text
+      type: object
+      required:
+      - text
+      title: TextContentItem
+      description: A text content item
+    ToolInvocationResult:
+      properties:
+        content:
+          anyOf:
+          - type: string
+          - oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Output'
+              title: ImageContentItem-Output
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Output'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Output | TextContentItem
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem-Output'
+                title: ImageContentItem-Output
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Output'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Output | TextContentItem
+            type: array
+            title: list[ImageContentItem-Output | TextContentItem]
+          - type: 'null'
+          title: string | list[ImageContentItem-Output | TextContentItem]
+        error_message:
+          anyOf:
+          - type: string
+          - type: 'null'
+        error_code:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
+      title: ToolInvocationResult
+      description: Result of a tool invocation.
+    URL:
+      properties:
+        uri:
+          type: string
+          title: Uri
+      type: object
+      required:
+      - uri
+      title: URL
+      description: A URL reference to external content.
+    ToolDef:
+      properties:
+        toolgroup_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        name:
+          type: string
+          title: Name
+        description:
+          anyOf:
+          - type: string
+          - type: 'null'
+        input_schema:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        output_schema:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
+      required:
+      - name
+      title: ToolDef
+      description: Tool definition used in runtime contexts.
+    ListToolDefsResponse:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/ToolDef'
+          type: array
+          title: Data
+      type: object
+      required:
+      - data
+      title: ListToolDefsResponse
+      description: Response containing a list of tool definitions.
+    ToolGroup:
+      properties:
+        identifier:
+          type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
+        provider_resource_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
+        provider_id:
+          type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
+        type:
+          type: string
+          const: tool_group
+          title: Type
+          default: tool_group
+        mcp_endpoint:
+          anyOf:
+          - $ref: '#/components/schemas/URL'
+            title: URL
+          - type: 'null'
+          title: URL
+        args:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
+      required:
+      - identifier
+      - provider_id
+      title: ToolGroup
+      description: A group of related tools managed together.
+    ListToolGroupsResponse:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/ToolGroup'
+          type: array
+          title: Data
+      type: object
+      required:
+      - data
+      title: ListToolGroupsResponse
+      description: Response containing a list of tool groups.
+    Chunk:
+      description: A chunk of content that can be inserted into a vector database.
+      properties:
+        content:
+          anyOf:
+          - type: string
+          - discriminator:
+              mapping:
+                image: '#/components/schemas/ImageContentItem'
+                text: '#/components/schemas/TextContentItem'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/ImageContentItem'
+              title: ImageContentItem
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          - items:
+              discriminator:
+                mapping:
+                  image: '#/components/schemas/ImageContentItem'
+                  text: '#/components/schemas/TextContentItem'
+                propertyName: type
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem'
+                title: ImageContentItem
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              title: ImageContentItem | TextContentItem
+            type: array
+            title: list[ImageContentItem | TextContentItem]
+          title: string | list[ImageContentItem | TextContentItem]
+        chunk_id:
+          title: Chunk Id
+          type: string
+        metadata:
+          additionalProperties: true
+          title: Metadata
+          type: object
+        embedding:
+          anyOf:
+          - items:
+              type: number
+            type: array
+          - type: 'null'
+          nullable: true
+        chunk_metadata:
+          anyOf:
+          - $ref: '#/components/schemas/ChunkMetadata'
+            title: ChunkMetadata
+          - type: 'null'
+          nullable: true
+          title: ChunkMetadata
+      required:
+      - content
+      - chunk_id
+      title: Chunk
+      type: object
+    ChunkMetadata:
+      properties:
+        chunk_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        document_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        source:
+          anyOf:
+          - type: string
+          - type: 'null'
+        created_timestamp:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        updated_timestamp:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        chunk_window:
+          anyOf:
+          - type: string
+          - type: 'null'
+        chunk_tokenizer:
+          anyOf:
+          - type: string
+          - type: 'null'
+        chunk_embedding_model:
+          anyOf:
+          - type: string
+          - type: 'null'
+        chunk_embedding_dimension:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        content_token_count:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata_token_count:
+          anyOf:
+          - type: integer
+          - type: 'null'
+      type: object
+      title: ChunkMetadata
+      description: |-
+        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
+        will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
+        is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after.
+        Use `Chunk.metadata` for metadata that will be used in the context during inference.
+    QueryChunksResponse:
+      properties:
+        chunks:
+          items:
+            $ref: '#/components/schemas/Chunk-Output'
+          type: array
+          title: Chunks
+        scores:
+          items:
+            type: number
+          type: array
+          title: Scores
+      type: object
+      required:
+      - chunks
+      - scores
+      title: QueryChunksResponse
+      description: Response from querying chunks in a vector database.
+    VectorStoreFileCounts:
+      properties:
+        completed:
+          type: integer
+          title: Completed
+        cancelled:
+          type: integer
+          title: Cancelled
+        failed:
+          type: integer
+          title: Failed
+        in_progress:
+          type: integer
+          title: In Progress
+        total:
+          type: integer
+          title: Total
+      type: object
+      required:
+      - completed
+      - cancelled
+      - failed
+      - in_progress
+      - total
+      title: VectorStoreFileCounts
+      description: File processing status counts for a vector store.
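+    # Editor's note (illustrative): file_counts as it might appear on a vector
+    # store, where total is typically the sum of the per-status counts:
+    #   completed: 8
+    #   in_progress: 1
+    #   cancelled: 0
+    #   failed: 1
+    #   total: 10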
+    VectorStoreListResponse:
+      properties:
+        object:
+          type: string
+          title: Object
+          default: list
+        data:
+          items:
+            $ref: '#/components/schemas/VectorStoreObject'
+          type: array
+          title: Data
+        first_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        last_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        has_more:
+          type: boolean
+          title: Has More
+          default: false
+      type: object
+      required:
+      - data
+      title: VectorStoreListResponse
+      description: Response from listing vector stores.
+    VectorStoreObject:
+      properties:
+        id:
+          type: string
+          title: Id
+        object:
+          type: string
+          title: Object
+          default: vector_store
+        created_at:
+          type: integer
+          title: Created At
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+        usage_bytes:
+          type: integer
+          title: Usage Bytes
+          default: 0
+        file_counts:
+          $ref: '#/components/schemas/VectorStoreFileCounts'
+        status:
+          type: string
+          title: Status
+          default: completed
+        expires_after:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        expires_at:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        last_active_at:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata:
+          additionalProperties: true
+          type: object
+          title: Metadata
+      type: object
+      required:
+      - id
+      - created_at
+      - file_counts
+      title: VectorStoreObject
+      description: OpenAI Vector Store object.
+    VectorStoreChunkingStrategy:
+      discriminator:
+        mapping:
+          auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+          static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+        title: VectorStoreChunkingStrategyAuto
+      - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+        title: VectorStoreChunkingStrategyStatic
+      title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
+    VectorStoreChunkingStrategyAuto:
+      properties:
+        type:
+          type: string
+          const: auto
+          title: Type
+          default: auto
+      type: object
+      title: VectorStoreChunkingStrategyAuto
+      description: Automatic chunking strategy for vector store files.
+    VectorStoreChunkingStrategyStatic:
+      properties:
+        type:
+          type: string
+          const: static
+          title: Type
+          default: static
+        static:
+          $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
+      type: object
+      required:
+      - static
+      title: VectorStoreChunkingStrategyStatic
+      description: Static chunking strategy with configurable parameters.
+    VectorStoreChunkingStrategyStaticConfig:
+      properties:
+        chunk_overlap_tokens:
+          type: integer
+          title: Chunk Overlap Tokens
+          default: 400
+        max_chunk_size_tokens:
+          type: integer
+          maximum: 4096.0
+          minimum: 100.0
+          title: Max Chunk Size Tokens
+          default: 800
+      type: object
+      title: VectorStoreChunkingStrategyStaticConfig
+      description: Configuration for static chunking strategy.
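+    # Editor's note (illustrative): a static chunking strategy spelled out with
+    # the defaults declared above; max_chunk_size_tokens must stay in [100, 4096].
+    #   chunking_strategy:
+    #     type: static
+    #     static:
+    #       chunk_overlap_tokens: 400
+    #       max_chunk_size_tokens: 800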
+    OpenAICreateVectorStoreRequestWithExtraBody:
+      properties:
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+        file_ids:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+        expires_after:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        chunking_strategy:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+              title: VectorStoreChunkingStrategyAuto
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+              title: VectorStoreChunkingStrategyStatic
+            discriminator:
+              propertyName: type
+              mapping:
+                auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+                static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+            title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
+          - type: 'null'
+          title: Chunking Strategy
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      additionalProperties: true
+      type: object
+      title: OpenAICreateVectorStoreRequestWithExtraBody
+      description: Request to create a vector store with extra_body support.
+    VectorStoreDeleteResponse:
+      properties:
+        id:
+          type: string
+          title: Id
+        object:
+          type: string
+          title: Object
+          default: vector_store.deleted
+        deleted:
+          type: boolean
+          title: Deleted
+          default: true
+      type: object
+      required:
+      - id
+      title: VectorStoreDeleteResponse
+      description: Response from deleting a vector store.
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody:
+      properties:
+        file_ids:
+          items:
+            type: string
+          type: array
+          title: File Ids
+        attributes:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        chunking_strategy:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+              title: VectorStoreChunkingStrategyAuto
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+              title: VectorStoreChunkingStrategyStatic
+            discriminator:
+              propertyName: type
+              mapping:
+                auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+                static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+            title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
+          - type: 'null'
+          title: Chunking Strategy
+      additionalProperties: true
+      type: object
+      required:
+      - file_ids
+      title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody
+      description: Request to create a vector store file batch with extra_body support.
+    VectorStoreFileBatchObject:
+      properties:
+        id:
+          type: string
+          title: Id
+        object:
+          type: string
+          title: Object
+          default: vector_store.file_batch
+        created_at:
+          type: integer
+          title: Created At
+        vector_store_id:
+          type: string
+          title: Vector Store Id
+        status:
+          title: Status
+          type: string
+          enum:
+          - completed
+          - in_progress
+          - cancelled
+          - failed
+          default: completed
+        file_counts:
+          $ref: '#/components/schemas/VectorStoreFileCounts'
+      type: object
+      required:
+      - id
+      - created_at
+      - vector_store_id
+      - status
+      - file_counts
+      title: VectorStoreFileBatchObject
+      description: OpenAI Vector Store File Batch object.
+    VectorStoreFileStatus:
+      type: string
+      enum:
+      - completed
+      - in_progress
+      - cancelled
+      - failed
+      default: completed
+    VectorStoreFileLastError:
+      properties:
+        code:
+          title: Code
+          type: string
+          enum:
+          - server_error
+          - rate_limit_exceeded
+          default: server_error
+        message:
+          type: string
+          title: Message
+      type: object
+      required:
+      - code
+      - message
+      title: VectorStoreFileLastError
+      description: Error information for failed vector store file processing.
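+    # Editor's note (illustrative): a minimal
+    # OpenAICreateVectorStoreFileBatchRequestWithExtraBody payload; the file ids
+    # are hypothetical.
+    #   file_ids:
+    #   - file-abc123
+    #   - file-def456
+    #   chunking_strategy:
+    #     type: auto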
+ VectorStoreFileObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.file + attributes: + additionalProperties: + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 + type: object + maxProperties: 16 + title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. + x-oaiTypeLabel: map + chunking_strategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + created_at: + type: integer + title: Created At + last_error: + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError + status: + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + usage_bytes: + type: integer + title: Usage Bytes + default: 0 + vector_store_id: + type: string + title: Vector Store Id + type: object + required: + - id + - chunking_strategy + - created_at + - status + - vector_store_id + title: VectorStoreFileObject + description: OpenAI Vector Store File object. + VectorStoreFilesListInBatchResponse: + properties: + object: + type: string + title: Object + default: list + data: + items: + $ref: '#/components/schemas/VectorStoreFileObject' + type: array + title: Data + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + title: Has More + default: false + type: object + required: + - data + title: VectorStoreFilesListInBatchResponse + description: Response from listing files in a vector store file batch. + VectorStoreListFilesResponse: + properties: + object: + type: string + title: Object + default: list + data: + items: + $ref: '#/components/schemas/VectorStoreFileObject' + type: array + title: Data + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + title: Has More + default: false + type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. + VectorStoreFileDeleteResponse: + properties: + id: + type: string + title: Id + object: + type: string + title: Object + default: vector_store.file.deleted + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. 
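The `attributes` constraints on VectorStoreFileObject (at most 16 pairs, 64-character keys, 512-character string values, scalar types only) are easy to misread in YAML; here is a small Python checker restating exactly those rules:

```python
def validate_attributes(attrs: dict) -> None:
    """Restates the VectorStoreFileObject.attributes constraints:
    at most 16 pairs, keys <= 64 chars, string values <= 512 chars,
    and values limited to str | int | float | bool."""
    if len(attrs) > 16:
        raise ValueError("at most 16 attribute pairs allowed")
    for key, value in attrs.items():
        if len(key) > 64:
            raise ValueError(f"key too long: {key!r}")
        if isinstance(value, str) and len(value) > 512:
            raise ValueError(f"string value too long for key {key!r}")
        if not isinstance(value, (str, int, float, bool)):
            raise ValueError(f"unsupported value type for key {key!r}")

validate_attributes({"source": "wiki", "page": 3, "reviewed": True})  # passes
```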
+ VectorStoreContent: + properties: + type: + type: string + const: text + title: Type + text: + type: string + title: Text + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. + VectorStoreFileContentResponse: + properties: + object: + type: string + const: vector_store.file_content.page + title: Object + default: vector_store.file_content.page + data: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. + VectorStoreSearchResponse: + properties: + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: number + - type: boolean + title: string | number | boolean + type: object + - type: 'null' + content: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Content + type: object + required: + - file_id + - filename + - score + - content + title: VectorStoreSearchResponse + description: Response from searching a vector store. + VectorStoreSearchResponsePage: + properties: + object: + type: string + title: Object + default: vector_store.search_results.page + search_query: + items: + type: string + type: array + title: Search Query + data: + items: + $ref: '#/components/schemas/VectorStoreSearchResponse' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: + properties: + version: + type: string + title: Version + type: object + required: + - version + title: VersionInfo + description: Version information for the service. + AppendRowsRequest: + properties: + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: AppendRowsRequest + PaginatedResponse: + properties: + data: + items: + additionalProperties: true + type: object + type: array + title: Data + has_more: + type: boolean + title: Has More + url: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. 
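A hypothetical search result page conforming to the schemas above; note that `search_query` is an array of strings and each result's `content` is a list of VectorStoreContent items:

```python
# Illustrative VectorStoreSearchResponsePage; ids and text are hypothetical.
search_page = {
    "object": "vector_store.search_results.page",
    "search_query": ["how do I rotate api keys"],  # list of strings, per schema
    "data": [
        {
            "file_id": "file_abc",
            "filename": "security.md",
            "score": 0.87,
            "attributes": {"section": "auth"},
            "content": [{"type": "text", "text": "Rotate keys by ..."}],
        }
    ],
    "has_more": False,
    "next_page": None,
}
```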
+ Dataset: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: dataset + title: Type + default: dataset + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this dataset + type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + properties: + type: + type: string + const: rows + title: Type + default: rows + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + properties: + type: + type: string + const: uri + title: Type + default: uri + uri: + type: string + title: Uri + type: object + required: + - uri + title: URIDataSource + description: A dataset that can be obtained from a URI. + ListDatasetsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Dataset' + type: array + title: Data + type: object + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + Benchmark: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: benchmark + title: Type + default: benchmark + dataset_id: + type: string + title: Dataset Id + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + metadata: + additionalProperties: true + type: object + title: Metadata + description: Metadata for this evaluation task + type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. 
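A hypothetical Dataset per source variant; the `purpose` values come from the DatasetPurpose enum added later in this file:

```python
# Illustrative Dataset resources; identifier and provider_id are hypothetical.
uri_dataset = {
    "identifier": "qa-eval",
    "provider_id": "localfs",
    "type": "dataset",
    "purpose": "eval/question-answer",  # a DatasetPurpose enum value
    "source": {"type": "uri", "uri": "https://example.com/qa.jsonl"},
    "metadata": {},
}

# Same resource, but with the inline RowsDataSource variant.
rows_dataset = dict(
    uri_dataset,
    identifier="qa-inline",
    source={"type": "rows", "rows": [{"question": "2+2?", "answer": "4"}]},
)
```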
+ ListBenchmarksResponse: + properties: + data: + items: + $ref: '#/components/schemas/Benchmark' + type: array + title: Data + type: object + required: + - data + title: ListBenchmarksResponse + BenchmarkConfig: + properties: + eval_candidate: + $ref: '#/components/schemas/ModelCandidate' + scoring_params: + additionalProperties: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + type: object + title: Scoring Params + description: Map between scoring function id and parameters for each scoring function you want to run + num_examples: + anyOf: + - type: integer + - type: 'null' + description: Number of examples to evaluate (useful for testing); if not provided, all examples in the dataset will be evaluated + type: object + required: + - eval_candidate + title: BenchmarkConfig + description: A benchmark configuration for evaluation. + GreedySamplingStrategy: + properties: + type: + type: string + const: greedy + title: Type + default: greedy + type: object + title: GreedySamplingStrategy + description: Greedy sampling strategy that selects the highest probability token at each step. + ModelCandidate: + properties: + type: + type: string + const: model + title: Type + default: model + model: + type: string + title: Model + sampling_params: + $ref: '#/components/schemas/SamplingParams' + system_message: + anyOf: + - $ref: '#/components/schemas/SystemMessage' + title: SystemMessage + - type: 'null' + title: SystemMessage + type: object + required: + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. + SamplingParams: + properties: + strategy: + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + max_tokens: + anyOf: + - type: integer + - type: 'null' + repetition_penalty: + anyOf: + - type: number + - type: 'null' + default: 1.0 + stop: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: SamplingParams + description: Sampling parameters.
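A minimal hypothetical BenchmarkConfig; only `eval_candidate` is required, and `sampling_params` is in turn required by ModelCandidate:

```python
# Illustrative BenchmarkConfig; the model id is hypothetical.
benchmark_config = {
    "eval_candidate": {
        "type": "model",
        "model": "example-model",
        "sampling_params": {  # required by ModelCandidate
            "strategy": {"type": "top_p", "temperature": 0.7},
            "max_tokens": 512,
        },
    },
    "scoring_params": {},  # map of scoring-function id -> params
    "num_examples": 25,    # evaluate a subset while testing
}
```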
+ SystemMessage: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. + TopKSamplingStrategy: + properties: + type: + type: string + const: top_k + title: Type + default: top_k + top_k: + type: integer + minimum: 1.0 + title: Top K + type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + properties: + type: + type: string + const: top_p + title: Type + default: top_p + temperature: + anyOf: + - type: number + minimum: 0.0 + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + default: 0.95 + type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateRowsRequest: + properties: + input_rows: + items: + additionalProperties: true + type: object + type: array + title: Input Rows + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - input_rows + - scoring_functions + - benchmark_config + title: EvaluateRowsRequest + EvaluateResponse: + properties: + generations: + items: + additionalProperties: true + type: object + type: array + title: Generations + scores: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Scores + type: object + required: + - generations + - scores + title: EvaluateResponse + description: The response from an evaluation. + RunEvalRequest: + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + type: object + required: + - benchmark_config + title: RunEvalRequest + Job: + properties: + job_id: + type: string + title: Job Id + status: + $ref: '#/components/schemas/JobStatus' + type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. 
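The three sampling-strategy variants are discriminated by `type`; illustrative instances:

```python
# SamplingParams.strategy variants, selected by the "type" discriminator.
greedy = {"type": "greedy"}
top_k = {"type": "top_k", "top_k": 40}         # top_k is required and must be >= 1
top_p = {"type": "top_p", "temperature": 0.6}  # temperature is required; top_p defaults to 0.95

for strategy in (greedy, top_k, top_p):
    assert strategy["type"] in {"greedy", "top_k", "top_p"}
```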
+ RerankRequest: + properties: + model: + type: string + title: Model + query: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + items: + items: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + type: array + title: Items + max_num_results: + anyOf: + - type: integer + - type: 'null' + type: object + required: + - model + - query + - items + title: RerankRequest + RerankData: + properties: + index: + type: integer + title: Index + relevance_score: + type: number + title: Relevance Score + type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. + RerankResponse: + properties: + data: + items: + $ref: '#/components/schemas/RerankData' + type: array + title: Data + type: object + required: + - data + title: RerankResponse + description: Response from a reranking request. + Checkpoint: + properties: + identifier: + type: string + title: Identifier created_at: type: string format: date-time - description: >- - Timestamp when the checkpoint was created + title: Created At epoch: type: integer - description: >- - Training epoch when the checkpoint was saved + title: Epoch post_training_job_id: type: string - description: >- - Identifier of the training job that created this checkpoint + title: Post Training Job Id path: type: string - description: >- - File system path where the checkpoint is stored + title: Path training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object required: - - identifier - - created_at - - epoch - - post_training_job_id - - path + - identifier + - created_at + - epoch + - post_training_job_id + - path title: Checkpoint description: Checkpoint created during training runs. PostTrainingJobArtifactsResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - checkpoints + - job_uuid title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. 
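A hypothetical rerank round trip for the rerank schemas above (the text content-part shape is assumed from OpenAIChatCompletionContentPartTextParam, which is defined elsewhere in the spec):

```python
# Illustrative RerankRequest/RerankResponse payloads; model id is hypothetical.
rerank_request = {
    "model": "example-reranker",
    "query": "what is the refund policy?",
    "items": [
        "Refunds are processed within 5 days.",                  # plain string item
        {"type": "text", "text": "Shipping takes two weeks."},   # assumed text content part
    ],
    "max_num_results": 1,
}

# The response pairs each input index with a relevance score.
rerank_response = {"data": [{"index": 0, "relevance_score": 0.92}]}
best_item = rerank_request["items"][rerank_response["data"][0]["index"]]
```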
PostTrainingMetric: - type: object properties: epoch: type: integer - description: Training epoch number + title: Epoch train_loss: type: number - description: Loss value on the training dataset + title: Train Loss validation_loss: type: number - description: Loss value on the validation dataset + title: Validation Loss perplexity: type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - CancelTrainingJobRequest: + title: Perplexity type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + CancelTrainingJobRequest: properties: job_uuid: type: string - description: The UUID of the job to cancel. - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: CancelTrainingJobRequest PostTrainingJobStatusResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current status of the training job + $ref: '#/components/schemas/JobStatus' scheduled_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job was scheduled + anyOf: + - type: string + format: date-time + - type: 'null' started_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job execution began + anyOf: + - type: string + format: date-time + - type: 'null' completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job finished, if completed + anyOf: + - type: string + format: date-time + - type: 'null' resources_allocated: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Information about computational resources allocated to the - job + anyOf: + - additionalProperties: true + type: object + - type: 'null' checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - status - - checkpoints + - job_uuid + - status title: PostTrainingJobStatusResponse description: Status of a finetuning job. ListPostTrainingJobsResponse: - type: object properties: data: - type: array items: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - additionalProperties: false + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object required: - - data + - data title: ListPostTrainingJobsResponse DPOAlignmentConfig: - type: object properties: beta: type: number - description: Temperature parameter for the DPO loss + title: Beta loss_type: $ref: '#/components/schemas/DPOLossType' default: sigmoid - description: The type of loss function to use for DPO - additionalProperties: false + type: object required: - - beta - - loss_type + - beta title: DPOAlignmentConfig - description: >- - Configuration for Direct Preference Optimization (DPO) alignment. 
+ description: Configuration for Direct Preference Optimization (DPO) alignment. DPOLossType: type: string enum: - - sigmoid - - hinge - - ipo - - kto_pair + - sigmoid + - hinge + - ipo + - kto_pair title: DPOLossType DataConfig: - type: object properties: dataset_id: type: string - description: >- - Unique identifier for the training dataset + title: Dataset Id batch_size: type: integer - description: Number of samples per training batch + title: Batch Size shuffle: type: boolean - description: >- - Whether to shuffle the dataset during training + title: Shuffle data_format: $ref: '#/components/schemas/DatasetFormat' - description: >- - Format of the dataset (instruct or dialog) validation_dataset_id: - type: string - description: >- - (Optional) Unique identifier for the validation dataset + anyOf: + - type: string + - type: 'null' packed: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to pack multiple samples into a single sequence for - efficiency train_on_input: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to compute loss on input tokens as well as output tokens - additionalProperties: false + type: object required: - - dataset_id - - batch_size - - shuffle - - data_format + - dataset_id + - batch_size + - shuffle + - data_format title: DataConfig - description: >- - Configuration for training data and data loading. + description: Configuration for training data and data loading. DatasetFormat: type: string enum: - - instruct - - dialog + - instruct + - dialog title: DatasetFormat description: Format of the training dataset. EfficiencyConfig: - type: object properties: enable_activation_checkpointing: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use activation checkpointing to reduce memory usage enable_activation_offloading: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload activations to CPU to save GPU memory memory_efficient_fsdp_wrap: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use memory-efficient FSDP wrapping fsdp_cpu_offload: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload FSDP parameters to CPU - additionalProperties: false - title: EfficiencyConfig - description: >- - Configuration for memory and compute efficiency optimizations. - OptimizerConfig: type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: properties: optimizer_type: $ref: '#/components/schemas/OptimizerType' - description: >- - Type of optimizer to use (adam, adamw, or sgd) lr: type: number - description: Learning rate for the optimizer + title: Lr weight_decay: type: number - description: >- - Weight decay coefficient for regularization + title: Weight Decay num_warmup_steps: type: integer - description: Number of steps for learning rate warmup - additionalProperties: false + title: Num Warmup Steps + type: object required: - - optimizer_type - - lr - - weight_decay - - num_warmup_steps + - optimizer_type + - lr + - weight_decay + - num_warmup_steps title: OptimizerConfig - description: >- - Configuration parameters for the optimization algorithm. 
+ description: Configuration parameters for the optimization algorithm. OptimizerType: type: string enum: - - adam - - adamw - - sgd + - adam + - adamw + - sgd title: OptimizerType - description: >- - Available optimizer algorithms for training. + description: Available optimizer algorithms for training. TrainingConfig: - type: object properties: n_epochs: type: integer - description: Number of training epochs to run + title: N Epochs max_steps_per_epoch: type: integer + title: Max Steps Per Epoch default: 1 - description: Maximum number of steps to run per epoch gradient_accumulation_steps: type: integer + title: Gradient Accumulation Steps default: 1 - description: >- - Number of steps to accumulate gradients before updating max_validation_steps: - type: integer + anyOf: + - type: integer + - type: 'null' default: 1 - description: >- - (Optional) Maximum number of validation steps per epoch data_config: - $ref: '#/components/schemas/DataConfig' - description: >- - (Optional) Configuration for data loading and formatting + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - description: >- - (Optional) Configuration for the optimization algorithm + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig efficiency_config: - $ref: '#/components/schemas/EfficiencyConfig' - description: >- - (Optional) Configuration for memory and compute optimizations + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig dtype: - type: string + anyOf: + - type: string + - type: 'null' default: bf16 - description: >- - (Optional) Data type for model parameters (bf16, fp16, fp32) - additionalProperties: false - required: - - n_epochs - - max_steps_per_epoch - - gradient_accumulation_steps - title: TrainingConfig - description: >- - Comprehensive configuration for the training process. - PreferenceOptimizeRequest: type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PreferenceOptimizeRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid finetuned_model: type: string - description: The model to fine-tune. + title: Finetuned Model algorithm_config: $ref: '#/components/schemas/DPOAlignmentConfig' - description: The algorithm configuration. training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. 
- additionalProperties: false + title: Logger Config + type: object required: - - job_uuid - - finetuned_model - - algorithm_config - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config title: PreferenceOptimizeRequest PostTrainingJob: - type: object properties: job_uuid: type: string - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: PostTrainingJob AlgorithmConfig: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QATFinetuningConfig' discriminator: - propertyName: type mapping: LoRA: '#/components/schemas/LoraFinetuningConfig' QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig LoraFinetuningConfig: - type: object properties: type: type: string const: LoRA + title: Type default: LoRA - description: Algorithm type identifier, always "LoRA" lora_attn_modules: - type: array items: type: string - description: >- - List of attention module names to apply LoRA to + type: array + title: Lora Attn Modules apply_lora_to_mlp: type: boolean - description: Whether to apply LoRA to MLP layers + title: Apply Lora To Mlp apply_lora_to_output: type: boolean - description: >- - Whether to apply LoRA to output projection layers + title: Apply Lora To Output rank: type: integer - description: >- - Rank of the LoRA adaptation (lower rank = fewer parameters) + title: Rank alpha: type: integer - description: >- - LoRA scaling parameter that controls adaptation strength + title: Alpha use_dora: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) quantize_base: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to quantize the base model weights - additionalProperties: false - required: - - type - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - title: LoraFinetuningConfig - description: >- - Configuration for Low-Rank Adaptation (LoRA) fine-tuning. - QATFinetuningConfig: type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: properties: type: type: string const: QAT + title: Type default: QAT - description: Algorithm type identifier, always "QAT" quantizer_name: type: string - description: >- - Name of the quantization algorithm to use + title: Quantizer Name group_size: type: integer - description: Size of groups for grouped quantization - additionalProperties: false - required: - - type - - quantizer_name - - group_size - title: QATFinetuningConfig - description: >- - Configuration for Quantization-Aware Training (QAT) fine-tuning. - SupervisedFineTuneRequest: + title: Group Size type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. 
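Illustrative instances of the two AlgorithmConfig variants; note the discriminator values keep their original casing ("LoRA", "QAT"):

```python
# Hypothetical AlgorithmConfig payloads; values are illustrative.
lora_config = {
    "type": "LoRA",
    "lora_attn_modules": ["q_proj", "v_proj"],
    "apply_lora_to_mlp": False,
    "apply_lora_to_output": False,
    "rank": 8,    # lower rank -> fewer trainable parameters
    "alpha": 16,  # scaling factor applied to the adapter
}

qat_config = {
    "type": "QAT",
    "quantizer_name": "example-quantizer",  # hypothetical
    "group_size": 32,
}
```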
+ SupervisedFineTuneRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. + title: Logger Config model: - type: string - description: The model to fine-tune. + anyOf: + - type: string + - type: 'null' + description: Model descriptor for training if not in provider config checkpoint_dir: - type: string - description: The directory to save checkpoint(s) to. + anyOf: + - type: string + - type: 'null' algorithm_config: - $ref: '#/components/schemas/AlgorithmConfig' - description: The algorithm configuration. - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + title: LoraFinetuningConfig | QATFinetuningConfig + - type: 'null' + title: Algorithm Config + type: object required: - - job_uuid - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - training_config + - hyperparam_search_config + - logger_config title: SupervisedFineTuneRequest + ParamType: + discriminator: + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + propertyName: type + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ...
(9 variants) + DataSource: + discriminator: + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + propertyName: type + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + AllowedToolsFilter: + properties: + tool_names: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: AllowedToolsFilter + description: Filter configuration for restricting which MCP tools can be used. + ApprovalFilter: + properties: + always: + anyOf: + - items: + type: string + type: array + - type: 'null' + never: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: ApprovalFilter + description: Filter configuration for MCP tool approval requirements. + BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Chunk-Output: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + title: string | list[ImageContentItem-Output | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. 
+ DatasetPurpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + title: DatasetPurpose + description: Purpose of the dataset. Each purpose has a required input data schema. + Errors: + properties: + data: + anyOf: + - items: + $ref: '#/components/schemas/BatchError' + type: array + - type: 'null' + object: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: Errors + HealthStatus: + type: string + enum: + - OK + - Error + - Not Implemented + title: HealthStatus + ImageContentItem-Input: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: An image content item + ImageContentItem-Output: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: An image content item + InputTokensDetails: + properties: + cached_tokens: + type: integer + title: Cached Tokens + additionalProperties: true + type: object + required: + - cached_tokens + title: InputTokensDetails + JobStatus: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + title: JobStatus + description: Status of a job execution. + MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request.
+ OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage. + OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. + OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
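Since OpenAIResponseMessage deliberately shares one `type` value across input and output roles, a sketch of both directions may help. Only the discriminator and text fields of the content parts are shown; other fields of those part schemas (defined elsewhere in the spec) are omitted, and the ids are hypothetical:

```python
# As an input item: content is a string or a list of input parts.
input_message = {
    "type": "message",
    "role": "user",
    "content": [{"type": "input_text", "text": "Summarize this file."}],
}

# As an output item: the same "type", but output content parts plus id/status.
output_message = {
    "type": "message",
    "role": "assistant",
    "id": "msg_123",
    "status": "completed",
    "content": [{"type": "output_text", "text": "Here is a summary ..."}],
}
```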
+ OpenAIResponseOutputMessageFileSearchToolCallResults: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. + OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. + OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. 
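A hypothetical OpenAIResponseTextFormat requesting strict JSON-schema output; the name and schema contents are illustrative:

```python
# Illustrative text-format configuration; "weather_report" and its schema
# are made-up placeholders.
text_format = {
    "type": "json_schema",  # one of: text, json_schema, json_object
    "name": "weather_report",
    "schema": {
        "type": "object",
        "properties": {"city": {"type": "string"}, "temp_c": {"type": "number"}},
        "required": ["city", "temp_c"],
    },
    "strict": True,
}
```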
+ OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. + _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. 
+ properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + MetricInResponse: + description: A metric value included in API responses. + properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. + properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. 
+ properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. + properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. 
+ properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings + type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: + items: + type: string + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + additionalProperties: true + title: Metadata + type: object + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean + required: + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. + properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. 
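VectorStoreSearchRequest above is compact enough to exercise whole: only `query` is required, `max_num_results` defaults to 10, and `rewrite_query` defaults to false. A sketch of a search call, assuming a stack at localhost:8321 and an OpenAI-style `/v1/vector_stores/{id}/search` route, which is not itself part of this hunk.

```python
import requests

search_request = {
    "query": "how do I rotate my API key?",  # string branch; list[string] also allowed
    "filters": {"doc_type": "faq"},          # nullable free-form object
    "max_num_results": 10,                   # schema default
    "rewrite_query": False,                  # schema default
}

resp = requests.post(
    "http://localhost:8321/v1/vector_stores/vs_123/search",  # hypothetical store id
    json=search_request,
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```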
+ properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. + items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. + properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. 
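One operational detail in ConversationItemCreateRequest above is the `maxItems: 20` cap on `items`: a client adding more items must batch its calls, as in this sketch. The conversation id and the loose message shape are assumptions; the items route itself appears later in this diff.

```python
import requests

BASE = "http://localhost:8321"   # assumed local stack
conversation_id = "conv_abc123"  # hypothetical

items = [{"type": "message", "role": "user", "content": f"note {i}"} for i in range(45)]

for start in range(0, len(items), 20):  # respect maxItems: 20
    chunk = items[start:start + 20]
    resp = requests.post(
        f"{BASE}/v1/conversations/{conversation_id}/items",
        json={"items": chunk},
        timeout=30,
    )
    resp.raise_for_status()
```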
+ enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. 
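The `module` description repeated through the ProviderSpec family above pins down the external-provider contract: the named module must expose `get_adapter_impl(config, deps)` and return the adapter implementation. A minimal sketch of such a module; the class and its fields are invented for illustration, and the hook being async is an assumption.

```python
# my_external_provider.py -- hypothetical module referenced as
# `module: my_external_provider` in a provider spec.

class MyAdapter:
    """Stand-in adapter implementing some provider API."""

    def __init__(self, config, deps):
        self.config = config  # instance of the spec's config_class
        self.deps = deps      # impls of api_dependencies, keyed by Api

async def get_adapter_impl(config, deps):
    # Entry point named in the spec text; the stack imports the module
    # and calls this to construct the provider.
    return MyAdapter(config, deps)
```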
+ properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -4029,8 +8800,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -4038,11 +8808,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -4050,86 +8818,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Agents - description: >- - APIs for creating and interacting with agentic systems. +- description: APIs for creating and interacting with agentic systems. + name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + This API provides the following extensions: + - idempotent batch creation - ## Agents API (Experimental) + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. 
+ name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. - - > **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. - Great for exploring new capabilities and providing feedback to influence the - final design. - - - Main functionalities provided by this API: - - - - Create agents with specific instructions and ability to use tools. - - - Interactions with agents are grouped into sessions ("threads"), and each interaction - is called a "turn". - - - Agents can be provided with various tools (see the ToolGroups and ToolRuntime - APIs for more details). - - - Agents can be provided with various shields (see the Safety API for more details). - - - Agents can also use Memory to retrieve information from knowledge bases. See - the RAG Tool and Vector IO APIs for more details. - - - ### 🧪 Feedback Welcome - - - This API is actively being developed. We welcome feedback on: - - - API design and usability - - - Performance characteristics - - - Missing features or capabilities - - - Integration patterns - - - **Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) - or [GitHub Issues](https://github.com/llamastack/llama-stack/issues) - x-displayName: Agents - - name: Benchmarks - description: '' - - name: DatasetIO - description: '' - - name: Datasets - description: '' - - name: Eval - description: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. - x-displayName: Evaluations - - name: PostTraining (Coming Soon) - description: '' + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. 
+ name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Agents - - Benchmarks - - DatasetIO - - Datasets - - Eval - - PostTraining (Coming Soon) +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html deleted file mode 100644 index 61deaec1e..000000000 --- a/docs/static/llama-stack-spec.html +++ /dev/null @@ -1,13362 +0,0 @@ [deleted file: 13,362 lines of generated HTML elided; only the page title, "OpenAPI specification", survives] diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index c6197b36f..a593fef85 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -1,17 +1,156 @@ openapi: 3.1.0 info: title: Llama Stack Specification - version: v1 - description: >- + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **✅ STABLE**: Production-ready APIs with backward compatibility guarantees. + **✅ STABLE**: Production-ready APIs with backward compatibility guarantees. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: + /v1/batches: + get: + responses: + '200': + description: A list of batch objects. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBatchesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Batches + summary: List Batches + description: List all batches for the current user. + operationId: list_batches_v1_batches_get + parameters: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + type: integer + default: 20 + title: Limit + post: + responses: + '200': + description: The created batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Batches + summary: Create Batch + description: Create a new batch for processing multiple API requests.
+ operationId: create_batch_v1_batches_post + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' + /v1/batches/{batch_id}: + get: + responses: + '200': + description: The batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Retrieve Batch + description: Retrieve information about a specific batch. + operationId: retrieve_batch_v1_batches__batch_id__get + parameters: + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' + /v1/batches/{batch_id}/cancel: + post: + responses: + '200': + description: The updated batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Cancel Batch + description: Cancel a batch that is in progress. + operationId: cancel_batch_v1_batches__batch_id__cancel_post + parameters: + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/chat/completions: get: responses: @@ -23,48 +162,56 @@ paths: $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: List chat completions. + - Inference + summary: List Chat Completions description: List chat completions. + operationId: list_chat_completions_v1_chat_completions_get parameters: - - name: after - in: query - description: >- - The ID of the last chat completion to return. - required: false - schema: - type: string - - name: limit - in: query - description: >- - The maximum number of chat completions to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort the chat completions by: "asc" or "desc". Defaults to - "desc". 
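Taken together, the Batches routes above form the usual create/retrieve/cancel loop, with cursor pagination on the list route (`after`, `limit` defaulting to 20). A sketch against a local stack; the CreateBatchRequest fields shown (`input_file_id`, `endpoint`, `completion_window`) and the `id` on Batch are assumptions carried over from the OpenAI batch API that this surface mirrors.

```python
import requests

BASE = "http://localhost:8321"  # assumed local stack

created = requests.post(
    f"{BASE}/v1/batches",
    json={
        "input_file_id": "file-abc123",  # hypothetical uploaded .jsonl of requests
        "endpoint": "/v1/chat/completions",
        "completion_window": "24h",
    },
    timeout=30,
)
created.raise_for_status()
batch_id = created.json()["id"]

batch = requests.get(f"{BASE}/v1/batches/{batch_id}", timeout=30).json()
requests.post(f"{BASE}/v1/batches/{batch_id}/cancel", timeout=30).raise_for_status()

page = requests.get(f"{BASE}/v1/batches", params={"limit": 20}, timeout=30).json()
```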
- required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -72,35 +219,36 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletion' - - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + $ref: '#/components/schemas/OpenAIChatCompletion' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIChatCompletionChunk' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: Create chat completions. - description: >- + - Inference + summary: Openai Chat Completion + description: |- Create chat completions. - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] + Generate an OpenAI-compatible chat completion for the given messages using the specified model. + operationId: openai_chat_completion_v1_chat_completions_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody' - required: true - deprecated: false /v1/chat/completions/{completion_id}: get: responses: @@ -111,30 +259,32 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletionWithInputMessages' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Get chat completion. - description: >- + - Inference + summary: Get Chat Completion + description: |- Get chat completion. Describe a chat completion by its ID. + operationId: get_chat_completion_v1_chat_completions__completion_id__get parameters: - - name: completion_id - in: path - description: ID of the chat completion. 
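A notable change above: the chat-completions 200 response is now split by content type, `application/json` for a complete OpenAIChatCompletion and `text/event-stream` for OpenAIChatCompletionChunk deltas, instead of a single `oneOf`. A sketch of consuming both modes; the `stream` request flag and the `choices`/`delta` field names are assumptions from the OpenAI-compatible request/response schemas, whose bodies are outside this hunk.

```python
import json
import requests

BASE = "http://localhost:8321"
body = {
    "model": "llama3.2:3b",  # hypothetical model id
    "messages": [{"role": "user", "content": "Say hello."}],
}

# Non-streaming: one application/json OpenAIChatCompletion document.
full = requests.post(f"{BASE}/v1/chat/completions", json=body, timeout=60)
print(full.json()["choices"][0]["message"]["content"])

# Streaming: text/event-stream, one OpenAIChatCompletionChunk per data: line.
with requests.post(
    f"{BASE}/v1/chat/completions", json={**body, "stream": True}, stream=True, timeout=60
) as resp:
    for line in resp.iter_lines():
        if line.startswith(b"data: ") and line != b"data: [DONE]":
            chunk = json.loads(line[len(b"data: "):])
            print(chunk["choices"][0]["delta"].get("content") or "", end="", flush=True)
```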
- required: true - schema: - type: string - deprecated: false + - name: completion_id + in: path + required: true + schema: + type: string + description: 'Path parameter: completion_id' /v1/completions: post: responses: @@ -145,31 +295,31 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletion' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create completion. - description: >- + - Inference + summary: Openai Completion + description: |- Create completion. - Generate an OpenAI-compatible completion for the given prompt using the specified - model. - parameters: [] + Generate an OpenAI-compatible completion for the given prompt using the specified model. + operationId: openai_completion_v1_completions_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody' required: true - deprecated: false /v1/conversations: post: responses: @@ -180,30 +330,31 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Create a conversation. - description: >- + - Conversations + summary: Create Conversation + description: |- Create a conversation. Create a conversation. - parameters: [] + operationId: create_conversation_v1_conversations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreateConversationRequest' required: true - deprecated: false /v1/conversations/{conversation_id}: get: responses: @@ -214,30 +365,32 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve a conversation. - description: >- + - Conversations + summary: Get Conversation + description: |- Retrieve a conversation. Get a conversation with the given ID. + operationId: get_conversation_v1_conversations__conversation_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. 
- required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' post: responses: '200': @@ -247,36 +400,38 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Update a conversation. - description: >- + - Conversations + summary: Update Conversation + description: |- Update a conversation. Update a conversation's metadata with the given ID. + operationId: update_conversation_v1_conversations__conversation_id__post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/UpdateConversationRequest' required: true - deprecated: false delete: responses: '200': @@ -286,30 +441,32 @@ paths: schema: $ref: '#/components/schemas/ConversationDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete a conversation. - description: >- + - Conversations + summary: Openai Delete Conversation + description: |- Delete a conversation. Delete a conversation with the given ID. + operationId: openai_delete_conversation_v1_conversations__conversation_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' /v1/conversations/{conversation_id}/items: get: responses: @@ -321,173 +478,68 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: List items. - description: >- + - Conversations + summary: List Items + description: |- List items. List items in the conversation. 
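The conversation routes above reduce to plain CRUD: create, fetch, update metadata, delete. A sketch assuming a local stack; the `metadata` request field and the `id` on Conversation are assumptions, since the CreateConversationRequest and UpdateConversationRequest bodies are not in this hunk.

```python
import requests

BASE = "http://localhost:8321"

conv = requests.post(
    f"{BASE}/v1/conversations", json={"metadata": {"topic": "support"}}, timeout=30
).json()
conv_id = conv["id"]

requests.post(
    f"{BASE}/v1/conversations/{conv_id}", json={"metadata": {"topic": "billing"}}, timeout=30
).raise_for_status()

requests.delete(f"{BASE}/v1/conversations/{conv_id}", timeout=30).raise_for_status()
```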
+ operationId: list_items_v1_conversations__conversation_id__items_get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - enum: + - asc + - desc type: string - - name: after - in: query - description: >- - An item ID to list items after, used in pagination. - required: true - schema: - oneOf: - - type: string - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: include - in: query - description: >- - Specify additional output data to include in the response. - required: true - schema: - oneOf: - - type: array - items: - type: string - enum: - - code_interpreter_call.outputs - - computer_call_output.output.image_url - - file_search_call.results - - message.input_image.image_url - - message.output_text.logprobs - - reasoning.encrypted_content - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: limit - in: query - description: >- - A limit on the number of objects to be returned (1-100, default 20). - required: true - schema: - oneOf: - - type: integer - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: order - in: query - description: >- - The order to return items in (asc or desc, default desc). - required: true - schema: - oneOf: - - type: string - enum: - - asc - - desc - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. 
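With the removed `NotGiven` sentinel unions above replaced by ordinary optional query parameters, listing conversation items becomes plain cursor pagination. A sketch; the `data` array and per-item `id` on ConversationItemList are assumptions.

```python
import requests

BASE = "http://localhost:8321"
conv_id = "conv_abc123"  # hypothetical

after = None
while True:
    params = {"limit": 20, "order": "asc"}
    if after:
        params["after"] = after
    page = requests.get(
        f"{BASE}/v1/conversations/{conv_id}/items", params=params, timeout=30
    ).json()
    items = page.get("data", [])
    for item in items:
        print(item.get("type"), item.get("id"))
    if len(items) < 20:  # short page: nothing left to fetch
        break
    after = items[-1]["id"]
```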
- - ``` - deprecated: false + - type: 'null' + title: Order + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array + items: + $ref: '#/components/schemas/ConversationItemInclude' + - type: 'null' + title: Include post: responses: '200': @@ -498,35 +550,37 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: Create items. - description: >- + - Conversations + summary: Add Items + description: |- Create items. Create items in the conversation. + operationId: add_items_v1_conversations__conversation_id__items_post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/AddItemsRequest' - required: true - deprecated: false /v1/conversations/{conversation_id}/items/{item_id}: get: responses: @@ -535,38 +589,40 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ConversationItem' + $ref: '#/components/schemas/OpenAIResponseMessage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve an item. - description: >- + - Conversations + summary: Retrieve + description: |- Retrieve an item. Retrieve a conversation item. + operationId: retrieve_v1_conversations__conversation_id__items__item_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. 
- required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' delete: responses: '200': @@ -576,408 +632,378 @@ paths: schema: $ref: '#/components/schemas/ConversationItemDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete an item. - description: >- + - Conversations + summary: Openai Delete Conversation Item + description: |- Delete an item. Delete a conversation item. + operationId: openai_delete_conversation_item_v1_conversations__conversation_id__items__item_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' /v1/embeddings: post: responses: '200': - description: >- - An OpenAIEmbeddingsResponse containing the embeddings. + description: An OpenAIEmbeddingsResponse containing the embeddings. content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create embeddings. - description: >- + - Inference + summary: Openai Embeddings + description: |- Create embeddings. - Generate OpenAI-compatible embeddings for the given input using the specified - model. - parameters: [] + Generate OpenAI-compatible embeddings for the given input using the specified model. + operationId: openai_embeddings_v1_embeddings_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true - deprecated: false /v1/files: get: responses: '200': - description: >- - An ListOpenAIFileResponse containing the list of files. + description: An ListOpenAIFileResponse containing the list of files. 
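Before the Files routes: the `/v1/embeddings` route above takes an OpenAIEmbeddingsRequestWithExtraBody and returns an OpenAIEmbeddingsResponse. A sketch with the standard OpenAI envelope assumed, since neither schema body appears in this hunk.

```python
import requests

resp = requests.post(
    "http://localhost:8321/v1/embeddings",
    json={"model": "all-MiniLM-L6-v2", "input": "hello world"},  # hypothetical model id
    timeout=30,
)
resp.raise_for_status()
vector = resp.json()["data"][0]["embedding"]  # OpenAI-style envelope assumed
print(len(vector))
```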
content: application/json: schema: $ref: '#/components/schemas/ListOpenAIFileResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: List files. - description: >- + - Files + summary: Openai List Files + description: |- List files. Returns a list of files that belong to the user's organization. + operationId: openai_list_files_v1_files_get parameters: - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. For instance, if you make a list request and receive - 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo - in order to fetch the next page of the list. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 10,000, and the default is 10,000. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - $ref: '#/components/schemas/Order' - - name: purpose - in: query - description: >- - Only return files with the given purpose. - required: false - schema: - $ref: '#/components/schemas/OpenAIFilePurpose' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 10000 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: purpose + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/OpenAIFilePurpose' + - type: 'null' + title: Purpose post: responses: '200': - description: >- - An OpenAIFileObject representing the uploaded file. + description: An OpenAIFileObject representing the uploaded file. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: Upload file. - description: >- + - Files + summary: Openai Upload File + description: |- Upload file. Upload a file that can be used across various endpoints. - The file upload should be a multipart form request with: - - file: The File object (not file name) to be uploaded. - - purpose: The intended purpose of the uploaded file. - - expires_after: Optional form values describing expiration for the file. 
- parameters: [] + operationId: openai_upload_file_v1_files_post requestBody: + required: true content: multipart/form-data: schema: - type: object - properties: - file: - type: string - format: binary - purpose: - $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after: - $ref: '#/components/schemas/ExpiresAfter' - required: - - file - - purpose - required: true - deprecated: false + $ref: '#/components/schemas/Body_openai_upload_file_v1_files_post' /v1/files/{file_id}: get: responses: '200': - description: >- - An OpenAIFileObject containing file information. + description: An OpenAIFileObject containing file information. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file. - description: >- + - Files + summary: Openai Retrieve File + description: |- Retrieve file. Returns information about a specific file. + operationId: openai_retrieve_file_v1_files__file_id__get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' delete: responses: '200': - description: >- - An OpenAIFileDeleteResponse indicating successful deletion. + description: An OpenAIFileDeleteResponse indicating successful deletion. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Delete file. + - Files + summary: Openai Delete File description: Delete file. + operationId: openai_delete_file_v1_files__file_id__delete parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/files/{file_id}/content: get: responses: '200': - description: >- - The raw file content as a binary response. + description: The raw file content as a binary response. 
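The upload route above now documents its multipart form via the generated Body_openai_upload_file_v1_files_post schema rather than an inline object, but the shape is unchanged: a `file` part, a `purpose`, and an optional `expires_after`. A sketch of upload, retrieve, and delete; the `batch` purpose value and the `id` on OpenAIFileObject are assumptions, since OpenAIFilePurpose's members are not shown here.

```python
import requests

BASE = "http://localhost:8321"

with open("requests.jsonl", "rb") as f:
    uploaded = requests.post(
        f"{BASE}/v1/files",
        files={"file": ("requests.jsonl", f, "application/jsonl")},
        data={"purpose": "batch"},  # hypothetical OpenAIFilePurpose value
        timeout=60,
    )
uploaded.raise_for_status()
file_id = uploaded.json()["id"]

info = requests.get(f"{BASE}/v1/files/{file_id}", timeout=30).json()
requests.delete(f"{BASE}/v1/files/{file_id}", timeout=30).raise_for_status()
```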
content: application/json: schema: $ref: '#/components/schemas/Response' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file content. - description: >- + - Files + summary: Openai Retrieve File Content + description: |- Retrieve file content. Returns the contents of the specified file. + operationId: openai_retrieve_file_content_v1_files__file_id__content_get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/health: get: responses: '200': - description: >- - Health information indicating if the service is operational. + description: Health information indicating if the service is operational. content: application/json: schema: $ref: '#/components/schemas/HealthInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get health status. - description: >- + - Inspect + summary: Health + description: |- Get health status. Get the current health status of the service. - parameters: [] - deprecated: false + operationId: health_v1_health_get /v1/inspect/routes: get: responses: '200': - description: >- - Response containing information about all available routes. + description: Response containing information about all available routes. content: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inspect - summary: List routes. - description: >- + - Inspect + summary: List Routes + description: |- List routes. List all available API routes with their methods and implementing providers. - parameters: [] - deprecated: false + operationId: list_routes_v1_inspect_routes_get + parameters: + - name: api_filter + in: query + required: false + schema: + anyOf: + - enum: + - v1 + - v1alpha + - v1beta + - deprecated + type: string + - type: 'null' + title: Api Filter /v1/models: get: responses: '200': - description: A ListModelsResponse. + description: A OpenAIListModelsResponse. 
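The inspect surface above gains an `api_filter` query parameter with the enum shown (`v1`, `v1alpha`, `v1beta`, `deprecated`). A quick operational sketch pairing it with the health route; field names inside HealthInfo and ListRoutesResponse are assumptions.

```python
import requests

BASE = "http://localhost:8321"

print(requests.get(f"{BASE}/v1/health", timeout=10).json())  # HealthInfo

routes = requests.get(
    f"{BASE}/v1/inspect/routes", params={"api_filter": "v1"}, timeout=10
).json()
for route in routes.get("data", []):  # assumed envelope on ListRoutesResponse
    print(route.get("method"), route.get("route"))
```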
content: application/json: schema: - $ref: '#/components/schemas/ListModelsResponse' + $ref: '#/components/schemas/OpenAIListModelsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: List all models. - description: List all models. - parameters: [] - deprecated: false - post: - responses: - '200': - description: A Model. - content: - application/json: - schema: - $ref: '#/components/schemas/Model' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: Register model. - description: >- - Register model. - - Register a model. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterModelRequest' - required: true - deprecated: false + - Models + summary: Openai List Models + description: List models using the OpenAI API. + operationId: openai_list_models_v1_models_get /v1/models/{model_id}: get: responses: @@ -988,60 +1014,32 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Get model. - description: >- + - Models + summary: Get Model + description: |- Get model. Get a model by its identifier. + operationId: get_model_v1_models__model_id__get parameters: - - name: model_id - in: path - description: The identifier of the model to get. - required: true - schema: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: Unregister model. - description: >- - Unregister model. - - Unregister a model. - parameters: - - name: model_id - in: path - description: >- - The identifier of the model to unregister. 
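Model listing above now returns the OpenAI-style OpenAIListModelsResponse, and the register/unregister routes are dropped from this surface. A list-then-fetch sketch, with the usual OpenAI `data` envelope assumed:

```python
import requests

BASE = "http://localhost:8321"

models = requests.get(f"{BASE}/v1/models", timeout=10).json()
for model in models.get("data", []):  # assumed OpenAI-style envelope
    print(model.get("id"))

one = requests.get(f"{BASE}/v1/models/llama3.2:3b", timeout=10).json()  # hypothetical id
```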
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: model_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: model_id'
   /v1/moderations:
     post:
       responses:
@@ -1052,56 +1050,57 @@ paths:
               schema:
                 $ref: '#/components/schemas/ModerationObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Safety
-      summary: Create moderation.
-      description: >-
+      - Safety
+      summary: Run Moderation
+      description: |-
        Create moderation.

        Classifies if text and/or image inputs are potentially harmful.
-      parameters: []
+      operationId: run_moderation_v1_moderations_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/RunModerationRequest'
         required: true
-      deprecated: false
   /v1/prompts:
     get:
       responses:
         '200':
-          description: >-
-            A ListPromptsResponse containing all prompts.
+          description: A ListPromptsResponse containing all prompts.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListPromptsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Prompts
-      summary: List all prompts.
+      - Prompts
+      summary: List Prompts
       description: List all prompts.
-      parameters: []
-      deprecated: false
+      operationId: list_prompts_v1_prompts_get
     post:
       responses:
         '200':
@@ -1111,30 +1110,31 @@ paths:
               schema:
                 $ref: '#/components/schemas/Prompt'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Prompts
-      summary: Create prompt.
-      description: >-
+      - Prompts
+      summary: Create Prompt
+      description: |-
         Create prompt.

         Create a new prompt.
-      parameters: []
+      operationId: create_prompt_v1_prompts_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/CreatePromptRequest'
         required: true
-      deprecated: false
   /v1/prompts/{prompt_id}:
     get:
       responses:
@@ -1146,246 +1146,254 @@ paths:
               $ref: '#/components/schemas/Prompt'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Prompts
-      summary: Get prompt.
-      description: >-
+      - Prompts
+      summary: Get Prompt
+      description: |-
         Get prompt.

         Get a prompt by its identifier and optional version.
+      operationId: get_prompt_v1_prompts__prompt_id__get
       parameters:
-        - name: prompt_id
-          in: path
-          description: The identifier of the prompt to get.
-          required: true
-          schema:
-            type: string
-        - name: version
-          in: query
-          description: >-
-            The version of the prompt to get (defaults to latest).
-          required: false
-          schema:
-            type: integer
-      deprecated: false
+      - name: version
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          title: Version
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
     post:
       responses:
         '200':
-          description: >-
-            The updated Prompt resource with incremented version.
+          description: The updated Prompt resource with incremented version.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/Prompt'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Prompts
-      summary: Update prompt.
-      description: >-
+      - Prompts
+      summary: Update Prompt
+      description: |-
         Update prompt.

         Update an existing prompt (increments version).
+      operationId: update_prompt_v1_prompts__prompt_id__post
       parameters:
-        - name: prompt_id
-          in: path
-          description: The identifier of the prompt to update.
-          required: true
-          schema:
-            type: string
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
       requestBody:
+        required: true
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/UpdatePromptRequest'
-        required: true
-      deprecated: false
     delete:
       responses:
-        '200':
-          description: OK
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
+        '204':
+          description: Successful Response
       tags:
-        - Prompts
-      summary: Delete prompt.
-      description: >-
+      - Prompts
+      summary: Delete Prompt
+      description: |-
         Delete prompt.

         Delete a prompt.
+      operationId: delete_prompt_v1_prompts__prompt_id__delete
       parameters:
-        - name: prompt_id
-          in: path
-          description: The identifier of the prompt to delete.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
   /v1/prompts/{prompt_id}/set-default-version:
     post:
       responses:
         '200':
-          description: >-
-            The prompt with the specified version now set as default.
+          description: The prompt with the specified version now set as default.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/Prompt'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Prompts
-      summary: Set prompt version.
-      description: >-
+      - Prompts
+      summary: Set Default Version
+      description: |-
         Set prompt version.

         Set which version of a prompt should be the default in get_prompt (latest).
+      operationId: set_default_version_v1_prompts__prompt_id__set_default_version_post
       parameters:
-        - name: prompt_id
-          in: path
-          description: The identifier of the prompt.
-          required: true
-          schema:
-            type: string
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/SetDefaultVersionRequest'
         required: true
-      deprecated: false
   /v1/prompts/{prompt_id}/versions:
     get:
       responses:
         '200':
-          description: >-
-            A ListPromptsResponse containing all versions of the prompt.
+          description: A ListPromptsResponse containing all versions of the prompt.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListPromptsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Prompts
-      summary: List prompt versions.
-      description: >-
+      - Prompts
+      summary: List Prompt Versions
+      description: |-
         List prompt versions.

         List all versions of a specific prompt.
+      operationId: list_prompt_versions_v1_prompts__prompt_id__versions_get
       parameters:
-        - name: prompt_id
-          in: path
-          description: >-
-            The identifier of the prompt to list versions for.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: prompt_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: prompt_id'
   /v1/providers:
     get:
       responses:
         '200':
-          description: >-
-            A ListProvidersResponse containing information about all providers.
+          description: A ListProvidersResponse containing information about all providers.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListProvidersResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Providers
-      summary: List providers.
-      description: >-
+      - Providers
+      summary: List Providers
+      description: |-
         List providers.

         List all available providers.
-      parameters: []
-      deprecated: false
+      operationId: list_providers_v1_providers_get
   /v1/providers/{provider_id}:
     get:
       responses:
         '200':
-          description: >-
-            A ProviderInfo object containing the provider's details.
+          description: A ProviderInfo object containing the provider's details.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ProviderInfo'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Providers
-      summary: Get provider.
-      description: >-
+      - Providers
+      summary: Inspect Provider
+      description: |-
         Get provider.

         Get detailed information about a specific provider.
+      operationId: inspect_provider_v1_providers__provider_id__get
       parameters:
-        - name: provider_id
-          in: path
-          description: The ID of the provider to inspect.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: provider_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: provider_id'
   /v1/responses:
     get:
       responses:
@@ -1397,45 +1405,56 @@ paths:
               $ref: '#/components/schemas/ListOpenAIResponseObject'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Agents
-      summary: List all responses.
+      - Agents
+      summary: List Openai Responses
       description: List all responses.
+      operationId: list_openai_responses_v1_responses_get
       parameters:
-        - name: after
-          in: query
-          description: The ID of the last response to return.
-          required: false
-          schema:
-            type: string
-        - name: limit
-          in: query
-          description: The number of responses to return.
-          required: false
-          schema:
-            type: integer
-        - name: model
-          in: query
-          description: The model to filter responses by.
-          required: false
-          schema:
-            type: string
-        - name: order
-          in: query
-          description: >-
-            The order to sort responses by when sorted by created_at ('asc' or 'desc').
-          required: false
-          schema:
-            $ref: '#/components/schemas/Order'
-      deprecated: false
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 50
+          title: Limit
+      - name: model
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Model
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - $ref: '#/components/schemas/Order'
+          - type: 'null'
+          default: desc
+          title: Order
     post:
       responses:
         '200':
@@ -1449,38 +1468,51 @@ paths:
               $ref: '#/components/schemas/OpenAIResponseObjectStream'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Agents
-      summary: Create a model response.
+      - Agents
+      summary: Create Openai Response
       description: Create a model response.
-      parameters: []
+      operationId: create_openai_response_v1_responses_post
       requestBody:
+        required: true
         content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateOpenaiResponseRequest'
-        required: true
-      deprecated: false
-      x-llama-stack-extra-body-params:
-        - name: guardrails
-          schema:
-            type: array
-            items:
-              oneOf:
+      x-llama-stack-extra-body-params:
+        guardrails:
+          $defs:
+            ResponseGuardrailSpec:
+              description: |-
+                Specification for a guardrail to apply during response generation.
+
+                :param type: The type/identifier of the guardrail.
+              properties:
+                type:
+                  title: Type
+                  type: string
+              required:
+              - type
+              title: ResponseGuardrailSpec
+              type: object
+          anyOf:
+          - items:
+              anyOf:
                - type: string
                - $ref: '#/components/schemas/ResponseGuardrailSpec'
-          description: >-
-            List of guardrails to apply during response generation. Guardrails provide
-            safety and content moderation.
-          required: false
+              type: array
+          - type: 'null'
+          description: List of guardrails to apply during response generation. Guardrails provide safety and content moderation.
   /v1/responses/{response_id}:
     get:
       responses:
@@ -1491,28 +1523,29 @@ paths:
               schema:
                 $ref: '#/components/schemas/OpenAIResponseObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Agents
-      summary: Get a model response.
+      - Agents
+      summary: Get Openai Response
       description: Get a model response.
+      operationId: get_openai_response_v1_responses__response_id__get
       parameters:
-        - name: response_id
-          in: path
-          description: >-
-            The ID of the OpenAI response to retrieve.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: response_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: response_id'
     delete:
       responses:
         '200':
@@ -1522,27 +1555,29 @@ paths:
               schema:
                 $ref: '#/components/schemas/OpenAIDeleteResponseObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Agents
-      summary: Delete a response.
+      - Agents
+      summary: Delete Openai Response
       description: Delete a response.
+      operationId: delete_openai_response_v1_responses__response_id__delete
       parameters:
-        - name: response_id
-          in: path
-          description: The ID of the OpenAI response to delete.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: response_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: response_id'
   /v1/responses/{response_id}/input_items:
     get:
       responses:
@@ -1554,65 +1589,72 @@ paths:
               $ref: '#/components/schemas/ListOpenAIResponseInputItem'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - Agents
-      summary: List input items.
+      - Agents
+      summary: List Openai Response Input Items
       description: List input items.
+      operationId: list_openai_response_input_items_v1_responses__response_id__input_items_get
       parameters:
-        - name: response_id
-          in: path
-          description: >-
-            The ID of the response to retrieve input items for.
-          required: true
-          schema:
-            type: string
-        - name: after
-          in: query
-          description: >-
-            An item ID to list items after, used for pagination.
-          required: false
-          schema:
-            type: string
-        - name: before
-          in: query
-          description: >-
-            An item ID to list items before, used for pagination.
-          required: false
-          schema:
-            type: string
-        - name: include
-          in: query
-          description: >-
-            Additional fields to include in the response.
-          required: false
-          schema:
-            type: array
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: before
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Before
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 20
+          title: Limit
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - $ref: '#/components/schemas/Order'
+          - type: 'null'
+          default: desc
+          title: Order
+      - name: response_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: response_id'
+      - name: include
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: array
             items:
               type: string
-        - name: limit
-          in: query
-          description: >-
-            A limit on the number of objects to be returned. Limit can range between
-            1 and 100, and the default is 20.
-          required: false
-          schema:
-            type: integer
-        - name: order
-          in: query
-          description: >-
-            The order to return the input items in. Default is desc.
-          required: false
-          schema:
-            $ref: '#/components/schemas/Order'
-      deprecated: false
+          - type: 'null'
+          title: Include
   /v1/safety/run-shield:
     post:
       responses:
@@ -1623,30 +1665,31 @@ paths:
               schema:
                 $ref: '#/components/schemas/RunShieldResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Safety
-      summary: Run shield.
-      description: >-
+      - Safety
+      summary: Run Shield
+      description: |-
         Run shield.

         Run a shield.
-      parameters: []
+      operationId: run_shield_v1_safety_run_shield_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/RunShieldRequest'
         required: true
-      deprecated: false
   /v1/scoring-functions:
     get:
       responses:
@@ -1657,47 +1700,22 @@ paths:
               schema:
                 $ref: '#/components/schemas/ListScoringFunctionsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - ScoringFunctions
-      summary: List all scoring functions.
+      - Scoring Functions
+      summary: List Scoring Functions
       description: List all scoring functions.
-      parameters: []
-      deprecated: false
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ScoringFunctions
-      summary: Register a scoring function.
-      description: Register a scoring function.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
-        required: true
-      deprecated: false
+      operationId: list_scoring_functions_v1_scoring_functions_get
   /v1/scoring-functions/{scoring_fn_id}:
     get:
       responses:
@@ -1708,86 +1726,61 @@ paths:
               schema:
                 $ref: '#/components/schemas/ScoringFn'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - ScoringFunctions
-      summary: Get a scoring function by its ID.
+      - Scoring Functions
+      summary: Get Scoring Function
       description: Get a scoring function by its ID.
+      operationId: get_scoring_function_v1_scoring_functions__scoring_fn_id__get
       parameters:
-        - name: scoring_fn_id
-          in: path
-          description: The ID of the scoring function to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ScoringFunctions
-      summary: Unregister a scoring function.
-      description: Unregister a scoring function.
-      parameters:
-        - name: scoring_fn_id
-          in: path
-          description: >-
-            The ID of the scoring function to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: scoring_fn_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: scoring_fn_id'
   /v1/scoring/score:
     post:
       responses:
         '200':
-          description: >-
-            A ScoreResponse object containing rows and aggregated results.
+          description: A ScoreResponse object containing rows and aggregated results.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ScoreResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Scoring
-      summary: Score a list of rows.
+      - Scoring
+      summary: Score
       description: Score a list of rows.
-      parameters: []
+      operationId: score_v1_scoring_score_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/ScoreRequest'
         required: true
-      deprecated: false
   /v1/scoring/score-batch:
     post:
       responses:
@@ -1798,27 +1791,28 @@ paths:
               schema:
                 $ref: '#/components/schemas/ScoreBatchResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Scoring
-      summary: Score a batch of rows.
+      - Scoring
+      summary: Score Batch
       description: Score a batch of rows.
-      parameters: []
+      operationId: score_batch_v1_scoring_score_batch_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/ScoreBatchRequest'
         required: true
-      deprecated: false
   /v1/shields:
     get:
       responses:
@@ -1829,51 +1823,22 @@ paths:
               schema:
                 $ref: '#/components/schemas/ListShieldsResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Shields
-      summary: List all shields.
+      - Shields
+      summary: List Shields
       description: List all shields.
-      parameters: []
-      deprecated: false
-    post:
-      responses:
-        '200':
-          description: A Shield.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Shield'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Shields
-      summary: Register a shield.
-      description: Register a shield.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterShieldRequest'
-        required: true
-      deprecated: false
+      operationId: list_shields_v1_shields_get
   /v1/shields/{identifier}:
     get:
       responses:
@@ -1884,419 +1849,57 @@ paths:
               schema:
                 $ref: '#/components/schemas/Shield'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - Shields
-      summary: Get a shield by its identifier.
+      - Shields
+      summary: Get Shield
       description: Get a shield by its identifier.
+      operationId: get_shield_v1_shields__identifier__get
       parameters:
-        - name: identifier
-          in: path
-          description: The identifier of the shield to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Shields
-      summary: Unregister a shield.
-      description: Unregister a shield.
-      parameters:
-        - name: identifier
-          in: path
-          description: >-
-            The identifier of the shield to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
-  /v1/synthetic-data-generation/generate:
-    post:
-      responses:
-        '200':
-          description: >-
-            Response containing filtered synthetic data samples and optional statistics
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/SyntheticDataGenerationResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - SyntheticDataGeneration (Coming Soon)
-      summary: >-
-        Generate synthetic data based on input dialogs and apply filtering.
-      description: >-
-        Generate synthetic data based on input dialogs and apply filtering.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/SyntheticDataGenerateRequest'
+      - name: identifier
+        in: path
         required: true
-      deprecated: false
-  /v1/tool-runtime/invoke:
-    post:
-      responses:
-        '200':
-          description: A ToolInvocationResult.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ToolInvocationResult'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: Run a tool with the given arguments.
-      description: Run a tool with the given arguments.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InvokeToolRequest'
-        required: true
-      deprecated: false
-  /v1/tool-runtime/list-tools:
-    get:
-      responses:
-        '200':
-          description: A ListToolDefsResponse.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ListToolDefsResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: List all tools in the runtime.
-      description: List all tools in the runtime.
-      parameters:
-        - name: tool_group_id
-          in: query
-          description: >-
-            The ID of the tool group to list tools for.
-          required: false
-          schema:
-            type: string
-        - name: mcp_endpoint
-          in: query
-          description: >-
-            The MCP endpoint to use for the tool group.
-          required: false
-          schema:
-            $ref: '#/components/schemas/URL'
-      deprecated: false
-  /v1/tool-runtime/rag-tool/insert:
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Index documents so they can be used by the RAG system.
-      description: >-
-        Index documents so they can be used by the RAG system.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InsertRequest'
-        required: true
-      deprecated: false
-  /v1/tool-runtime/rag-tool/query:
-    post:
-      responses:
-        '200':
-          description: >-
-            RAGQueryResult containing the retrieved content and metadata
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RAGQueryResult'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Query the RAG system for context; typically invoked by the agent.
-      description: >-
-        Query the RAG system for context; typically invoked by the agent.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/QueryRequest'
-        required: true
-      deprecated: false
-  /v1/toolgroups:
-    get:
-      responses:
-        '200':
-          description: A ListToolGroupsResponse.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ListToolGroupsResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: List tool groups with optional provider.
-      description: List tool groups with optional provider.
-      parameters: []
-      deprecated: false
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Register a tool group.
-      description: Register a tool group.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RegisterToolGroupRequest'
-        required: true
-      deprecated: false
-  /v1/toolgroups/{toolgroup_id}:
-    get:
-      responses:
-        '200':
-          description: A ToolGroup.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ToolGroup'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Get a tool group by its ID.
-      description: Get a tool group by its ID.
-      parameters:
-        - name: toolgroup_id
-          in: path
-          description: The ID of the tool group to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
-    delete:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Unregister a tool group.
-      description: Unregister a tool group.
-      parameters:
-        - name: toolgroup_id
-          in: path
-          description: The ID of the tool group to unregister.
-          required: true
-          schema:
-            type: string
-      deprecated: false
-  /v1/tools:
-    get:
-      responses:
-        '200':
-          description: A ListToolDefsResponse.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ListToolDefsResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: List tools with optional tool group.
-      description: List tools with optional tool group.
-      parameters:
-        - name: toolgroup_id
-          in: query
-          description: >-
-            The ID of the tool group to list tools for.
-          required: false
-          schema:
-            type: string
-      deprecated: false
-  /v1/tools/{tool_name}:
-    get:
-      responses:
-        '200':
-          description: A ToolDef.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ToolDef'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolGroups
-      summary: Get a tool by its name.
-      description: Get a tool by its name.
-      parameters:
-        - name: tool_name
-          in: path
-          description: The name of the tool to get.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+        schema:
+          type: string
+        description: 'Path parameter: identifier'
   /v1/vector-io/insert:
     post:
       responses:
-        '200':
-          description: OK
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
+        '204':
+          description: Successful Response
       tags:
-        - VectorIO
-      summary: Insert chunks into a vector database.
+      - Vector Io
+      summary: Insert Chunks
       description: Insert chunks into a vector database.
-      parameters: []
+      operationId: insert_chunks_v1_vector_io_insert_post
       requestBody:
         content:
          application/json:
            schema:
              $ref: '#/components/schemas/InsertChunksRequest'
        required: true
-      deprecated: false
   /v1/vector-io/query:
     post:
       responses:
@@ -2307,1887 +1910,2043 @@ paths:
               schema:
                 $ref: '#/components/schemas/QueryChunksResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Query chunks from a vector database.
+      - Vector Io
+      summary: Query Chunks
       description: Query chunks from a vector database.
-      parameters: []
+      operationId: query_chunks_v1_vector_io_query_post
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/QueryChunksRequest'
         required: true
-      deprecated: false
   /v1/vector_stores:
     get:
       responses:
         '200':
-          description: >-
-            A VectorStoreListResponse containing the list of vector stores.
+          description: A VectorStoreListResponse containing the list of vector stores.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreListResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: Returns a list of vector stores.
+      - Vector Io
+      summary: Openai List Vector Stores
       description: Returns a list of vector stores.
+      operationId: openai_list_vector_stores_v1_vector_stores_get
       parameters:
-        - name: limit
-          in: query
-          description: >-
-            A limit on the number of objects to be returned. Limit can range between
-            1 and 100, and the default is 20.
-          required: false
-          schema:
-            type: integer
-        - name: order
-          in: query
-          description: >-
-            Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-            order and `desc` for descending order.
-          required: false
-          schema:
-            type: string
-        - name: after
-          in: query
-          description: >-
-            A cursor for use in pagination. `after` is an object ID that defines your
-            place in the list.
-          required: false
-          schema:
-            type: string
-        - name: before
-          in: query
-          description: >-
-            A cursor for use in pagination. `before` is an object ID that defines
-            your place in the list.
-          required: false
-          schema:
-            type: string
-      deprecated: false
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: before
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Before
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 20
+          title: Limit
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          default: desc
+          title: Order
     post:
       responses:
         '200':
-          description: >-
-            A VectorStoreObject representing the created vector store.
+          description: A VectorStoreObject representing the created vector store.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreObject'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: Creates a vector store.
-      description: >-
+      - Vector Io
+      summary: Openai Create Vector Store
+      description: |-
         Creates a vector store.

         Generate an OpenAI-compatible vector store with the given parameters.
-      parameters: []
+      operationId: openai_create_vector_store_v1_vector_stores_post
       requestBody:
+        required: true
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody'
-        required: true
-      deprecated: false
   /v1/vector_stores/{vector_store_id}:
     get:
       responses:
         '200':
-          description: >-
-            A VectorStoreObject representing the vector store.
+          description: A VectorStoreObject representing the vector store.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Retrieves a vector store.
+      - Vector Io
+      summary: Openai Retrieve Vector Store
       description: Retrieves a vector store.
+      operationId: openai_retrieve_vector_store_v1_vector_stores__vector_store_id__get
       parameters:
-        - name: vector_store_id
-          in: path
-          description: The ID of the vector store to retrieve.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
     post:
       responses:
         '200':
-          description: >-
-            A VectorStoreObject representing the updated vector store.
+          description: A VectorStoreObject representing the updated vector store.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Updates a vector store.
+      - Vector Io
+      summary: Openai Update Vector Store
       description: Updates a vector store.
+      operationId: openai_update_vector_store_v1_vector_stores__vector_store_id__post
       parameters:
-        - name: vector_store_id
-          in: path
-          description: The ID of the vector store to update.
-          required: true
-          schema:
-            type: string
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest'
         required: true
-      deprecated: false
     delete:
       responses:
         '200':
-          description: >-
-            A VectorStoreDeleteResponse indicating the deletion status.
+          description: A VectorStoreDeleteResponse indicating the deletion status.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreDeleteResponse'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Delete a vector store.
+      - Vector Io
+      summary: Openai Delete Vector Store
       description: Delete a vector store.
+      operationId: openai_delete_vector_store_v1_vector_stores__vector_store_id__delete
       parameters:
-        - name: vector_store_id
-          in: path
-          description: The ID of the vector store to delete.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
   /v1/vector_stores/{vector_store_id}/file_batches:
     post:
       responses:
         '200':
-          description: >-
-            A VectorStoreFileBatchObject representing the created file batch.
+          description: A VectorStoreFileBatchObject representing the created file batch.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreFileBatchObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Create a vector store file batch.
-      description: >-
+      - Vector Io
+      summary: Openai Create Vector Store File Batch
+      description: |-
         Create a vector store file batch.

-        Generate an OpenAI-compatible vector store file batch for the given vector
-        store.
+        Generate an OpenAI-compatible vector store file batch for the given vector store.
+      operationId: openai_create_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches_post
       parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store to create the file batch for.
-          required: true
-          schema:
-            type: string
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
       requestBody:
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody'
         required: true
-      deprecated: false
   /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}:
     get:
       responses:
         '200':
-          description: >-
-            A VectorStoreFileBatchObject representing the file batch.
+          description: A VectorStoreFileBatchObject representing the file batch.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreFileBatchObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Retrieve a vector store file batch.
+      - Vector Io
+      summary: Openai Retrieve Vector Store File Batch
       description: Retrieve a vector store file batch.
+      operationId: openai_retrieve_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__get
       parameters:
-        - name: batch_id
-          in: path
-          description: The ID of the file batch to retrieve.
-          required: true
-          schema:
-            type: string
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file batch.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
+      - name: batch_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: batch_id'
   /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel:
     post:
       responses:
         '200':
-          description: >-
-            A VectorStoreFileBatchObject representing the cancelled file batch.
+          description: A VectorStoreFileBatchObject representing the cancelled file batch.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreFileBatchObject'
         '400':
+          description: Bad Request
           $ref: '#/components/responses/BadRequest400'
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
         default:
+          description: Default Response
           $ref: '#/components/responses/DefaultError'
       tags:
-        - VectorIO
-      summary: Cancels a vector store file batch.
+      - Vector Io
+      summary: Openai Cancel Vector Store File Batch
       description: Cancels a vector store file batch.
+      operationId: openai_cancel_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__cancel_post
       parameters:
-        - name: batch_id
-          in: path
-          description: The ID of the file batch to cancel.
-          required: true
-          schema:
-            type: string
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file batch.
-          required: true
-          schema:
-            type: string
-      deprecated: false
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
+      - name: batch_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: batch_id'
   /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files:
     get:
       responses:
         '200':
-          description: >-
-            A VectorStoreFilesListInBatchResponse containing the list of files in
-            the batch.
+          description: A VectorStoreFilesListInBatchResponse containing the list of files in the batch.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: >-
-        Returns a list of vector store files in a batch.
-      description: >-
-        Returns a list of vector store files in a batch.
+      - Vector Io
+      summary: Openai List Files In Vector Store File Batch
+      description: Returns a list of vector store files in a batch.
+      operationId: openai_list_files_in_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__files_get
       parameters:
-        - name: batch_id
-          in: path
-          description: >-
-            The ID of the file batch to list files from.
-          required: true
-          schema:
-            type: string
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store containing the file batch.
-          required: true
-          schema:
-            type: string
-        - name: after
-          in: query
-          description: >-
-            A cursor for use in pagination. `after` is an object ID that defines your
-            place in the list.
-          required: false
-          schema:
-            type: string
-        - name: before
-          in: query
-          description: >-
-            A cursor for use in pagination. `before` is an object ID that defines
-            your place in the list.
-          required: false
-          schema:
-            type: string
-        - name: filter
-          in: query
-          description: >-
-            Filter by file status. One of in_progress, completed, failed, cancelled.
-          required: false
-          schema:
-            type: string
-        - name: limit
-          in: query
-          description: >-
-            A limit on the number of objects to be returned. Limit can range between
-            1 and 100, and the default is 20.
-          required: false
-          schema:
-            type: integer
-        - name: order
-          in: query
-          description: >-
-            Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-            order and `desc` for descending order.
-          required: false
-          schema:
-            type: string
-      deprecated: false
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: before
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Before
+      - name: filter
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Filter
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 20
+          title: Limit
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          default: desc
+          title: Order
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
+      - name: batch_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: batch_id'
   /v1/vector_stores/{vector_store_id}/files:
     get:
       responses:
         '200':
-          description: >-
-            A VectorStoreListFilesResponse containing the list of files.
+          description: A VectorStoreListFilesResponse containing the list of files.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreListFilesResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: List files in a vector store.
+      - Vector Io
+      summary: Openai List Files In Vector Store
       description: List files in a vector store.
+      operationId: openai_list_files_in_vector_store_v1_vector_stores__vector_store_id__files_get
       parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store to list files from.
-          required: true
-          schema:
-            type: string
-        - name: limit
-          in: query
-          description: >-
-            (Optional) A limit on the number of objects to be returned. Limit can
-            range between 1 and 100, and the default is 20.
-          required: false
-          schema:
-            type: integer
-        - name: order
-          in: query
-          description: >-
-            (Optional) Sort order by the `created_at` timestamp of the objects. `asc`
-            for ascending order and `desc` for descending order.
-          required: false
-          schema:
-            type: string
-        - name: after
-          in: query
-          description: >-
-            (Optional) A cursor for use in pagination. `after` is an object ID that
-            defines your place in the list.
-          required: false
-          schema:
-            type: string
-        - name: before
-          in: query
-          description: >-
-            (Optional) A cursor for use in pagination. `before` is an object ID that
-            defines your place in the list.
-          required: false
-          schema:
-            type: string
-        - name: filter
-          in: query
-          description: >-
-            (Optional) Filter by file status to only return files with the specified
-            status.
-          required: false
-          schema:
-            $ref: '#/components/schemas/VectorStoreFileStatus'
-      deprecated: false
+      - name: after
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: After
+      - name: before
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          title: Before
+      - name: filter
+        in: query
+        required: false
+        schema:
+          title: Filter
+          type: string
+          enum:
+          - completed
+          - in_progress
+          - cancelled
+          - failed
+          default: completed
+          nullable: true
+      - name: limit
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 20
+          title: Limit
+      - name: order
+        in: query
+        required: false
+        schema:
+          anyOf:
+          - type: string
+          - type: 'null'
+          default: desc
+          title: Order
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
     post:
       responses:
         '200':
-          description: >-
-            A VectorStoreFileObject representing the attached file.
+          description: A VectorStoreFileObject representing the attached file.
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorStoreFileObject'
         '400':
           $ref: '#/components/responses/BadRequest400'
+          description: Bad Request
         '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
+          $ref: '#/components/responses/TooManyRequests429'
+          description: Too Many Requests
         '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
+          $ref: '#/components/responses/InternalServerError500'
+          description: Internal Server Error
         default:
           $ref: '#/components/responses/DefaultError'
+          description: Default Response
       tags:
-        - VectorIO
-      summary: Attach a file to a vector store.
+      - Vector Io
+      summary: Openai Attach File To Vector Store
       description: Attach a file to a vector store.
+      operationId: openai_attach_file_to_vector_store_v1_vector_stores__vector_store_id__files_post
       parameters:
-        - name: vector_store_id
-          in: path
-          description: >-
-            The ID of the vector store to attach the file to.
-          required: true
-          schema:
-            type: string
+      - name: vector_store_id
+        in: path
+        required: true
+        schema:
+          type: string
+        description: 'Path parameter: vector_store_id'
       requestBody:
+        required: true
         content:
           application/json:
             schema:
               $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest'
-        required: true
-      deprecated: false
   /v1/vector_stores/{vector_store_id}/files/{file_id}:
     get:
       responses:
         '200':
-          description: >-
-            A VectorStoreFileObject representing the file.
+          description: A VectorStoreFileObject representing the file.
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File description: Retrieves a vector store file. + operationId: openai_retrieve_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the updated file. + description: A VectorStoreFileObject representing the updated file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store file. + - Vector Io + summary: Openai Update Vector Store File description: Updates a vector store file. + operationId: openai_update_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to update. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreFileDeleteResponse indicating the deletion status. + description: A VectorStoreFileDeleteResponse indicating the deletion status. 
content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store file. + - Vector Io + summary: Openai Delete Vector Store File description: Delete a vector store file. + operationId: openai_delete_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__delete parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to delete. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to delete. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/files/{file_id}/content: get: responses: '200': - description: >- - A list of InterleavedContent representing the file contents. + description: File contents, optionally with embeddings and metadata based on query parameters. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Retrieves the contents of a vector store file. - description: >- - Retrieves the contents of a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File Contents + description: Retrieves the contents of a vector store file. + operationId: openai_retrieve_vector_store_file_contents_v1_vector_stores__vector_store_id__files__file_id__content_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. 
- required: true - schema: - type: string - deprecated: false + - name: include_embeddings + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Embeddings + - name: include_metadata + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Metadata + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/search: post: responses: '200': - description: >- - A VectorStoreSearchResponse containing the search results. + description: A VectorStoreSearchResponse containing the search results. content: application/json: schema: $ref: '#/components/schemas/VectorStoreSearchResponsePage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Search for chunks in a vector store. - description: >- + - Vector Io + summary: Openai Search Vector Store + description: |- Search for chunks in a vector store. - Searches a vector store for relevant chunks based on a query and optional - file attribute filters. + Searches a vector store for relevant chunks based on a query and optional file attribute filters. + operationId: openai_search_vector_store_v1_vector_stores__vector_store_id__search_post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to search. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' required: true - deprecated: false /v1/version: get: responses: '200': - description: >- - Version information containing the service version number. + description: Version information containing the service version number. content: application/json: schema: $ref: '#/components/schemas/VersionInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get version. - description: >- + - Inspect + summary: Version + description: |- Get version. Get the version of the service. - parameters: [] - deprecated: false -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema + operationId: version_v1_version_get components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. 
properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. - Order: - type: string - enum: - - asc - - desc - title: Order - description: Sort order for paginated responses. - ListOpenAIChatCompletionResponse: type: object + ListBatchesResponse: properties: - data: - type: array - items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages - has_more: - type: boolean - description: >- - Whether there are more completions available beyond this list - first_id: - type: string - description: ID of the first completion in this list - last_id: - type: string - description: ID of the last completion in this list object: type: string const: list + title: Object default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - OpenAIAssistantMessageParam: + data: + items: + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects + first_id: + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list + has_more: + type: boolean + title: Has More + description: Whether there are more batches available + default: false type: object + required: + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. 
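The ListBatchesResponse shape just above, together with the CreateBatchRequest and Batch schemas that follow, mirrors OpenAI's batch objects. A minimal paging sketch; the base URL and port, the /v1/batches path, and the `after` cursor parameter are assumptions, not shown in this hunk:

```python
import requests

BASE = "http://localhost:8321"  # assumed default Llama Stack port

resp = requests.get(f"{BASE}/v1/batches", timeout=30)  # path assumed
resp.raise_for_status()
page = resp.json()  # ListBatchesResponse

assert page["object"] == "list"
for batch in page["data"]:
    # id, status, and endpoint are required on Batch per the schema below
    print(batch["id"], batch["status"], batch["endpoint"])

# first_id/last_id are nullable; when has_more is set, last_id can serve
# as an "after" cursor (parameter name assumed)
if page.get("has_more") and page.get("last_id"):
    resp = requests.get(f"{BASE}/v1/batches",
                        params={"after": page["last_id"]}, timeout=30)
```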
+ CreateBatchRequest: + properties: + input_file_id: + type: string + title: Input File Id + endpoint: + type: string + title: Endpoint + completion_window: + type: string + const: 24h + title: Completion Window + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + idempotency_key: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + properties: + id: + type: string + title: Id + completion_window: + type: string + title: Completion Window + created_at: + type: integer + title: Created At + endpoint: + type: string + title: Endpoint + input_file_id: + type: string + title: Input File Id + object: + type: string + const: batch + title: Object + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status + cancelled_at: + anyOf: + - type: integer + - type: 'null' + cancelling_at: + anyOf: + - type: integer + - type: 'null' + completed_at: + anyOf: + - type: integer + - type: 'null' + error_file_id: + anyOf: + - type: string + - type: 'null' + errors: + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' + title: Errors + expired_at: + anyOf: + - type: integer + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + failed_at: + anyOf: + - type: integer + - type: 'null' + finalizing_at: + anyOf: + - type: integer + - type: 'null' + in_progress_at: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + model: + anyOf: + - type: string + - type: 'null' + output_file_id: + anyOf: + - type: string + - type: 'null' + request_counts: + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' + title: BatchRequestCounts + usage: + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' + title: BatchUsage + additionalProperties: true + type: object + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. + ListOpenAIChatCompletionResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. properties: role: - type: string const: assistant default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the model's response - name: + title: Role type: string - description: >- - (Optional) The name of the assistant message participant. 
+ content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: >- - List of tool calls. Each tool call is an OpenAIChatCompletionToolCall - object. - additionalProperties: false - required: - - role + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true title: OpenAIAssistantMessageParam - description: >- - A message containing the model's (assistant) response in an OpenAI-compatible - chat completion request. - "OpenAIChatCompletionContentPartImageParam": type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartParam: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - - $ref: '#/components/schemas/OpenAIFile' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - file: '#/components/schemas/OpenAIFile' - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionToolCall: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. 
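Per the content-part schemas just above, message content is either a bare string or a list of parts discriminated on `type` (text, image_url, file). A sketch of both forms; the image URL is a placeholder:

```python
# Plain-string content
text_message = {"role": "user", "content": "Describe this image."}

# Typed content parts per OpenAIChatCompletionContentPartTextParam and
# OpenAIChatCompletionContentPartImageParam; the URL is a placeholder
mixed_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url",
         "image_url": {"url": "https://example.com/cat.png", "detail": "auto"}},
    ],
}
```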
+ OpenAIChatCompletionToolCall: properties: index: - type: integer - description: >- - (Optional) Index of the tool call in the list + anyOf: + - type: integer + - type: 'null' id: - type: string - description: >- - (Optional) Unique identifier for the tool call + anyOf: + - type: string + - type: 'null' type: type: string const: function + title: Type default: function - description: >- - Must be "function" to identify this as a function call function: - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' - description: (Optional) Function call details - additionalProperties: false - required: - - type - title: OpenAIChatCompletionToolCall - description: >- - Tool call specification for OpenAI-compatible chat completion responses. - OpenAIChatCompletionToolCallFunction: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: properties: name: - type: string - description: (Optional) Name of the function to call + anyOf: + - type: string + - type: 'null' arguments: - type: string - description: >- - (Optional) Arguments to pass to the function as a JSON string - additionalProperties: false - title: OpenAIChatCompletionToolCallFunction - description: >- - Function call details for OpenAI-compatible tool calls. - OpenAIChatCompletionUsage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: properties: prompt_tokens: type: integer - description: Number of tokens in the prompt + title: Prompt Tokens completion_tokens: type: integer - description: Number of tokens in the completion + title: Completion Tokens total_tokens: type: integer - description: Total tokens used (prompt + completion) + title: Total Tokens prompt_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - title: >- - OpenAIChatCompletionUsagePromptTokensDetails - description: >- - Token details for prompt tokens in OpenAI chat completion usage. + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails completion_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - title: >- - OpenAIChatCompletionUsageCompletionTokensDetails - description: >- - Token details for output tokens in OpenAI chat completion usage. - additionalProperties: false - required: - - prompt_tokens - - completion_tokens - - total_tokens - title: OpenAIChatCompletionUsage - description: >- - Usage information for OpenAI chat completion. 
- OpenAIChoice: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: properties: message: oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... (5 variants) discriminator: propertyName: role mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' developer: '#/components/schemas/OpenAIDeveloperMessageParam' - description: The message from the model + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' finish_reason: type: string - description: The reason the model stopped generating + title: Finish Reason index: type: integer - description: The index of the choice + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - required: - - message - - finish_reason - - index - title: OpenAIChoice - description: >- - A choice from an OpenAI-compatible chat completion response. - OpenAIChoiceLogprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: properties: content: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' refusal: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - title: OpenAIChoiceLogprobs - description: >- - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. 
- OpenAIDeveloperMessageParam: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: properties: role: type: string const: developer + title: Role default: developer - description: >- - Must be "developer" to identify this as a developer message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the developer message + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the developer message participant. - additionalProperties: false - required: - - role - - content - title: OpenAIDeveloperMessageParam - description: >- - A message from the developer in an OpenAI-compatible chat completion request. - OpenAIFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: properties: type: type: string const: file + title: Type default: file file: $ref: '#/components/schemas/OpenAIFileFile' - additionalProperties: false + type: object required: - - type - - file + - file title: OpenAIFile OpenAIFileFile: - type: object properties: file_data: - type: string + anyOf: + - type: string + - type: 'null' file_id: - type: string + anyOf: + - type: string + - type: 'null' filename: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object title: OpenAIFileFile OpenAIImageURL: - type: object properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - OpenAIMessageParam: - oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' - developer: '#/components/schemas/OpenAIDeveloperMessageParam' - OpenAISystemMessageParam: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. 
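OpenAIMessageParam, defined next, discriminates the five message schemas in this section on their `role` field. A sketch of a conversation list mixing the variants (the tool_call_id value is a placeholder):

```python
# One entry per role variant of OpenAIMessageParam; shapes follow the
# schemas in this section. "call_abc123" is a placeholder identifier.
conversation = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4"},
    # Tool results require tool_call_id and content per OpenAIToolMessageParam
    {"role": "tool", "tool_call_id": "call_abc123", "content": '{"result": 4}'},
    # Developer messages mirror system messages under role "developer"
    {"role": "developer", "content": "Prefer metric units."},
]
```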
+ OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the system message participant. - additionalProperties: false - required: - - role - - content - title: OpenAISystemMessageParam - description: >- - A system message providing instructions or context to the model. - OpenAITokenLogProb: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number + title: Logprob top_logprobs: - type: array items: $ref: '#/components/schemas/OpenAITopLogProb' - additionalProperties: false - required: - - token - - logprob - - top_logprobs - title: OpenAITokenLogProb - description: >- - The log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIToolMessageParam: + type: array + title: Top Logprobs type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. 
+ + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: properties: role: type: string const: tool + title: Role default: tool - description: >- - Must be "tool" to identify this as a tool response tool_call_id: type: string - description: >- - Unique identifier for the tool call this response is for + title: Tool Call Id content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The response content from the tool - additionalProperties: false - required: - - role - - tool_call_id - - content - title: OpenAIToolMessageParam - description: >- - A message representing the result of a tool invocation in an OpenAI-compatible - chat completion request. - OpenAITopLogProb: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number - additionalProperties: false - required: - - token - - logprob - title: OpenAITopLogProb - description: >- - The top log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIUserMessageParam: + title: Logprob type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. properties: role: - type: string const: user default: user - description: >- - Must be "user" to identify this as a user message - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' - description: >- - The content of the message, which can include text and other media - name: + title: Role type: string - description: >- - (Optional) The name of the user message participant. 
- additionalProperties: false + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true required: - - role - - content + - content title: OpenAIUserMessageParam - description: >- - A message from the user in an OpenAI-compatible chat completion request. - OpenAIJSONSchema: type: object + OpenAIJSONSchema: properties: name: type: string - description: Name of the schema + title: Name description: - type: string - description: (Optional) Description of the schema + anyOf: + - type: string + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict adherence to the schema + anyOf: + - type: boolean + - type: 'null' schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The JSON schema definition - additionalProperties: false - required: - - name - title: OpenAIJSONSchema - description: >- - JSON schema specification for OpenAI-compatible structured response format. - OpenAIResponseFormatJSONObject: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: properties: type: type: string const: json_object + title: Type default: json_object - description: >- - Must be "json_object" to indicate generic JSON object response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatJSONObject - description: >- - JSON object response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatJSONSchema: type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: properties: type: type: string const: json_schema + title: Type default: json_schema - description: >- - Must be "json_schema" to indicate structured JSON response format json_schema: $ref: '#/components/schemas/OpenAIJSONSchema' - description: >- - The JSON schema specification for the response - additionalProperties: false - required: - - type - - json_schema - title: OpenAIResponseFormatJSONSchema - description: >- - JSON schema response format for OpenAI-compatible chat completion requests. 
- OpenAIResponseFormatParam: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseFormatText' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIResponseFormatText' - json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' - json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' - OpenAIResponseFormatText: type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to indicate plain text response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatText - description: >- - Text response format for OpenAI-compatible chat completion requests. - OpenAIChatCompletionRequestWithExtraBody: type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation. - frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - function_call: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The function call to use. - functions: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) List of functions to use. - logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. - logprobs: - type: boolean - description: (Optional) The log probabilities to use. - max_completion_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - n: - type: integer - description: >- - (Optional) The number of completions to generate. - parallel_tool_calls: - type: boolean - description: >- - (Optional) Whether to parallelize tool calls. - presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. 
- response_format: - $ref: '#/components/schemas/OpenAIResponseFormatParam' - description: (Optional) The response format to use. - seed: - type: integer - description: (Optional) The seed to use. - stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. - stream: - type: boolean - description: >- - (Optional) Whether to stream the response. - stream_options: - type: object - additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. - temperature: - type: number - description: (Optional) The temperature to use. - tool_choice: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tool choice to use. - tools: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) type: array - items: + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tools to use. 
+ - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' top_logprobs: - type: integer - description: >- - (Optional) The top log probabilities to use. + anyOf: + - type: integer + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. - additionalProperties: false - required: - - model - - messages - title: OpenAIChatCompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible chat completion endpoint. - OpenAIChatCompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. 
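OpenAIChatCompletionRequestWithExtraBody requires only `model` and a non-empty `messages` list (minItems: 1); every other field is nullable, and `additionalProperties: true` admits provider-specific extras. A minimal sketch of posting one with a structured-output response_format per the schemas above; the base URL, port, and /v1/chat/completions path are assumptions, not taken from this hunk:

```python
import requests

BASE = "http://localhost:8321"  # assumed default Llama Stack port

body = {
    "model": "my-model",  # placeholder; must be registered with the stack
    "messages": [{"role": "user", "content": "Give Paris facts as JSON."}],
    "temperature": 0.2,   # optional/nullable
    "response_format": {  # OpenAIResponseFormatJSONSchema
        "type": "json_schema",
        "json_schema": {  # OpenAIJSONSchema: only "name" is required
            "name": "city_info",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {"city": {"type": "string"},
                               "population": {"type": "integer"}},
                "required": ["city", "population"],
            },
        },
    },
}
r = requests.post(f"{BASE}/v1/chat/completions", json=body, timeout=60)
r.raise_for_status()
completion = r.json()  # OpenAIChatCompletion, defined just below
print(completion["choices"][0]["message"]["content"])
usage = completion.get("usage")  # nullable per OpenAIChatCompletionUsage
if usage:
    print("tokens:", usage["total_tokens"])
```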
+ OpenAIChatCompletion: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - additionalProperties: false - required: - - id - - choices - - object - - created - - model - title: OpenAIChatCompletion - description: >- - Response from an OpenAI-compatible chat completion request. - OpenAIChatCompletionChunk: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. properties: id: + title: Id type: string - description: The ID of the chat completion choices: - type: array items: $ref: '#/components/schemas/OpenAIChunkChoice' - description: List of choices + title: Choices + type: array object: - type: string const: chat.completion.chunk default: chat.completion.chunk - description: >- - The object type, which will be "chat.completion.chunk" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model type: string - description: >- - The model that was used to generate the chat completion usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information (typically included in final chunk with stream_options) - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage required: - - id - - choices - - object - - created - - model + - id + - choices + - created + - model title: OpenAIChatCompletionChunk - description: >- - Chunk from a streaming response to an OpenAI-compatible chat completion request. - OpenAIChoiceDelta: type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. 
properties: content: - type: string - description: (Optional) The content of the delta + anyOf: + - type: string + - type: 'null' + nullable: true refusal: - type: string - description: (Optional) The refusal of the delta + anyOf: + - type: string + - type: 'null' + nullable: true role: - type: string - description: (Optional) The role of the delta + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: (Optional) The tool calls of the delta + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true reasoning_content: - type: string - description: >- - (Optional) The reasoning content from the model (non-standard, for o1/o3 - models) - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true title: OpenAIChoiceDelta - description: >- - A delta from an OpenAI-compatible chat completion streaming response. - OpenAIChunkChoice: type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. properties: delta: $ref: '#/components/schemas/OpenAIChoiceDelta' - description: The delta from the chunk finish_reason: + title: Finish Reason type: string - description: The reason the model stopped generating index: + title: Index type: integer - description: The index of the choice logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs required: - - delta - - finish_reason - - index + - delta + - finish_reason + - index title: OpenAIChunkChoice - description: >- - A chunk choice from an OpenAI-compatible chat completion streaming response. 
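Per OpenAIChatCompletionChunk and OpenAIChoiceDelta above, every delta field is nullable, so accumulation loops must guard each access. A streaming sketch; the SSE framing, endpoint path, and base URL are assumptions rather than facts from this diff:

```python
import json
import requests

BASE = "http://localhost:8321"  # assumed default Llama Stack port

body = {
    "model": "my-model",  # placeholder
    "stream": True,
    "messages": [{"role": "user", "content": "Say hello."}],
}
pieces = []
with requests.post(f"{BASE}/v1/chat/completions", json=body,
                   stream=True, timeout=60) as r:
    r.raise_for_status()
    for line in r.iter_lines():
        if not line.startswith(b"data: "):
            continue  # assumed SSE framing: "data: {json}" lines
        payload = line[len(b"data: "):]
        if payload == b"[DONE]":
            break
        chunk = json.loads(payload)           # OpenAIChatCompletionChunk
        delta = chunk["choices"][0]["delta"]  # OpenAIChoiceDelta
        if delta.get("content"):              # nullable per the schema
            pieces.append(delta["content"])
print("".join(pieces))
```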
- OpenAICompletionWithInputMessages: type: object + OpenAICompletionWithInputMessages: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage input_messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object required: - - id - - choices - - object - - created - - model - - input_messages + - id + - choices + - created + - model + - input_messages title: OpenAICompletionWithInputMessages OpenAICompletionRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model prompt: - oneOf: - - type: string - - type: array - items: - type: string - - type: array + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: items: type: integer - - type: array - items: - type: array - items: - type: integer - description: The prompt to generate a completion for. + type: array + type: array + title: list[array] + title: string | ... (4 variants) best_of: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' echo: - type: boolean - description: (Optional) Whether to echo the prompt. + anyOf: + - type: boolean + - type: 'null' frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
+ anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' logprobs: - type: boolean - description: (Optional) The log probabilities to use. + anyOf: + - type: boolean + - type: 'null' max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. + anyOf: + - type: integer + - type: 'null' n: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' seed: - type: integer - description: (Optional) The seed to use. + anyOf: + - type: integer + - type: 'null' stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] stream: - type: boolean - description: >- - (Optional) Whether to stream the response. + anyOf: + - type: boolean + - type: 'null' stream_options: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. + anyOf: + - additionalProperties: true + type: object + - type: 'null' temperature: - type: number - description: (Optional) The temperature to use. + anyOf: + - type: number + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. + anyOf: + - type: string + - type: 'null' suffix: - type: string - description: >- - (Optional) The suffix that should be appended to the completion. - additionalProperties: false - required: - - model - - prompt - title: OpenAICompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible completion endpoint. - OpenAICompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: properties: id: type: string + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices created: type: integer + title: Created model: type: string + title: Model object: type: string const: text_completion + title: Object default: text_completion - additionalProperties: false - required: - - id - - choices - - created - - model - - object - title: OpenAICompletion - description: >- - Response from an OpenAI-compatible completion request. - OpenAICompletionChoice: type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. 
+ + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: properties: finish_reason: type: string + title: Finish Reason text: type: string + title: Text index: type: integer + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object required: - - finish_reason - - text - - index + - finish_reason + - text + - index title: OpenAICompletionChoice - description: >- + description: |- A choice from an OpenAI-compatible completion response. + + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice ConversationItem: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' @@ -4195,5859 +3954,7513 @@ components: mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: 
OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) OpenAIResponseAnnotationCitation: - type: object properties: type: type: string const: url_citation + title: Type default: url_citation - description: >- - Annotation type identifier, always "url_citation" end_index: type: integer - description: >- - End position of the citation span in the content + title: End Index start_index: type: integer - description: >- - Start position of the citation span in the content + title: Start Index title: type: string - description: Title of the referenced web resource + title: Title url: type: string - description: URL of the referenced web resource - additionalProperties: false - required: - - type - - end_index - - start_index - - title - - url - title: OpenAIResponseAnnotationCitation - description: >- - URL citation annotation for referencing external web resources. - "OpenAIResponseAnnotationContainerFileCitation": + title: Url type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. + OpenAIResponseAnnotationContainerFileCitation: properties: type: type: string const: container_file_citation + title: Type default: container_file_citation container_id: type: string + title: Container Id end_index: type: integer + title: End Index file_id: type: string + title: File Id filename: type: string + title: Filename start_index: type: integer - additionalProperties: false - required: - - type - - container_id - - end_index - - file_id - - filename - - start_index - title: >- - OpenAIResponseAnnotationContainerFileCitation - OpenAIResponseAnnotationFileCitation: + title: Start Index type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: properties: type: type: string const: file_citation + title: Type default: file_citation - description: >- - Annotation type identifier, always "file_citation" file_id: type: string - description: Unique identifier of the referenced file + title: File Id filename: type: string - description: Name of the referenced file + title: Filename index: type: integer - description: >- - Position index of the citation within the content - additionalProperties: false - required: - - type - - file_id - - filename - - index - title: OpenAIResponseAnnotationFileCitation - description: >- - File citation annotation for referencing specific files in response content. - OpenAIResponseAnnotationFilePath: + title: Index type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. 
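All four annotation variants in this cluster are discriminated by their "type" field. For orientation while reviewing the reshaped schemas, a minimal url_citation instance satisfying the required fields of OpenAIResponseAnnotationCitation might look like the following Python literal (all values are hypothetical):

    # Hypothetical url_citation annotation; start/end indices are character
    # offsets of the cited span within the output text.
    annotation = {
        "type": "url_citation",
        "start_index": 10,
        "end_index": 42,
        "title": "Example source page",
        "url": "https://example.com/article",
    }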
+ OpenAIResponseAnnotationFilePath: properties: type: type: string const: file_path + title: Type default: file_path file_id: type: string + title: File Id index: type: integer - additionalProperties: false + title: Index + type: object required: - - type - - file_id - - index + - file_id + - index title: OpenAIResponseAnnotationFilePath OpenAIResponseAnnotations: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) OpenAIResponseContentPartRefusal: - type: object properties: type: type: string const: refusal + title: Type default: refusal - description: >- - Content part type identifier, always "refusal" refusal: type: string - description: Refusal text supplied by the model - additionalProperties: false - required: - - type - - refusal - title: OpenAIResponseContentPartRefusal - description: >- - Refusal content within a streamed response part. - "OpenAIResponseInputFunctionToolCallOutput": + title: Refusal type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: properties: call_id: type: string + title: Call Id output: type: string + title: Output type: type: string const: function_call_output + title: Type default: function_call_output id: - type: string + anyOf: + - type: string + - type: 'null' status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. 
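Since id and status are now modeled as nullable anyOf members, the smallest valid function_call_output item needs only call_id and output (plus the constant type). A sketch with hypothetical values:

    # Minimal function_call_output conversation item per the schema above;
    # "id" and "status" are nullable and simply omitted here.
    tool_result_item = {
        "type": "function_call_output",
        "call_id": "call_123",              # hypothetical call identifier
        "output": '{"temperature_c": 21}',  # tool output serialized as a string
    }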
- OpenAIResponseInputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - discriminator: - propertyName: type - mapping: - input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' - input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' - OpenAIResponseInputMessageContentImage: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. + OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: + properties: + type: + type: string + const: input_file + title: Type + default: input_file + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + file_url: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: properties: detail: - oneOf: - - type: string - const: low - - type: string - const: high - - type: string - const: auto + title: Detail default: auto - description: >- - Level of detail for image processing, can be "low", "high", or "auto" + type: string + enum: + - low + - high + - auto type: type: string const: input_image + title: Type default: input_image - description: >- - Content type identifier, always "input_image" + file_id: + anyOf: + - type: string + - type: 'null' image_url: - type: string - description: (Optional) URL of the image content - additionalProperties: false - required: - - detail - - type - title: OpenAIResponseInputMessageContentImage - description: >- - Image content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentText: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: properties: text: type: string - description: The text content of the input message + title: Text type: type: string const: input_text + title: Type default: input_text - description: >- - Content type identifier, always "input_text" - additionalProperties: false - required: - - text - - type - title: OpenAIResponseInputMessageContentText - description: >- - Text content for input messages in OpenAI response format. 
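The input-content union above gains a third variant, input_file, alongside input_text and input_image. A user message mixing the three content parts could carry a list like this (ids and URLs are hypothetical):

    # Hypothetical mixed content list for an input message; each part is
    # discriminated by its "type" field.
    content_parts = [
        {"type": "input_text", "text": "Summarize the attached report."},
        {"type": "input_image", "detail": "auto",
         "image_url": "https://example.com/chart.png"},
        {"type": "input_file", "file_id": "file_abc123",  # or file_data / file_url
         "filename": "report.pdf"},
    ]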
- OpenAIResponseMCPApprovalRequest: type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: properties: arguments: type: string + title: Arguments id: type: string + title: Id name: type: string + title: Name server_label: type: string + title: Server Label type: type: string const: mcp_approval_request + title: Type default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: properties: approval_request_id: type: string + title: Approval Request Id approve: type: boolean + title: Approve type: type: string const: mcp_approval_response + title: Type default: mcp_approval_response id: - type: string + anyOf: + - type: string + - type: 'null' reason: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - approval_request_id - - approve - - type + - approval_request_id + - approve title: OpenAIResponseMCPApprovalResponse description: A response to an MCP approval request. OpenAIResponseMessage: - type: object + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
properties: content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] role: - oneOf: - - type: string - const: system - - type: string - const: developer - - type: string - const: user - - type: string - const: assistant - type: + title: Role type: string + enum: + - system + - developer + - user + - assistant + default: system + type: const: message default: message + title: Type + type: string id: - type: string + anyOf: + - type: string + - type: 'null' + nullable: true status: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - content - - role - - type + - content + - role title: OpenAIResponseMessage - description: >- - Corresponds to the various Message types in the Responses API. They are all - under one type because the Responses API gives them all the same "type" value, - and there is no way to tell them apart in certain scenarios. 
+ type: object OpenAIResponseOutputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' - "OpenAIResponseOutputMessageContentOutputText": - type: object + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: properties: text: type: string + title: Text type: type: string const: output_text + title: Type default: output_text annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - additionalProperties: false - required: - - text - - type - - annotations - title: >- - OpenAIResponseOutputMessageContentOutputText - "OpenAIResponseOutputMessageFileSearchToolCall": + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... 
(4 variants) + type: array + title: Annotations type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id queries: - type: array items: type: string - description: List of search queries executed + type: array + title: Queries status: type: string - description: >- - Current status of the file search operation + title: Status type: type: string const: file_search_call + title: Type default: file_search_call - description: >- - Tool call type identifier, always "file_search_call" results: - type: array - items: - type: object - properties: - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes associated with the file - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: >- - Relevance score for this search result (between 0 and 1) - text: - type: string - description: Text content of the search result - additionalProperties: false - required: - - attributes - - file_id - - filename - - score - - text - title: >- - OpenAIResponseOutputMessageFileSearchToolCallResults - description: >- - Search results returned by the file search operation. - description: >- - (Optional) Search results returned by the file search operation - additionalProperties: false - required: - - id - - queries - - status - - type - title: >- - OpenAIResponseOutputMessageFileSearchToolCall - description: >- - File search tool call output message for OpenAI responses. - "OpenAIResponseOutputMessageFunctionToolCall": + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: properties: call_id: type: string - description: Unique identifier for the function call + title: Call Id name: type: string - description: Name of the function being called + title: Name arguments: type: string - description: >- - JSON string containing the function arguments + title: Arguments type: type: string const: function_call + title: Type default: function_call - description: >- - Tool call type identifier, always "function_call" id: - type: string - description: >- - (Optional) Additional identifier for the tool call + anyOf: + - type: string + - type: 'null' status: - type: string - description: >- - (Optional) Current status of the function call execution - additionalProperties: false - required: - - call_id - - name - - arguments - - type - title: >- - OpenAIResponseOutputMessageFunctionToolCall - description: >- - Function tool call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPCall: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. 
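Every item schema in this section is discriminated by its "type" value, which is how the recurring mapping tables above are meant to be consumed. One possible client-side sketch of that dispatch (illustrative only, not part of the spec):

    # Branch on the "type" discriminator; field names follow the schemas above.
    def describe_item(item: dict) -> str:
        kind = item["type"]
        if kind == "function_call":
            return f"call {item['name']}({item['arguments']})"
        if kind == "function_call_output":
            return f"result for {item['call_id']}"
        return kind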
+ OpenAIResponseOutputMessageMCPCall: properties: id: type: string - description: Unique identifier for this MCP call + title: Id type: type: string const: mcp_call + title: Type default: mcp_call - description: >- - Tool call type identifier, always "mcp_call" arguments: type: string - description: >- - JSON string containing the MCP call arguments + title: Arguments name: type: string - description: Name of the MCP method being called + title: Name server_label: type: string - description: >- - Label identifying the MCP server handling the call + title: Server Label error: - type: string - description: >- - (Optional) Error message if the MCP call failed + anyOf: + - type: string + - type: 'null' output: - type: string - description: >- - (Optional) Output result from the successful MCP call - additionalProperties: false - required: - - id - - type - - arguments - - name - - server_label - title: OpenAIResponseOutputMessageMCPCall - description: >- - Model Context Protocol (MCP) call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPListTools: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: properties: id: type: string - description: >- - Unique identifier for this MCP list tools operation + title: Id type: type: string const: mcp_list_tools + title: Type default: mcp_list_tools - description: >- - Tool call type identifier, always "mcp_list_tools" server_label: type: string - description: >- - Label identifying the MCP server providing the tools + title: Server Label tools: - type: array items: - type: object - properties: - input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - JSON schema defining the tool's input parameters - name: - type: string - description: Name of the tool - description: - type: string - description: >- - (Optional) Description of what the tool does - additionalProperties: false - required: - - input_schema - - name - title: MCPListToolsTool - description: >- - Tool definition returned by MCP list tools operation. - description: >- - List of available tools provided by the MCP server - additionalProperties: false - required: - - id - - type - - server_label - - tools - title: OpenAIResponseOutputMessageMCPListTools - description: >- - MCP list tools output message containing available tools from an MCP server. - "OpenAIResponseOutputMessageWebSearchToolCall": + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. 
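A populated mcp_call item, per the schema above, requires id, arguments, name, and server_label, while error and output stay optional. Illustrative values only:

    # Hypothetical mcp_call item; "error" and "output" are nullable and omitted.
    mcp_call_item = {
        "type": "mcp_call",
        "id": "mcpc_001",
        "name": "search_docs",
        "arguments": '{"query": "rate limits"}',
        "server_label": "docs-server",
    }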
+ OpenAIResponseOutputMessageWebSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id status: type: string - description: >- - Current status of the web search operation + title: Status type: type: string const: web_search_call + title: Type default: web_search_call - description: >- - Tool call type identifier, always "web_search_call" - additionalProperties: false - required: - - id - - status - - type - title: >- - OpenAIResponseOutputMessageWebSearchToolCall - description: >- - Web search tool call output message for OpenAI responses. - CreateConversationRequest: type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + CreateConversationRequest: properties: items: - type: array - items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Initial items to include in the conversation context. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + - type: 'null' metadata: - type: object - additionalProperties: - type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object title: CreateConversationRequest Conversation: - type: object properties: id: type: string + title: Id + description: The unique ID of the conversation. object: type: string const: conversation + title: Object + description: The object type, which is always conversation. 
default: conversation created_at: type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. metadata: - type: object - additionalProperties: - type: string - items: - type: array - items: + anyOf: + - additionalProperties: + type: string type: object - title: dict - description: >- - dict() -> new empty dictionary dict(mapping) -> new dictionary initialized - from a mapping object's (key, value) pairs dict(iterable) -> new - dictionary initialized as if via: d = {} for k, v in iterable: d[k] - = v dict(**kwargs) -> new dictionary initialized with the name=value - pairs in the keyword argument list. For example: dict(one=1, two=2) - additionalProperties: false + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object required: - - id - - object - - created_at + - id + - created_at title: Conversation description: OpenAI-compatible conversation object. UpdateConversationRequest: - type: object properties: metadata: - type: object additionalProperties: type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + type: object + title: Metadata + type: object required: - - metadata + - metadata title: UpdateConversationRequest ConversationDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted conversation identifier object: type: string + title: Object + description: Object type default: conversation.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationDeletedResource description: Response for deleted conversation. 
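Putting CreateConversationRequest together: items is a nullable list of the discriminated item union, and metadata is a nullable string-to-string map. A request body sketch with hypothetical content:

    # Hypothetical CreateConversationRequest payload; both top-level fields
    # are nullable and could be omitted entirely.
    create_conversation_payload = {
        "items": [
            {
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "Hello!"}],
            }
        ],
        "metadata": {"project": "demo"},  # string values only, per the schema
    }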
ConversationItemList: - type: object properties: object: type: string + title: Object + description: Object type default: list data: - type: array items: - $ref: '#/components/schemas/ConversationItem' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (9 variants) + type: array + title: Data + description: List of conversation items first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list has_more: type: boolean + title: Has More + description: Whether there are more items available default: false - additionalProperties: false - required: - - object - - data - - has_more - title: ConversationItemList - description: >- - List of conversation items with pagination. - AddItemsRequest: type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + AddItemsRequest: properties: items: - type: array items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Items to include in the conversation context. 
- additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + title: Items + type: object required: - - items + - items title: AddItemsRequest ConversationItemDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted item identifier object: type: string + title: Object + description: Object type default: conversation.item.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationItemDeletedResource description: Response for deleted conversation item. OpenAIEmbeddingsRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. + title: Model input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input text to embed, encoded as a string or array of strings. To embed - multiple inputs in a single request, pass an array of strings. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] encoding_format: - type: string + anyOf: + - type: string + - type: 'null' default: float - description: >- - (Optional) The format to return the embeddings in. Can be either "float" - or "base64". Defaults to "float". dimensions: - type: integer - description: >- - (Optional) The number of dimensions the resulting output embeddings should - have. 
Only supported in text-embedding-3 and later models. + anyOf: + - type: integer + - type: 'null' user: - type: string - description: >- - (Optional) A unique identifier representing your end-user, which can help - OpenAI to monitor and detect abuse. - additionalProperties: false - required: - - model - - input - title: OpenAIEmbeddingsRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible embeddings endpoint. - OpenAIEmbeddingData: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: properties: object: type: string const: embedding + title: Object default: embedding - description: >- - The object type, which will be "embedding" embedding: - oneOf: - - type: array - items: - type: number - - type: string - description: >- - The embedding vector as a list of floats (when encoding_format="float") - or as a base64-encoded string (when encoding_format="base64") + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string index: type: integer - description: >- - The index of the embedding in the input list - additionalProperties: false - required: - - object - - embedding - - index - title: OpenAIEmbeddingData - description: >- - A single embedding data object from an OpenAI-compatible embeddings response. - OpenAIEmbeddingUsage: + title: Index type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: properties: prompt_tokens: type: integer - description: The number of tokens in the input + title: Prompt Tokens total_tokens: type: integer - description: The total number of tokens used - additionalProperties: false - required: - - prompt_tokens - - total_tokens - title: OpenAIEmbeddingUsage - description: >- - Usage information for an OpenAI-compatible embeddings response. - OpenAIEmbeddingsResponse: + title: Total Tokens type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: properties: object: type: string const: list + title: Object default: list - description: The object type, which will be "list" data: - type: array items: $ref: '#/components/schemas/OpenAIEmbeddingData' - description: List of embedding data objects + type: array + title: Data model: type: string - description: >- - The model that was used to generate the embeddings + title: Model usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' - description: Usage information - additionalProperties: false + type: object required: - - object - - data - - model - - usage + - data + - model + - usage title: OpenAIEmbeddingsResponse - description: >- - Response from an OpenAI-compatible embeddings request. + description: Response from an OpenAI-compatible embeddings request. OpenAIFilePurpose: type: string enum: - - assistants - - batch + - assistants + - batch title: OpenAIFilePurpose - description: >- - Valid purpose values for OpenAI Files API. + description: Valid purpose values for OpenAI Files API. 
ListOpenAIFileResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/OpenAIFileObject' - description: List of file objects + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more files available beyond this page + title: Has More first_id: type: string - description: >- - ID of the first file in the list for pagination + title: First Id last_id: type: string - description: >- - ID of the last file in the list for pagination + title: Last Id object: type: string const: list + title: Object default: list - description: The object type, which is always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIFileResponse - description: >- - Response for listing files in OpenAI Files API. - OpenAIFileObject: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: properties: object: type: string const: file + title: Object default: file - description: The object type, which is always "file" id: type: string - description: >- - The file identifier, which can be referenced in the API endpoints + title: Id bytes: type: integer - description: The size of the file, in bytes + title: Bytes created_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file was created + title: Created At expires_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file expires + title: Expires At filename: type: string - description: The name of the file + title: Filename purpose: - type: string - enum: - - assistants - - batch - description: The intended purpose of the file - additionalProperties: false - required: - - object - - id - - bytes - - created_at - - expires_at - - filename - - purpose - title: OpenAIFileObject - description: >- - OpenAI File object as defined in the OpenAI Files API. - ExpiresAfter: + $ref: '#/components/schemas/OpenAIFilePurpose' type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: properties: anchor: type: string const: created_at + title: Anchor seconds: type: integer - additionalProperties: false + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object required: - - anchor - - seconds + - anchor + - seconds title: ExpiresAfter - description: >- + description: |- Control expiration of uploaded files. Params: - anchor, must be "created_at" - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) OpenAIFileDeleteResponse: - type: object properties: id: type: string - description: The file identifier that was deleted + title: Id object: type: string const: file + title: Object default: file - description: The object type, which is always "file" deleted: type: boolean - description: >- - Whether the file was successfully deleted - additionalProperties: false + title: Deleted + type: object required: - - id - - object - - deleted + - id + - deleted title: OpenAIFileDeleteResponse - description: >- - Response for deleting a file in OpenAI Files API. + description: Response for deleting a file in OpenAI Files API. 
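The new numeric bounds on ExpiresAfter.seconds (3600 to 2592000) are now enforced by the schema rather than described in prose. A file-upload body using it, with a hypothetical one-day expiry:

    # Hypothetical upload request body; "seconds" must fall in [3600, 2592000]
    # (one hour to thirty days) and "anchor" is fixed to "created_at".
    upload_request = {
        "purpose": "assistants",  # an OpenAIFilePurpose value
        "expires_after": {"anchor": "created_at", "seconds": 86400},
    }
    assert 3600 <= upload_request["expires_after"]["seconds"] <= 2592000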
Response: - type: object title: Response - HealthInfo: type: object + HealthInfo: properties: status: - type: string - enum: - - OK - - Error - - Not Implemented - description: Current health status of the service - additionalProperties: false - required: - - status - title: HealthInfo - description: >- - Health status information for the service. - RouteInfo: + $ref: '#/components/schemas/HealthStatus' type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: properties: route: type: string - description: The API endpoint path + title: Route method: type: string - description: HTTP method for the route + title: Method provider_types: - type: array items: type: string - description: >- - List of provider types that implement this route - additionalProperties: false - required: - - route - - method - - provider_types - title: RouteInfo - description: >- - Information about an API route including its path, method, and implementing - providers. - ListRoutesResponse: + type: array + title: Provider Types type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: properties: data: - type: array items: $ref: '#/components/schemas/RouteInfo' - description: >- - List of available route information objects - additionalProperties: false - required: - - data - title: ListRoutesResponse - description: >- - Response containing a list of all available API routes. - Model: + type: array + title: Data type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: + properties: + id: + type: string + title: Id + object: + type: string + const: model + title: Object + default: model + created: + type: integer + title: Created + owned_by: + type: string + title: Owned By + custom_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIModel' + type: array + title: Data + type: object + required: + - data + title: OpenAIListModelsResponse + Model: properties: identifier: type: string - description: >- - Unique identifier for this resource in llama stack + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string - description: >- - ID of the provider that owns this resource + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: model + title: Type default: model - description: >- - The resource type, always 'model' for model resources metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Any additional metadata for this model model_type: $ref: '#/components/schemas/ModelType' default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false + type: object required: - - identifier - - provider_id - - type - - metadata - - model_type + - identifier + - provider_id title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. + description: A model resource representing an AI model registered in Llama Stack. ModelType: type: string enum: - - llm - - embedding + - llm + - embedding + - rerank title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - ListModelsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Model' - additionalProperties: false - required: - - data - title: ListModelsResponse - RegisterModelRequest: - type: object - properties: - model_id: - type: string - description: The identifier of the model to register. - provider_model_id: - type: string - description: >- - The identifier of the model in the provider. - provider_id: - type: string - description: The identifier of the provider. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model. - model_type: - $ref: '#/components/schemas/ModelType' - description: The type of model to register. - additionalProperties: false - required: - - model_id - title: RegisterModelRequest + description: Enumeration of supported model types in Llama Stack. RunModerationRequest: - type: object properties: input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input (or inputs) to classify. Can be a single string, an array of strings, - or an array of multi-modal input objects similar to other models. 
+ anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] model: - type: string - description: >- - The content moderation model you would like to use. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input - - model + - input title: RunModerationRequest ModerationObject: - type: object properties: id: type: string - description: >- - The unique identifier for the moderation request. + title: Id model: type: string - description: >- - The model used to generate the moderation results. + title: Model results: - type: array items: $ref: '#/components/schemas/ModerationObjectResults' - description: A list of moderation objects - additionalProperties: false + type: array + title: Results + type: object required: - - id - - model - - results + - id + - model + - results title: ModerationObject description: A moderation object. ModerationObjectResults: - type: object properties: flagged: type: boolean - description: >- - Whether any of the below categories are flagged. + title: Flagged categories: - type: object - additionalProperties: - type: boolean - description: >- - A list of the categories, and whether they are flagged or not. + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' category_applied_input_types: - type: object - additionalProperties: - type: array - items: - type: string - description: >- - A list of the categories along with the input type(s) that the score applies - to. + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' category_scores: - type: object - additionalProperties: - type: number - description: >- - A list of the categories along with their scores as predicted by model. + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' user_message: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false + title: Metadata + type: object required: - - flagged - - metadata + - flagged title: ModerationObjectResults description: A moderation object. Prompt: - type: object properties: prompt: - type: string - description: >- - The system prompt text with variable placeholders. Variables are only - supported when using the Responses API. 
+ anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders version: type: integer - description: >- - Version (integer starting at 1, incremented on save) + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) prompt_id: type: string - description: >- - Unique identifier formatted as 'pmpt_<48-digit-hash>' + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' variables: - type: array items: type: string - description: >- - List of prompt variable names that can be used in the prompt template + type: array + title: Variables + description: List of variable names that can be used in the prompt template is_default: type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version default: false - description: >- - Boolean indicating whether this version is the default version for this - prompt - additionalProperties: false - required: - - version - - prompt_id - - variables - - is_default - title: Prompt - description: >- - A prompt resource representing a stored OpenAI Compatible prompt template - in Llama Stack. - ListPromptsResponse: type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. + ListPromptsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Prompt' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListPromptsResponse description: Response model to list prompts. CreatePromptRequest: - type: object properties: prompt: type: string - description: >- - The prompt text content with variable placeholders. + title: Prompt variables: - type: array - items: - type: string - description: >- - List of variable names that can be used in the prompt template. - additionalProperties: false + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object required: - - prompt + - prompt title: CreatePromptRequest UpdatePromptRequest: - type: object properties: prompt: type: string - description: The updated prompt text content. + title: Prompt version: type: integer - description: >- - The current version of the prompt being updated. + title: Version variables: - type: array - items: - type: string - description: >- - Updated list of variable names that can be used in the prompt template. + anyOf: + - items: + type: string + type: array + - type: 'null' set_as_default: type: boolean - description: >- - Set the new version as the default (default=True). - additionalProperties: false + title: Set As Default + default: true + type: object required: - - prompt - - version - - set_as_default + - prompt + - version title: UpdatePromptRequest SetDefaultVersionRequest: - type: object properties: version: type: integer - description: The version to set as default. 
- additionalProperties: false + title: Version + type: object required: - - version + - version title: SetDefaultVersionRequest ProviderInfo: - type: object properties: api: type: string - description: The API name this provider implements + title: Api provider_id: type: string - description: Unique identifier for the provider + title: Provider Id provider_type: type: string - description: The type of provider implementation + title: Provider Type config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Configuration parameters for the provider + title: Config health: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Current health status of the provider - additionalProperties: false - required: - - api - - provider_id - - provider_type - - config - - health - title: ProviderInfo - description: >- - Information about a registered provider including its configuration and health - status. - ListProvidersResponse: + title: Health type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: properties: data: - type: array items: $ref: '#/components/schemas/ProviderInfo' - description: List of provider information objects - additionalProperties: false - required: - - data - title: ListProvidersResponse - description: >- - Response containing a list of all available providers. - ListOpenAIResponseObject: + type: array + title: Data type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. + ListOpenAIResponseObject: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more results available beyond this page + title: Has More first_id: type: string - description: >- - Identifier of the first item in this page + title: First Id last_id: type: string - description: Identifier of the last item in this page + title: Last Id object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. - OpenAIResponseError: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: properties: code: type: string - description: >- - Error code identifying the type of failure + title: Code message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: OpenAIResponseError - description: >- - Error details for failed OpenAI response requests. 
-    OpenAIResponseInput:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-        - $ref: '#/components/schemas/OpenAIResponseMessage'
-    OpenAIResponseInputToolFileSearch:
+          title: Message
       type: object
+      required:
+      - code
+      - message
+      title: OpenAIResponseError
+      description: Error details for failed OpenAI response requests.
+    OpenAIResponseInput:
+      anyOf:
+      - discriminator:
+          mapping:
+            file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            message: '#/components/schemas/OpenAIResponseMessage'
+            web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+          propertyName: type
+        oneOf:
+        - $ref: '#/components/schemas/OpenAIResponseMessage'
+          title: OpenAIResponseMessage
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+          title: OpenAIResponseOutputMessageWebSearchToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+          title: OpenAIResponseOutputMessageFileSearchToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          title: OpenAIResponseOutputMessageFunctionToolCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+          title: OpenAIResponseOutputMessageMCPCall
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+          title: OpenAIResponseOutputMessageMCPListTools
+        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+          title: OpenAIResponseMCPApprovalRequest
+        title: OpenAIResponseMessage | ... (7 variants)
+      - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+        title: OpenAIResponseInputFunctionToolCallOutput
+      - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+        title: OpenAIResponseMCPApprovalResponse
+      - $ref: '#/components/schemas/OpenAIResponseMessage'
+        title: OpenAIResponseMessage
+      title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage
+    OpenAIResponseInputToolFileSearch:
       properties:
         type:
           type: string
           const: file_search
+          title: Type
           default: file_search
-          description: >-
-            Tool type identifier, always "file_search"
         vector_store_ids:
-          type: array
           items:
             type: string
-          description: >-
-            List of vector store identifiers to search within
+          type: array
+          title: Vector Store Ids
         filters:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional filters to apply to the search
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         max_num_results:
-          type: integer
+          anyOf:
+          - type: integer
+            maximum: 50.0
+            minimum: 1.0
+          - type: 'null'
           default: 10
-          description: >-
-            (Optional) Maximum number of search results to return (1-50)
         ranking_options:
-          type: object
-          properties:
-            ranker:
-              type: string
-              description: >-
-                (Optional) Name of the ranking algorithm to use
-            score_threshold:
-              type: number
-              default: 0.0
-              description: >-
-                (Optional) Minimum relevance score threshold for results
-          additionalProperties: false
-          description: >-
-            (Optional) Options for ranking and scoring search results
-      additionalProperties: false
-      required:
-        - type
-        - vector_store_ids
-      title: OpenAIResponseInputToolFileSearch
-      description: >-
-        File search tool configuration for OpenAI response inputs.
-    OpenAIResponseInputToolFunction:
+          anyOf:
+          - $ref: '#/components/schemas/SearchRankingOptions'
+            title: SearchRankingOptions
+          - type: 'null'
+          title: SearchRankingOptions
       type: object
+      required:
+      - vector_store_ids
+      title: OpenAIResponseInputToolFileSearch
+      description: File search tool configuration for OpenAI response inputs.
+    OpenAIResponseInputToolFunction:
       properties:
         type:
           type: string
           const: function
+          title: Type
           default: function
-          description: Tool type identifier, always "function"
         name:
           type: string
-          description: Name of the function that can be called
+          title: Name
         description:
-          type: string
-          description: >-
-            (Optional) Description of what the function does
+          anyOf:
+          - type: string
+          - type: 'null'
         parameters:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) JSON schema defining the function's parameters
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         strict:
-          type: boolean
-          description: >-
-            (Optional) Whether to enforce strict parameter validation
-      additionalProperties: false
-      required:
-        - type
-        - name
-      title: OpenAIResponseInputToolFunction
-      description: >-
-        Function tool configuration for OpenAI response inputs.
-    OpenAIResponseInputToolWebSearch:
+          anyOf:
+          - type: boolean
+          - type: 'null'
       type: object
+      required:
+      - name
+      - parameters
+      title: OpenAIResponseInputToolFunction
+      description: Function tool configuration for OpenAI response inputs.
+    OpenAIResponseInputToolWebSearch:
       properties:
         type:
-          oneOf:
-            - type: string
-              const: web_search
-            - type: string
-              const: web_search_preview
-            - type: string
-              const: web_search_preview_2025_03_11
+          title: Type
           default: web_search
-          description: Web search tool type variant to use
-        search_context_size:
           type: string
+          enum:
+          - web_search
+          - web_search_preview
+          - web_search_preview_2025_03_11
+          - web_search_2025_08_26
+        search_context_size:
+          anyOf:
+          - type: string
+            pattern: ^low|medium|high$
+          - type: 'null'
           default: medium
-          description: >-
-            (Optional) Size of search context, must be "low", "medium", or "high"
-      additionalProperties: false
-      required:
-        - type
-      title: OpenAIResponseInputToolWebSearch
-      description: >-
-        Web search tool configuration for OpenAI response inputs.
-    OpenAIResponseObjectWithInput:
       type: object
+      title: OpenAIResponseInputToolWebSearch
+      description: Web search tool configuration for OpenAI response inputs.
+    OpenAIResponseObjectWithInput:
       properties:
         created_at:
           type: integer
-          description: >-
-            Unix timestamp when the response was created
+          title: Created At
         error:
-          $ref: '#/components/schemas/OpenAIResponseError'
-          description: >-
-            (Optional) Error details if the response generation failed
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseError'
+            title: OpenAIResponseError
+          - type: 'null'
+          title: OpenAIResponseError
         id:
           type: string
-          description: Unique identifier for this response
+          title: Id
         model:
           type: string
-          description: Model identifier used for generation
+          title: Model
         object:
           type: string
           const: response
+          title: Object
           default: response
-          description: >-
-            Object type identifier, always "response"
         output:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseOutput'
-          description: >-
-            List of generated output items (messages, tool calls, etc.)
+            oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseOutputMessageWebSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              title: OpenAIResponseOutputMessageFileSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              title: OpenAIResponseOutputMessageFunctionToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              title: OpenAIResponseOutputMessageMCPCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              title: OpenAIResponseOutputMessageMCPListTools
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              title: OpenAIResponseMCPApprovalRequest
+            discriminator:
+              propertyName: type
+              mapping:
+                file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                message: '#/components/schemas/OpenAIResponseMessage-Output'
+                web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseMessage-Output | ... (7 variants)
+          type: array
+          title: Output
         parallel_tool_calls:
-          type: boolean
-          default: false
-          description: >-
-            Whether tool calls can be executed in parallel
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
-          type: string
-          description: >-
-            (Optional) ID of the previous response in a conversation
+          anyOf:
+          - type: string
+          - type: 'null'
+        prompt:
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponsePrompt'
+            title: OpenAIResponsePrompt
+          - type: 'null'
+          title: OpenAIResponsePrompt
         status:
           type: string
-          description: >-
-            Current status of the response generation
+          title: Status
         temperature:
-          type: number
-          description: >-
-            (Optional) Sampling temperature used for generation
+          anyOf:
+          - type: number
+          - type: 'null'
         text:
           $ref: '#/components/schemas/OpenAIResponseText'
-          description: >-
-            Text formatting configuration for the response
+          default:
+            format:
+              type: text
         top_p:
-          type: number
-          description: >-
-            (Optional) Nucleus sampling parameter used for generation
+          anyOf:
+          - type: number
+          - type: 'null'
         tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIResponseTool'
-          description: >-
-            (Optional) An array of tools the model may call while generating a response.
+          anyOf:
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                title: OpenAIResponseInputToolFileSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                title: OpenAIResponseInputToolFunction
+              - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+                title: OpenAIResponseToolMCP
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                  function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                  mcp: '#/components/schemas/OpenAIResponseToolMCP'
+                  web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+              title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+            type: array
+          - type: 'null'
         truncation:
-          type: string
-          description: >-
-            (Optional) Truncation strategy applied to the response
+          anyOf:
+          - type: string
+          - type: 'null'
         usage:
-          $ref: '#/components/schemas/OpenAIResponseUsage'
-          description: >-
-            (Optional) Token usage information for the response
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseUsage'
+            title: OpenAIResponseUsage
+          - type: 'null'
+          title: OpenAIResponseUsage
         instructions:
-          type: string
-          description: >-
-            (Optional) System message inserted into the model's context
+          anyOf:
+          - type: string
+          - type: 'null'
+        max_tool_calls:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
         input:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseInput'
-          description: >-
-            List of input items that led to this response
-      additionalProperties: false
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseOutputMessageWebSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                title: OpenAIResponseOutputMessageFileSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                title: OpenAIResponseOutputMessageFunctionToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                title: OpenAIResponseOutputMessageMCPCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                title: OpenAIResponseOutputMessageMCPListTools
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                title: OpenAIResponseMCPApprovalRequest
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                  function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                  mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                  mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                  mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                  message: '#/components/schemas/OpenAIResponseMessage-Output'
+                  web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseMessage-Output | ... (7 variants)
+            - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+              title: OpenAIResponseInputFunctionToolCallOutput
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+              title: OpenAIResponseMCPApprovalResponse
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output
+          type: array
+          title: Input
+      type: object
       required:
-        - created_at
-        - id
-        - model
-        - object
-        - output
-        - parallel_tool_calls
-        - status
-        - text
-        - input
+      - created_at
+      - id
+      - model
+      - output
+      - status
+      - input
       title: OpenAIResponseObjectWithInput
-      description: >-
-        OpenAI response object extended with input context information.
+      description: OpenAI response object extended with input context information.
     OpenAIResponseOutput:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseMessage'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
       discriminator:
-        propertyName: type
         mapping:
-          message: '#/components/schemas/OpenAIResponseMessage'
-          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
           file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
           function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
           mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-    OpenAIResponseText:
+          message: '#/components/schemas/OpenAIResponseMessage'
+          web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseMessage'
+        title: OpenAIResponseMessage
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        title: OpenAIResponseOutputMessageWebSearchToolCall
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+        title: OpenAIResponseOutputMessageFileSearchToolCall
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        title: OpenAIResponseOutputMessageFunctionToolCall
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+        title: OpenAIResponseOutputMessageMCPCall
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+        title: OpenAIResponseOutputMessageMCPListTools
+      - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+        title: OpenAIResponseMCPApprovalRequest
+      title: OpenAIResponseMessage | ... (7 variants)
+    OpenAIResponsePrompt:
+      properties:
+        id:
+          type: string
+          title: Id
+        variables:
+          anyOf:
+          - additionalProperties:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+                title: OpenAIResponseInputMessageContentText
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                title: OpenAIResponseInputMessageContentImage
+              - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                title: OpenAIResponseInputMessageContentFile
+              discriminator:
+                propertyName: type
+                mapping:
+                  input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+                  input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+                  input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+              title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile
+            type: object
+          - type: 'null'
+        version:
+          anyOf:
+          - type: string
+          - type: 'null'
       type: object
+      required:
+      - id
+      title: OpenAIResponsePrompt
+      description: OpenAI compatible Prompt object that is used in OpenAI responses.
+    OpenAIResponseText:
       properties:
         format:
-          type: object
-          properties:
-            type:
-              oneOf:
-                - type: string
-                  const: text
-                - type: string
-                  const: json_schema
-                - type: string
-                  const: json_object
-              description: >-
-                Must be "text", "json_schema", or "json_object" to identify the format
-                type
-            name:
-              type: string
-              description: >-
-                The name of the response format. Only used for json_schema.
-            schema:
-              type: object
-              additionalProperties:
-                oneOf:
-                  - type: 'null'
-                  - type: boolean
-                  - type: number
-                  - type: string
-                  - type: array
-                  - type: object
-              description: >-
-                The JSON schema the response should conform to. In a Python SDK, this
-                is often a `pydantic` model. Only used for json_schema.
-            description:
-              type: string
-              description: >-
-                (Optional) A description of the response format. Only used for json_schema.
-            strict:
-              type: boolean
-              description: >-
-                (Optional) Whether to strictly enforce the JSON schema. If true, the
-                response must match the schema exactly. Only used for json_schema.
-          additionalProperties: false
-          required:
-            - type
-          description: >-
-            (Optional) Text format configuration specifying output format requirements
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseTextFormat'
+            title: OpenAIResponseTextFormat
+          - type: 'null'
+          title: OpenAIResponseTextFormat
+      type: object
       title: OpenAIResponseText
-      description: >-
-        Text response configuration for OpenAI responses.
+      description: Text response configuration for OpenAI responses.
     OpenAIResponseTool:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
-        - $ref: '#/components/schemas/OpenAIResponseToolMCP'
       discriminator:
-        propertyName: type
         mapping:
-          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
           file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
           function: '#/components/schemas/OpenAIResponseInputToolFunction'
           mcp: '#/components/schemas/OpenAIResponseToolMCP'
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        title: OpenAIResponseInputToolWebSearch
+      - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+        title: OpenAIResponseInputToolFileSearch
+      - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+        title: OpenAIResponseInputToolFunction
+      - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+        title: OpenAIResponseToolMCP
+      title: OpenAIResponseInputToolWebSearch | ... (4 variants)
     OpenAIResponseToolMCP:
-      type: object
       properties:
         type:
           type: string
           const: mcp
+          title: Type
           default: mcp
-          description: Tool type identifier, always "mcp"
         server_label:
           type: string
-          description: Label to identify this MCP server
+          title: Server Label
         allowed_tools:
-          oneOf:
-            - type: array
-              items:
-                type: string
-            - type: object
-              properties:
-                tool_names:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of specific tool names that are allowed
-              additionalProperties: false
-              title: AllowedToolsFilter
-              description: >-
-                Filter configuration for restricting which MCP tools can be used.
-          description: >-
-            (Optional) Restriction on which tools can be used from this server
-      additionalProperties: false
-      required:
-        - type
-        - server_label
-      title: OpenAIResponseToolMCP
-      description: >-
-        Model Context Protocol (MCP) tool configuration for OpenAI response object.
-    OpenAIResponseUsage:
+          anyOf:
+          - items:
+              type: string
+            type: array
+            title: list[string]
+          - $ref: '#/components/schemas/AllowedToolsFilter'
+            title: AllowedToolsFilter
+          - type: 'null'
+          title: list[string] | AllowedToolsFilter
       type: object
+      required:
+      - server_label
+      title: OpenAIResponseToolMCP
+      description: Model Context Protocol (MCP) tool configuration for OpenAI response object.
+    OpenAIResponseUsage:
       properties:
         input_tokens:
           type: integer
-          description: Number of tokens in the input
+          title: Input Tokens
         output_tokens:
           type: integer
-          description: Number of tokens in the output
+          title: Output Tokens
         total_tokens:
           type: integer
-          description: Total tokens used (input + output)
+          title: Total Tokens
         input_tokens_details:
-          type: object
-          properties:
-            cached_tokens:
-              type: integer
-              description: Number of tokens retrieved from cache
-          additionalProperties: false
-          description: Detailed breakdown of input token usage
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails'
+            title: OpenAIResponseUsageInputTokensDetails
+          - type: 'null'
+          title: OpenAIResponseUsageInputTokensDetails
        output_tokens_details:
-          type: object
-          properties:
-            reasoning_tokens:
-              type: integer
-              description: >-
-                Number of tokens used for reasoning (o1/o3 models)
-          additionalProperties: false
-          description: Detailed breakdown of output token usage
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails'
+            title: OpenAIResponseUsageOutputTokensDetails
+          - type: 'null'
+          title: OpenAIResponseUsageOutputTokensDetails
+      type: object
       required:
-        - input_tokens
-        - output_tokens
-        - total_tokens
+      - input_tokens
+      - output_tokens
+      - total_tokens
       title: OpenAIResponseUsage
       description: Usage information for OpenAI response.
     ResponseGuardrailSpec:
-      type: object
+      description: Specification for a guardrail to apply during response generation.
       properties:
         type:
+          title: Type
           type: string
-          description: The type/identifier of the guardrail.
-      additionalProperties: false
       required:
-        - type
+      - type
       title: ResponseGuardrailSpec
-      description: >-
-        Specification for a guardrail to apply during response generation.
+      type: object
     OpenAIResponseInputTool:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
-        - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
       discriminator:
-        propertyName: type
         mapping:
-          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
           file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
           function: '#/components/schemas/OpenAIResponseInputToolFunction'
           mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+          web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+          web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+        title: OpenAIResponseInputToolWebSearch
+      - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+        title: OpenAIResponseInputToolFileSearch
+      - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+        title: OpenAIResponseInputToolFunction
+      - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+        title: OpenAIResponseInputToolMCP
+      title: OpenAIResponseInputToolWebSearch | ... (4 variants)
     OpenAIResponseInputToolMCP:
-      type: object
       properties:
         type:
           type: string
           const: mcp
+          title: Type
           default: mcp
-          description: Tool type identifier, always "mcp"
         server_label:
           type: string
-          description: Label to identify this MCP server
+          title: Server Label
         server_url:
           type: string
-          description: URL endpoint of the MCP server
+          title: Server Url
         headers:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) HTTP headers to include when connecting to the server
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        authorization:
+          anyOf:
+          - type: string
+          - type: 'null'
         require_approval:
-          oneOf:
-            - type: string
-              const: always
-            - type: string
-              const: never
-            - type: object
-              properties:
-                always:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of tool names that always require approval
-                never:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of tool names that never require approval
-              additionalProperties: false
-              title: ApprovalFilter
-              description: >-
-                Filter configuration for MCP tool approval requirements.
+          anyOf:
+          - type: string
+            const: always
+          - type: string
+            const: never
+          - $ref: '#/components/schemas/ApprovalFilter'
+            title: ApprovalFilter
+          title: string | ApprovalFilter
           default: never
-          description: >-
-            Approval requirement for tool calls ("always", "never", or filter)
         allowed_tools:
-          oneOf:
-            - type: array
-              items:
-                type: string
-            - type: object
-              properties:
-                tool_names:
-                  type: array
-                  items:
-                    type: string
-                  description: >-
-                    (Optional) List of specific tool names that are allowed
-              additionalProperties: false
-              title: AllowedToolsFilter
-              description: >-
-                Filter configuration for restricting which MCP tools can be used.
-          description: >-
-            (Optional) Restriction on which tools can be used from this server
-      additionalProperties: false
-      required:
-        - type
-        - server_label
-        - server_url
-        - require_approval
-      title: OpenAIResponseInputToolMCP
-      description: >-
-        Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
-    CreateOpenaiResponseRequest:
+          anyOf:
+          - items:
+              type: string
+            type: array
+            title: list[string]
+          - $ref: '#/components/schemas/AllowedToolsFilter'
+            title: AllowedToolsFilter
+          - type: 'null'
+          title: list[string] | AllowedToolsFilter
       type: object
+      required:
+      - server_label
+      - server_url
+      title: OpenAIResponseInputToolMCP
+      description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
+    CreateOpenaiResponseRequest:
       properties:
         input:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/OpenAIResponseInput'
-          description: Input message(s) to create the response.
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - oneOf:
+                - $ref: '#/components/schemas/OpenAIResponseMessage-Input'
+                  title: OpenAIResponseMessage-Input
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                  title: OpenAIResponseOutputMessageWebSearchToolCall
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                  title: OpenAIResponseOutputMessageFileSearchToolCall
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                  title: OpenAIResponseOutputMessageFunctionToolCall
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                  title: OpenAIResponseOutputMessageMCPCall
+                - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                  title: OpenAIResponseOutputMessageMCPListTools
+                - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                  title: OpenAIResponseMCPApprovalRequest
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                    function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                    mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                    mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                    mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                    message: '#/components/schemas/OpenAIResponseMessage-Input'
+                    web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseMessage-Input | ... (7 variants)
+              - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+                title: OpenAIResponseInputFunctionToolCallOutput
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+                title: OpenAIResponseMCPApprovalResponse
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Input'
+                title: OpenAIResponseMessage-Input
+              title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Input
+            type: array
+            title: list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...]
+          title: string | list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...]
         model:
           type: string
-          description: The underlying LLM used for completions.
+          title: Model
+        prompt:
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponsePrompt'
+            title: OpenAIResponsePrompt
+          - type: 'null'
+          title: OpenAIResponsePrompt
         instructions:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+        parallel_tool_calls:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
-          type: string
-          description: >-
-            (Optional) if specified, the new response will be a continuation of the
-            previous response. This can be used to easily fork-off new responses from
-            existing responses.
+          anyOf:
+          - type: string
+          - type: 'null'
         conversation:
-          type: string
-          description: >-
-            (Optional) The ID of a conversation to add the response to. Must begin
-            with 'conv_'. Input and output messages will be automatically added to
-            the conversation.
+          anyOf:
+          - type: string
+          - type: 'null'
         store:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         stream:
-          type: boolean
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: false
         temperature:
-          type: number
+          anyOf:
+          - type: number
+          - type: 'null'
         text:
-          $ref: '#/components/schemas/OpenAIResponseText'
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseText'
+            title: OpenAIResponseText
+          - type: 'null'
+          title: OpenAIResponseText
         tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIResponseInputTool'
+          anyOf:
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                title: OpenAIResponseInputToolFileSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                title: OpenAIResponseInputToolFunction
+              - $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
+                title: OpenAIResponseInputToolMCP
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                  function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                  mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
+                  web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+              title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+            type: array
+          - type: 'null'
         include:
-          type: array
-          items:
-            type: string
-          description: >-
-            (Optional) Additional fields to include in the response.
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
         max_infer_iters:
-          type: integer
-      additionalProperties: false
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 10
+        max_tool_calls:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
+      type: object
       required:
-        - input
-        - model
+      - input
+      - model
       title: CreateOpenaiResponseRequest
     OpenAIResponseObject:
-      type: object
       properties:
         created_at:
           type: integer
-          description: >-
-            Unix timestamp when the response was created
+          title: Created At
         error:
-          $ref: '#/components/schemas/OpenAIResponseError'
-          description: >-
-            (Optional) Error details if the response generation failed
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseError'
+            title: OpenAIResponseError
+          - type: 'null'
+          title: OpenAIResponseError
         id:
           type: string
-          description: Unique identifier for this response
+          title: Id
         model:
           type: string
-          description: Model identifier used for generation
+          title: Model
         object:
           type: string
           const: response
+          title: Object
           default: response
-          description: >-
-            Object type identifier, always "response"
         output:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseOutput'
-          description: >-
-            List of generated output items (messages, tool calls, etc.)
+            oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseOutputMessageWebSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              title: OpenAIResponseOutputMessageFileSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              title: OpenAIResponseOutputMessageFunctionToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              title: OpenAIResponseOutputMessageMCPCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              title: OpenAIResponseOutputMessageMCPListTools
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              title: OpenAIResponseMCPApprovalRequest
+            discriminator:
+              propertyName: type
+              mapping:
+                file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                message: '#/components/schemas/OpenAIResponseMessage-Output'
+                web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseMessage-Output | ... (7 variants)
+          type: array
+          title: Output
         parallel_tool_calls:
-          type: boolean
-          default: false
-          description: >-
-            Whether tool calls can be executed in parallel
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
-          type: string
-          description: >-
-            (Optional) ID of the previous response in a conversation
+          anyOf:
+          - type: string
+          - type: 'null'
+        prompt:
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponsePrompt'
+            title: OpenAIResponsePrompt
+          - type: 'null'
+          title: OpenAIResponsePrompt
         status:
           type: string
-          description: >-
-            Current status of the response generation
+          title: Status
         temperature:
-          type: number
-          description: >-
-            (Optional) Sampling temperature used for generation
+          anyOf:
+          - type: number
+          - type: 'null'
         text:
           $ref: '#/components/schemas/OpenAIResponseText'
-          description: >-
-            Text formatting configuration for the response
+          default:
+            format:
+              type: text
         top_p:
-          type: number
-          description: >-
-            (Optional) Nucleus sampling parameter used for generation
+          anyOf:
+          - type: number
+          - type: 'null'
         tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIResponseTool'
-          description: >-
-            (Optional) An array of tools the model may call while generating a response.
+          anyOf:
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                title: OpenAIResponseInputToolWebSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                title: OpenAIResponseInputToolFileSearch
+              - $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
+                title: OpenAIResponseInputToolFunction
+              - $ref: '#/components/schemas/OpenAIResponseToolMCP'
+                title: OpenAIResponseToolMCP
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
+                  function: '#/components/schemas/OpenAIResponseInputToolFunction'
+                  mcp: '#/components/schemas/OpenAIResponseToolMCP'
+                  web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+                  web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch'
+              title: OpenAIResponseInputToolWebSearch | ... (4 variants)
+            type: array
+          - type: 'null'
         truncation:
-          type: string
-          description: >-
-            (Optional) Truncation strategy applied to the response
+          anyOf:
+          - type: string
+          - type: 'null'
         usage:
-          $ref: '#/components/schemas/OpenAIResponseUsage'
-          description: >-
-            (Optional) Token usage information for the response
+          anyOf:
+          - $ref: '#/components/schemas/OpenAIResponseUsage'
+            title: OpenAIResponseUsage
+          - type: 'null'
+          title: OpenAIResponseUsage
         instructions:
-          type: string
-          description: >-
-            (Optional) System message inserted into the model's context
-      additionalProperties: false
-      required:
-        - created_at
-        - id
-        - model
-        - object
-        - output
-        - parallel_tool_calls
-        - status
-        - text
-      title: OpenAIResponseObject
-      description: >-
-        Complete OpenAI response object containing generation results and metadata.
-    OpenAIResponseContentPartOutputText:
+          anyOf:
+          - type: string
+          - type: 'null'
+        max_tool_calls:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
       type: object
+      required:
+      - created_at
+      - id
+      - model
+      - output
+      - status
+      title: OpenAIResponseObject
+      description: Complete OpenAI response object containing generation results and metadata.
+    OpenAIResponseContentPartOutputText:
+      description: Text content within a streamed response part.
       properties:
         type:
-          type: string
           const: output_text
           default: output_text
-          description: >-
-            Content part type identifier, always "output_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Text emitted for this content part
         annotations:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseAnnotations'
-          description: >-
-            Structured annotations associated with the text
+            discriminator:
+              mapping:
+                container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+                file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+                file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+                url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+              propertyName: type
+            oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+              title: OpenAIResponseAnnotationFileCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+              title: OpenAIResponseAnnotationCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+              title: OpenAIResponseAnnotationContainerFileCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+              title: OpenAIResponseAnnotationFilePath
+            title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
+          title: Annotations
+          type: array
         logprobs:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: (Optional) Token log probability details
-      additionalProperties: false
+          anyOf:
+          - items:
+              additionalProperties: true
+              type: object
+            type: array
+          - type: 'null'
+          nullable: true
       required:
-        - type
-        - text
-        - annotations
+      - text
       title: OpenAIResponseContentPartOutputText
-      description: >-
-        Text content within a streamed response part.
-    "OpenAIResponseContentPartReasoningSummary":
       type: object
+    OpenAIResponseContentPartReasoningSummary:
+      description: Reasoning summary part in a streamed response.
       properties:
         type:
-          type: string
           const: summary_text
           default: summary_text
-          description: >-
-            Content part type identifier, always "summary_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Summary text
-      additionalProperties: false
       required:
-        - type
-        - text
-      title: >-
-        OpenAIResponseContentPartReasoningSummary
-      description: >-
-        Reasoning summary part in a streamed response.
-    OpenAIResponseContentPartReasoningText:
+      - text
+      title: OpenAIResponseContentPartReasoningSummary
       type: object
+    OpenAIResponseContentPartReasoningText:
+      description: Reasoning text emitted as part of a streamed response.
       properties:
         type:
-          type: string
           const: reasoning_text
           default: reasoning_text
-          description: >-
-            Content part type identifier, always "reasoning_text"
-        text:
+          title: Type
+          type: string
+        text:
+          title: Text
           type: string
-          description: Reasoning text supplied by the model
-      additionalProperties: false
       required:
-        - type
-        - text
+      - text
       title: OpenAIResponseContentPartReasoningText
-      description: >-
-        Reasoning text emitted as part of a streamed response.
+      type: object
     OpenAIResponseObjectStream:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
-        - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
       discriminator:
-        propertyName: type
         mapping:
-          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
-          response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
-          response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
-          response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
-          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
-          response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
-          response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
-          response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
-          response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
-          response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
-          response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
-          response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
-          response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
-          response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
-          response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
-          response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
-          response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
-          response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
-          response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
           response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
           response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
-          response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
-          response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
+          response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+          response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+          response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+          response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+          response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+          response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+          response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+          response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+          response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+          response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+          response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+          response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+          response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+          response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+          response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+          response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+          response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+          response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+          response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+          response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+          response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+          response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
           response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
           response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
           response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
           response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
+          response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
+          response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
           response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
           response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
-          response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
-          response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
-          response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
-          response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
-          response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
-          response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
-          response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
-    "OpenAIResponseObjectStreamResponseCompleted":
-      type: object
+          response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+          response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+          response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+        title: OpenAIResponseObjectStreamResponseCreated
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress'
+        title: OpenAIResponseObjectStreamResponseInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+        title: OpenAIResponseObjectStreamResponseOutputItemAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
+        title: OpenAIResponseObjectStreamResponseOutputItemDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+        title: OpenAIResponseObjectStreamResponseOutputTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
+        title: OpenAIResponseObjectStreamResponseOutputTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+        title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+        title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallSearching
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+        title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+        title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+        title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+        title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+        title: OpenAIResponseObjectStreamResponseMcpCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+        title: OpenAIResponseObjectStreamResponseMcpCallFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
+        title: OpenAIResponseObjectStreamResponseMcpCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
+        title: OpenAIResponseObjectStreamResponseContentPartAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
+        title: OpenAIResponseObjectStreamResponseContentPartDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta'
+        title: OpenAIResponseObjectStreamResponseReasoningTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone'
+        title: OpenAIResponseObjectStreamResponseReasoningTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone'
+        title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta'
+        title: OpenAIResponseObjectStreamResponseRefusalDelta
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone'
+        title: OpenAIResponseObjectStreamResponseRefusalDone
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded'
+        title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted'
+        title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete'
+        title: OpenAIResponseObjectStreamResponseIncomplete
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed'
+        title: OpenAIResponseObjectStreamResponseFailed
+      - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+        title: OpenAIResponseObjectStreamResponseCompleted
+      title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
+    OpenAIResponseObjectStreamResponseCompleted:
+      description: Streaming event indicating a response has been completed.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Completed response object
         type:
-          type: string
           const: response.completed
           default: response.completed
-          description: >-
-            Event type identifier, always "response.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCompleted
-      description: >-
-        Streaming event indicating a response has been completed.
-    "OpenAIResponseObjectStreamResponseContentPartAdded":
+      - response
+      title: OpenAIResponseObjectStreamResponseCompleted
       type: object
+    OpenAIResponseObjectStreamResponseContentPartAdded:
+      description: Streaming event for when a new content part is added to a response item.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the part within the content array
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this content
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item containing this content part
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response
         part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
-            propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
               reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The content part that was added
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            title: OpenAIResponseContentPartOutputText
+          - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            title: OpenAIResponseContentPartRefusal
+          - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+            title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.content_part.added
           default: response.content_part.added
-          description: >-
-            Event type identifier, always "response.content_part.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - response_id
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartAdded
-      description: >-
-        Streaming event for when a new content part is added to a response item.
-    "OpenAIResponseObjectStreamResponseContentPartDone":
+      - content_index
+      - response_id
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartAdded
       type: object
+    OpenAIResponseObjectStreamResponseContentPartDone:
+      description: Streaming event for when a content part is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the part within the content array
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this content
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item containing this content part
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response
         part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
-            propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
               reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The completed content part
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+            title: OpenAIResponseContentPartOutputText
+          - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            title: OpenAIResponseContentPartRefusal
+          - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+            title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.content_part.done
           default: response.content_part.done
-          description: >-
-            Event type identifier, always "response.content_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - response_id
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartDone
-      description: >-
-        Streaming event for when a content part is completed.
-    "OpenAIResponseObjectStreamResponseCreated":
+      - content_index
+      - response_id
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartDone
       type: object
+    OpenAIResponseObjectStreamResponseCreated:
+      description: Streaming event indicating a new response has been created.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: The response object that was created
         type:
-          type: string
           const: response.created
           default: response.created
-          description: >-
-            Event type identifier, always "response.created"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCreated
-      description: >-
-        Streaming event indicating a new response has been created.
-    OpenAIResponseObjectStreamResponseFailed:
+      - response
+      title: OpenAIResponseObjectStreamResponseCreated
       type: object
+    OpenAIResponseObjectStreamResponseFailed:
+      description: Streaming event emitted when a response fails.
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Response object describing the failure
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.failed
           default: response.failed
-          description: >-
-            Event type identifier, always "response.failed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response
-        - sequence_number
-        - type
+      - response
+      - sequence_number
       title: OpenAIResponseObjectStreamResponseFailed
-      description: >-
-        Streaming event emitted when a response fails.
-    "OpenAIResponseObjectStreamResponseFileSearchCallCompleted":
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted:
+      description: Streaming event for completed file search calls.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.completed
           default: response.file_search_call.completed
-          description: >-
-            Event type identifier, always "response.file_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallCompleted
-      description: >-
-        Streaming event for completed file search calls.
-    "OpenAIResponseObjectStreamResponseFileSearchCallInProgress":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress:
+      description: Streaming event for file search calls in progress.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.in_progress
           default: response.file_search_call.in_progress
-          description: >-
-            Event type identifier, always "response.file_search_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallInProgress
-      description: >-
-        Streaming event for file search calls in progress.
-    "OpenAIResponseObjectStreamResponseFileSearchCallSearching":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
       type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching:
+      description: Streaming event for file search currently searching.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the file search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.file_search_call.searching
           default: response.file_search_call.searching
-          description: >-
-            Event type identifier, always "response.file_search_call.searching"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallSearching
-      description: >-
-        Streaming event for file search currently searching.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta:
+      description: Streaming event for incremental function call argument updates.
       properties:
         delta:
+          title: Delta
           type: string
-          description: >-
-            Incremental function call arguments being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the function call being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.delta
           default: response.function_call_arguments.delta
-          description: >-
-            Event type identifier, always "response.function_call_arguments.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
-      description: >-
-        Streaming event for incremental function call argument updates.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone":
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
       type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone:
+      description: Streaming event for when function call arguments are completed.
       properties:
         arguments:
+          title: Arguments
           type: string
-          description: >-
-            Final complete arguments JSON string for the function call
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed function call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.function_call_arguments.done
           default: response.function_call_arguments.done
-          description: >-
-            Event type identifier, always "response.function_call_arguments.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - arguments
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
-      description: >-
-        Streaming event for when function call arguments are completed.
- "OpenAIResponseObjectStreamResponseInProgress": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Current response state while in progress sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.in_progress default: response.in_progress - description: >- - Event type identifier, always "response.in_progress" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseInProgress - description: >- - Streaming event indicating the response remains in progress. - "OpenAIResponseObjectStreamResponseIncomplete": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: >- - Response object describing the incomplete state sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.incomplete default: response.incomplete - description: >- - Event type identifier, always "response.incomplete" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseIncomplete - description: >- - Streaming event emitted when a response ends in an incomplete state. 
- "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: properties: delta: + title: Delta type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.delta default: response.mcp_call.arguments.delta - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: properties: arguments: + title: Arguments type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.done default: response.mcp_call.arguments.done - additionalProperties: false + title: Type + type: string required: - - arguments - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone - "OpenAIResponseObjectStreamResponseMcpCallCompleted": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.completed default: response.mcp_call.completed - description: >- - Event type identifier, always "response.mcp_call.completed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallCompleted - description: Streaming event for completed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.failed default: response.mcp_call.failed - description: >- - Event type identifier, always "response.mcp_call.failed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallFailed - description: Streaming event for failed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the MCP call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.mcp_call.in_progress
           default: response.mcp_call.in_progress
-          description: >-
-            Event type identifier, always "response.mcp_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpCallInProgress
-      description: >-
-        Streaming event for MCP calls in progress.
-    "OpenAIResponseObjectStreamResponseMcpListToolsCompleted":
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallInProgress
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsCompleted:
       properties:
         sequence_number:
+          title: Sequence Number
           type: integer
         type:
-          type: string
           const: response.mcp_list_tools.completed
           default: response.mcp_list_tools.completed
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsCompleted
-    "OpenAIResponseObjectStreamResponseMcpListToolsFailed":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsFailed:
       properties:
         sequence_number:
+          title: Sequence Number
           type: integer
         type:
-          type: string
           const: response.mcp_list_tools.failed
           default: response.mcp_list_tools.failed
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsFailed
-    "OpenAIResponseObjectStreamResponseMcpListToolsInProgress":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
       type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsInProgress:
       properties:
         sequence_number:
+          title: Sequence Number
           type: integer
         type:
-          type: string
           const: response.mcp_list_tools.in_progress
           default: response.mcp_list_tools.in_progress
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsInProgress
-    "OpenAIResponseObjectStreamResponseOutputItemAdded":
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
       type: object
+    OpenAIResponseObjectStreamResponseOutputItemAdded:
+      description: Streaming event for when a new output item is added to the response.
       properties:
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this output
         item:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseMessage'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           discriminator:
-            propertyName: type
             mapping:
-              message: '#/components/schemas/OpenAIResponseMessage'
-              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
               file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
               function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
               mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
               mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-          description: >-
-            The output item that was added (message, tool call, etc.)
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseMessage'
+            title: OpenAIResponseMessage
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseOutputMessageWebSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            title: OpenAIResponseOutputMessageFileSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            title: OpenAIResponseOutputMessageFunctionToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            title: OpenAIResponseOutputMessageMCPCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            title: OpenAIResponseOutputMessageMCPListTools
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of this item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_item.added
           default: response.output_item.added
-          description: >-
-            Event type identifier, always "response.output_item.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response_id
-        - item
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputItemAdded
-      description: >-
-        Streaming event for when a new output item is added to the response.
-    "OpenAIResponseObjectStreamResponseOutputItemDone":
+      - response_id
+      - item
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemAdded
       type: object
+    OpenAIResponseObjectStreamResponseOutputItemDone:
+      description: Streaming event for when an output item is completed.
       properties:
         response_id:
+          title: Response Id
           type: string
-          description: >-
-            Unique identifier of the response containing this output
         item:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseMessage'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
           discriminator:
-            propertyName: type
             mapping:
-              message: '#/components/schemas/OpenAIResponseMessage'
-              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
               file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
               function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
               mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
               mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-          description: >-
-            The completed output item (message, tool call, etc.)
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseMessage'
+            title: OpenAIResponseMessage
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            title: OpenAIResponseOutputMessageWebSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+            title: OpenAIResponseOutputMessageFileSearchToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+            title: OpenAIResponseOutputMessageFunctionToolCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+            title: OpenAIResponseOutputMessageMCPCall
+          - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+            title: OpenAIResponseOutputMessageMCPListTools
+          - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+            title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of this item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_item.done
           default: response.output_item.done
-          description: >-
-            Event type identifier, always "response.output_item.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - response_id
-        - item
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputItemDone
-      description: >-
-        Streaming event for when an output item is completed.
-    "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded":
+      - response_id
+      - item
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemDone
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded:
+      description: Streaming event for when an annotation is added to output text.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the item to which the annotation is being added
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the output item in the response's output array
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the content part within the output item
         annotation_index:
+          title: Annotation Index
           type: integer
-          description: >-
-            Index of the annotation within the content part
         annotation:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
           discriminator:
-            propertyName: type
             mapping:
-              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
-              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
               container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
               file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
-          description: The annotation object being added
+              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+            title: OpenAIResponseAnnotationFileCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            title: OpenAIResponseAnnotationCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+            title: OpenAIResponseAnnotationContainerFileCitation
+          - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+            title: OpenAIResponseAnnotationFilePath
+          title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.annotation.added
           default: response.output_text.annotation.added
-          description: >-
-            Event type identifier, always "response.output_text.annotation.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - content_index
-        - annotation_index
-        - annotation
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
-      description: >-
-        Streaming event for when an annotation is added to output text.
-    "OpenAIResponseObjectStreamResponseOutputTextDelta":
+      - item_id
+      - output_index
+      - content_index
+      - annotation_index
+      - annotation
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextDelta:
+      description: Streaming event for incremental text content updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position within the text content
         delta:
+          title: Delta
           type: string
-          description: Incremental text content being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.delta
           default: response.output_text.delta
-          description: >-
-            Event type identifier, always "response.output_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextDelta
-      description: >-
-        Streaming event for incremental text content updates.
-    "OpenAIResponseObjectStreamResponseOutputTextDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseOutputTextDone:
+      description: Streaming event for when text output is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position within the text content
         text:
+          title: Text
           type: string
-          description: >-
-            Final complete text content of the output item
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.output_text.done
           default: response.output_text.done
-          description: >-
-            Event type identifier, always "response.output_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextDone
-      description: >-
-        Streaming event for when text output is completed.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded":
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded:
+      description: Streaming event for when a new reasoning summary part is added.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         part:
           $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
-          description: The summary part that was added
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_part.added
           default: response.reasoning_summary_part.added
-          description: >-
-            Event type identifier, always "response.reasoning_summary_part.added"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
-      description: >-
-        Streaming event for when a new reasoning summary part is added.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone":
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartDone:
+      description: Streaming event for when a reasoning summary part is completed.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         part:
           $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
-          description: The completed summary part
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_part.done
           default: response.reasoning_summary_part.done
-          description: >-
-            Event type identifier, always "response.reasoning_summary_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
-      description: >-
-        Streaming event for when a reasoning summary part is completed.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta":
+      - item_id
+      - output_index
+      - part
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta:
+      description: Streaming event for incremental reasoning summary text updates.
       properties:
         delta:
+          title: Delta
           type: string
-          description: Incremental summary text being added
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_text.delta
           default: response.reasoning_summary_text.delta
-          description: >-
-            Event type identifier, always "response.reasoning_summary_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
-      description: >-
-        Streaming event for incremental reasoning summary text updates.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone":
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDone:
+      description: Streaming event for when reasoning summary text is completed.
       properties:
         text:
+          title: Text
           type: string
-          description: Final complete summary text
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: Index position of the output item
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         summary_index:
+          title: Summary Index
           type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
         type:
-          type: string
           const: response.reasoning_summary_text.done
           default: response.reasoning_summary_text.done
-          description: >-
-            Event type identifier, always "response.reasoning_summary_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
-      description: >-
-        Streaming event for when reasoning summary text is completed.
-    "OpenAIResponseObjectStreamResponseReasoningTextDelta":
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
       type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDelta:
+      description: Streaming event for incremental reasoning text updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the reasoning content part
         delta:
+          title: Delta
           type: string
-          description: Incremental reasoning text being added
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the output item being updated
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.reasoning_text.delta
           default: response.reasoning_text.delta
-          description: >-
-            Event type identifier, always "response.reasoning_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDelta
-      description: >-
-        Streaming event for incremental reasoning text updates.
-    "OpenAIResponseObjectStreamResponseReasoningTextDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDelta
       type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDone:
+      description: Streaming event for when reasoning text is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: >-
-            Index position of the reasoning content part
         text:
+          title: Text
           type: string
-          description: Final complete reasoning text
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.reasoning_text.done
           default: response.reasoning_text.done
-          description: >-
-            Event type identifier, always "response.reasoning_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDone
-      description: >-
-        Streaming event for when reasoning text is completed.
-    "OpenAIResponseObjectStreamResponseRefusalDelta":
+      - content_index
+      - text
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDone
       type: object
+    OpenAIResponseObjectStreamResponseRefusalDelta:
+      description: Streaming event for incremental refusal text updates.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position of the content part
         delta:
+          title: Delta
           type: string
-          description: Incremental refusal text being added
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.refusal.delta
           default: response.refusal.delta
-          description: >-
-            Event type identifier, always "response.refusal.delta"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDelta
-      description: >-
-        Streaming event for incremental refusal text updates.
-    "OpenAIResponseObjectStreamResponseRefusalDone":
+      - content_index
+      - delta
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDelta
       type: object
+    OpenAIResponseObjectStreamResponseRefusalDone:
+      description: Streaming event for when refusal text is completed.
       properties:
         content_index:
+          title: Content Index
           type: integer
-          description: Index position of the content part
         refusal:
+          title: Refusal
           type: string
-          description: Final complete refusal text
         item_id:
+          title: Item Id
           type: string
-          description: Unique identifier of the output item
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.refusal.done
           default: response.refusal.done
-          description: >-
-            Event type identifier, always "response.refusal.done"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - content_index
-        - refusal
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDone
-      description: >-
-        Streaming event for when refusal text is completed.
-    "OpenAIResponseObjectStreamResponseWebSearchCallCompleted":
+      - content_index
+      - refusal
+      - item_id
+      - output_index
+      - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDone
       type: object
+    OpenAIResponseObjectStreamResponseWebSearchCallCompleted:
+      description: Streaming event for completed web search calls.
       properties:
         item_id:
+          title: Item Id
           type: string
-          description: >-
-            Unique identifier of the completed web search call
         output_index:
+          title: Output Index
           type: integer
-          description: >-
-            Index position of the item in the output list
         sequence_number:
+          title: Sequence Number
           type: integer
-          description: >-
-            Sequential number for ordering streaming events
         type:
-          type: string
           const: response.web_search_call.completed
           default: response.web_search_call.completed
-          description: >-
-            Event type identifier, always "response.web_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
       required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseWebSearchCallCompleted
-      description: >-
-        Streaming event for completed web search calls.
- "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. properties: item_id: + title: Item Id type: string - description: Unique identifier of the web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.in_progress default: response.web_search_call.in_progress - description: >- - Event type identifier, always "response.web_search_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallInProgress - description: >- - Streaming event for web search calls in progress. - "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: properties: item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.web_search_call.searching default: response.web_search_call.searching - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallSearching - OpenAIDeleteResponseObject: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching type: object + OpenAIDeleteResponseObject: properties: id: type: string - description: >- - Unique identifier of the deleted response + title: Id object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" deleted: type: boolean + title: Deleted default: true - description: Deletion confirmation flag, always True - additionalProperties: false - required: - - id - - object - - deleted - title: OpenAIDeleteResponseObject - description: >- - Response object confirming deletion of an OpenAI response. - ListOpenAIResponseInputItem: type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. 
+    ListOpenAIResponseInputItem:
       properties:
         data:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIResponseInput'
-          description: List of input items
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseOutputMessageWebSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                title: OpenAIResponseOutputMessageFileSearchToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                title: OpenAIResponseOutputMessageFunctionToolCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                title: OpenAIResponseOutputMessageMCPCall
+              - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                title: OpenAIResponseOutputMessageMCPListTools
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                title: OpenAIResponseMCPApprovalRequest
+              discriminator:
+                propertyName: type
+                mapping:
+                  file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                  function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                  mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                  mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                  mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                  message: '#/components/schemas/OpenAIResponseMessage-Output'
+                  web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseMessage-Output | ... (7 variants)
+            - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+              title: OpenAIResponseInputFunctionToolCallOutput
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+              title: OpenAIResponseMCPApprovalResponse
+            - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+              title: OpenAIResponseMessage-Output
+            title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output
+          type: array
+          title: Data
         object:
           type: string
           const: list
+          title: Object
           default: list
-          description: Object type identifier, always "list"
-      additionalProperties: false
-      required:
-        - data
-        - object
-      title: ListOpenAIResponseInputItem
-      description: >-
-        List container for OpenAI response input items.
-    RunShieldRequest:
       type: object
+      required:
+      - data
+      title: ListOpenAIResponseInputItem
+      description: List container for OpenAI response input items.
+    RunShieldRequest:
       properties:
         shield_id:
           type: string
-          description: The identifier of the shield to run.
+          title: Shield Id
         messages:
-          type: array
           items:
-            $ref: '#/components/schemas/OpenAIMessageParam'
-          description: The messages to run the shield on.
-        params:
-          type: object
-          additionalProperties:
             oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
+            - $ref: '#/components/schemas/OpenAIUserMessageParam-Input'
+              title: OpenAIUserMessageParam-Input
+            - $ref: '#/components/schemas/OpenAISystemMessageParam'
+              title: OpenAISystemMessageParam
+            - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+              title: OpenAIAssistantMessageParam-Input
+            - $ref: '#/components/schemas/OpenAIToolMessageParam'
+              title: OpenAIToolMessageParam
+            - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+              title: OpenAIDeveloperMessageParam
+            discriminator:
+              propertyName: role
+              mapping:
+                assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+                developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+                system: '#/components/schemas/OpenAISystemMessageParam'
+                tool: '#/components/schemas/OpenAIToolMessageParam'
+                user: '#/components/schemas/OpenAIUserMessageParam-Input'
+            title: OpenAIUserMessageParam-Input | ... (5 variants)
+          type: array
+          title: Messages
+        params:
+          additionalProperties: true
+          type: object
+          title: Params
+      type: object
       required:
-        - shield_id
-        - messages
-        - params
+      - shield_id
+      - messages
+      - params
       title: RunShieldRequest
     RunShieldResponse:
-      type: object
       properties:
         violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: >-
-            (Optional) Safety violation detected by the shield, if any
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/SafetyViolation'
+            title: SafetyViolation
+          - type: 'null'
+          title: SafetyViolation
+      type: object
       title: RunShieldResponse
       description: Response from running a safety shield.
     SafetyViolation:
-      type: object
       properties:
         violation_level:
           $ref: '#/components/schemas/ViolationLevel'
-          description: Severity level of the violation
         user_message:
-          type: string
-          description: >-
-            (Optional) Message to convey to the user about the violation
+          anyOf:
+          - type: string
+          - type: 'null'
         metadata:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata including specific violation codes for debugging and
-            telemetry
-      additionalProperties: false
+          title: Metadata
+      type: object
       required:
-        - violation_level
-        - metadata
+      - violation_level
       title: SafetyViolation
-      description: >-
-        Details of a safety violation detected by content moderation.
+      description: Details of a safety violation detected by content moderation.
     ViolationLevel:
       type: string
       enum:
-        - info
-        - warn
-        - error
+      - info
+      - warn
+      - error
       title: ViolationLevel
       description: Severity level of a safety violation.
-    AgentTurnInputType:
-      type: object
-      properties:
-        type:
-          type: string
-          const: agent_turn_input
-          default: agent_turn_input
-          description: >-
-            Discriminator type. Always "agent_turn_input"
-      additionalProperties: false
-      required:
-        - type
-      title: AgentTurnInputType
-      description: Parameter type for agent turn input.
     AggregationFunctionType:
       type: string
       enum:
-        - average
-        - weighted_average
-        - median
-        - categorical_count
-        - accuracy
+      - average
+      - weighted_average
+      - median
+      - categorical_count
+      - accuracy
       title: AggregationFunctionType
-      description: >-
-        Types of aggregation functions for scoring results.
+      description: Types of aggregation functions for scoring results.
     ArrayType:
-      type: object
       properties:
         type:
           type: string
           const: array
+          title: Type
           default: array
-          description: Discriminator type. Always "array"
Always "array" - additionalProperties: false - required: - - type + type: object title: ArrayType description: Parameter type for array values. BasicScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: basic + title: Type default: basic - description: >- - The type of scoring function parameters, always basic aggregation_functions: - type: array items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - aggregation_functions - title: BasicScoringFnParams - description: >- - Parameters for basic scoring function configuration. - BooleanType: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + title: BasicScoringFnParams + description: Parameters for basic scoring function configuration. + BooleanType: properties: type: type: string const: boolean + title: Type default: boolean - description: Discriminator type. Always "boolean" - additionalProperties: false - required: - - type + type: object title: BooleanType description: Parameter type for boolean values. ChatCompletionInputType: - type: object properties: type: type: string const: chat_completion_input + title: Type default: chat_completion_input - description: >- - Discriminator type. Always "chat_completion_input" - additionalProperties: false - required: - - type - title: ChatCompletionInputType - description: >- - Parameter type for chat completion input. - CompletionInputType: type: object + title: ChatCompletionInputType + description: Parameter type for chat completion input. + CompletionInputType: properties: type: type: string const: completion_input + title: Type default: completion_input - description: >- - Discriminator type. Always "completion_input" - additionalProperties: false - required: - - type + type: object title: CompletionInputType description: Parameter type for completion input. JsonType: - type: object properties: type: type: string const: json + title: Type default: json - description: Discriminator type. Always "json" - additionalProperties: false - required: - - type + type: object title: JsonType description: Parameter type for JSON values. LLMAsJudgeScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: llm_as_judge + title: Type default: llm_as_judge - description: >- - The type of scoring function parameters, always llm_as_judge judge_model: type: string - description: >- - Identifier of the LLM model to use as a judge for scoring + title: Judge Model prompt_template: - type: string - description: >- - (Optional) Custom prompt template for the judge model + anyOf: + - type: string + - type: 'null' judge_score_regexes: - type: array items: type: string - description: >- - Regexes to extract the answer from generated response - aggregation_functions: type: array + title: Judge Score Regexes + description: Regexes to extract the answer from generated response + aggregation_functions: items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - judge_model - - judge_score_regexes - - aggregation_functions - title: LLMAsJudgeScoringFnParams - description: >- - Parameters for LLM-as-judge scoring function configuration. 
- NumberType: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + required: + - judge_model + title: LLMAsJudgeScoringFnParams + description: Parameters for LLM-as-judge scoring function configuration. + NumberType: properties: type: type: string const: number + title: Type default: number - description: Discriminator type. Always "number" - additionalProperties: false - required: - - type + type: object title: NumberType description: Parameter type for numeric values. ObjectType: - type: object properties: type: type: string const: object + title: Type default: object - description: Discriminator type. Always "object" - additionalProperties: false - required: - - type + type: object title: ObjectType description: Parameter type for object values. RegexParserScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: regex_parser + title: Type default: regex_parser - description: >- - The type of scoring function parameters, always regex_parser parsing_regexes: - type: array items: type: string - description: >- - Regex to extract the answer from generated response - aggregation_functions: type: array + title: Parsing Regexes + description: Regex to extract the answer from generated response + aggregation_functions: items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - parsing_regexes - - aggregation_functions - title: RegexParserScoringFnParams - description: >- - Parameters for regex parser scoring function configuration. - ScoringFn: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + title: RegexParserScoringFnParams + description: Parameters for regex parser scoring function configuration. 
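For orientation, a minimal sketch of a params payload that conforms to the regenerated `LLMAsJudgeScoringFnParams` schema above: only `judge_model` is required now, and `type` acts as the discriminator with a default. The model identifier, prompt, and regex below are placeholders, not values from the spec.

```python
# Hypothetical LLMAsJudgeScoringFnParams payload; field names follow the
# regenerated schema, values are placeholders.
llm_as_judge_params = {
    "type": "llm_as_judge",                           # discriminator, defaults to "llm_as_judge"
    "judge_model": "example/judge-model",             # the only required field
    "prompt_template": "Rate this answer: {answer}",  # optional (nullable)
    "judge_score_regexes": [r"Score:\s*(\d+)"],       # regexes to extract the score
    "aggregation_functions": ["average"],             # AggregationFunctionType values
}
```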
+ ScoringFn: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: scoring_function + title: Type default: scoring_function - description: >- - The resource type, always scoring_function description: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata + description: Any additional metadata for this definition return_type: oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... 
(9 variants) + description: The return type of the deterministic function discriminator: propertyName: type mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' + boolean: '#/components/schemas/BooleanType' chat_completion_input: '#/components/schemas/ChatCompletionInputType' completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + description: The parameters for the scoring function for benchmark eval, these can be overridden for app eval + type: object required: - - identifier - - provider_id - - type - - metadata - - return_type + - identifier + - provider_id + - return_type title: ScoringFn - description: >- - A scoring function resource for evaluating model outputs. + description: A scoring function resource for evaluating model outputs. ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' discriminator: - propertyName: type mapping: + basic: '#/components/schemas/BasicScoringFnParams' llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams ScoringFnParamsType: - type: string + description: Types of scoring function parameter configurations. enum: - - llm_as_judge - - regex_parser - - basic + - llm_as_judge + - regex_parser + - basic title: ScoringFnParamsType - description: >- - Types of scoring function parameter configurations. + type: string StringType: - type: object properties: type: type: string const: string + title: Type default: string - description: Discriminator type. Always "string" - additionalProperties: false - required: - - type + type: object title: StringType description: Parameter type for string values. 
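The `ScoringFnParams` union keeps its `type` discriminator; only the ordering of the mapping keys changed, so a client that dispatches on the discriminator is unaffected. A small sketch of that dispatch, using the mapping exactly as written above:

```python
# Dispatch on the ScoringFnParams `type` discriminator, mirroring the
# (reordered but otherwise unchanged) mapping in the schema.
SCORING_FN_PARAMS_MAPPING = {
    "basic": "#/components/schemas/BasicScoringFnParams",
    "llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
    "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
}

def resolve_params_schema(params: dict) -> str:
    """Return the component schema ref for a ScoringFnParams payload."""
    return SCORING_FN_PARAMS_MAPPING[params["type"]]

assert resolve_params_schema({"type": "basic"}).endswith("BasicScoringFnParams")
```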
UnionType: - type: object properties: type: type: string const: union + title: Type default: union - description: Discriminator type. Always "union" - additionalProperties: false - required: - - type + type: object title: UnionType description: Parameter type for union values. ListScoringFunctionsResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/ScoringFn' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListScoringFunctionsResponse - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: >- - The ID of the scoring function to register. - description: - type: string - description: The description of the scoring function. - return_type: - $ref: '#/components/schemas/ParamType' - description: The return type of the scoring function. - provider_scoring_fn_id: - type: string - description: >- - The ID of the provider scoring function to use for the scoring function. - provider_id: - type: string - description: >- - The ID of the provider to use for the scoring function. - params: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - The parameters for the scoring function for benchmark eval, these can - be overridden for app eval. - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest ScoreRequest: - type: object properties: input_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to score. + type: array + title: Input Rows scoring_functions: - type: object additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. 
- additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions + type: object required: - - input_rows - - scoring_functions + - input_rows + - scoring_functions title: ScoreRequest ScoreResponse: - type: object properties: results: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult. - additionalProperties: false + type: object + title: Results + type: object required: - - results + - results title: ScoreResponse description: The response from scoring. ScoringResult: - type: object properties: score_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. + type: array + title: Score Rows aggregated_results: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false + title: Aggregated Results + type: object required: - - score_rows - - aggregated_results + - score_rows + - aggregated_results title: ScoringResult description: A scoring result for a single row. ScoreBatchRequest: - type: object properties: dataset_id: type: string - description: The ID of the dataset to score. + title: Dataset Id scoring_functions: - type: object additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: AdditionalpropertiesUnion + type: object + title: Scoring Functions save_results_dataset: type: boolean - description: >- - Whether to save the results to a dataset. 
- additionalProperties: false + title: Save Results Dataset + default: false + type: object required: - - dataset_id - - scoring_functions - - save_results_dataset + - dataset_id + - scoring_functions title: ScoreBatchRequest ScoreBatchResponse: - type: object properties: dataset_id: - type: string - description: >- - (Optional) The identifier of the dataset that was scored + anyOf: + - type: string + - type: 'null' results: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult - additionalProperties: false - required: - - results - title: ScoreBatchResponse - description: >- - Response from batch scoring operations on datasets. - Shield: + type: object + title: Results type: object + required: + - results + title: ScoreBatchResponse + description: Response from batch scoring operations on datasets. + Shield: properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: shield + title: Type default: shield - description: The resource type, always shield params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Configuration parameters for the shield - additionalProperties: false - required: - - identifier - - provider_id - - type - title: Shield - description: >- - A safety shield resource that can be used to check content. - ListShieldsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - identifier + - provider_id + title: Shield + description: A safety shield resource that can be used to check content. + ListShieldsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Shield' - additionalProperties: false - required: - - data - title: ListShieldsResponse - RegisterShieldRequest: - type: object - properties: - shield_id: - type: string - description: >- - The identifier of the shield to register. - provider_shield_id: - type: string - description: >- - The identifier of the shield in the provider. - provider_id: - type: string - description: The identifier of the provider. - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the shield. - additionalProperties: false - required: - - shield_id - title: RegisterShieldRequest - CompletionMessage: - type: object - properties: - role: - type: string - const: assistant - default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - $ref: '#/components/schemas/InterleavedContent' - description: The content of the model's response - stop_reason: - type: string - enum: - - end_of_turn - - end_of_message - - out_of_tokens - description: >- - Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: - The model finished generating the entire response. 
- `StopReason.end_of_message`: - The model finished generating but generated a partial response -- usually, - a tool call. The user may call the tool and continue the conversation - with the tool's response. - `StopReason.out_of_tokens`: The model ran - out of token budget. - tool_calls: type: array - items: - $ref: '#/components/schemas/ToolCall' - description: >- - List of tool calls. Each tool call is a ToolCall object. - additionalProperties: false - required: - - role - - content - - stop_reason - title: CompletionMessage - description: >- - A message containing the model's (assistant) response in a chat conversation. - ImageContentItem: + title: Data type: object + required: + - data + title: ListShieldsResponse + ImageContentItem: + description: A image content item properties: type: - type: string const: image default: image - description: >- - Discriminator type of the content item. Always "image" + title: Type + type: string image: - type: object - properties: - url: - $ref: '#/components/schemas/URL' - description: >- - A URL of the image or data URL in the format of data:image/{type};base64,{data}. - Note that URL could have length limits. - data: - type: string - contentEncoding: base64 - description: base64 encoded image data as string - additionalProperties: false - description: >- - Image as a base64 encoded string or an URL - additionalProperties: false + $ref: '#/components/schemas/_URLOrData' required: - - type - - image + - image title: ImageContentItem - description: A image content item + type: object InterleavedContent: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - InterleavedContentItem: - oneOf: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + InterleavedContentItem: discriminator: - propertyName: type mapping: image: '#/components/schemas/ImageContentItem' text: '#/components/schemas/TextContentItem' - Message: + propertyName: type oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/SystemMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - - $ref: '#/components/schemas/CompletionMessage' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/UserMessage' - system: '#/components/schemas/SystemMessage' - tool: '#/components/schemas/ToolResponseMessage' - assistant: '#/components/schemas/CompletionMessage' - SystemMessage: - type: object - properties: - role: - type: string - const: system - default: system - description: >- - Must be "system" to identify this as a system message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the "system prompt". 
If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). - additionalProperties: false - required: - - role - - content - title: SystemMessage - description: >- - A system message providing instructions or context to the model. + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem TextContentItem: - type: object properties: type: type: string const: text + title: Type default: text - description: >- - Discriminator type of the content item. Always "text" text: type: string - description: Text content - additionalProperties: false + title: Text + type: object required: - - type - - text + - text title: TextContentItem description: A text content item - ToolCall: - type: object + ToolInvocationResult: properties: - call_id: - type: string - tool_name: - oneOf: - - type: string - enum: - - brave_search - - wolfram_alpha - - photogen - - code_interpreter - title: BuiltinTool - - type: string - arguments: - type: string - additionalProperties: false - required: - - call_id - - tool_name - - arguments - title: ToolCall - ToolResponseMessage: - type: object - properties: - role: - type: string - const: tool - default: tool - description: >- - Must be "tool" to identify this as a tool response - call_id: - type: string - description: >- - Unique identifier for the tool call this response is for content: - $ref: '#/components/schemas/InterleavedContent' - description: The response content from the tool - additionalProperties: false - required: - - role - - call_id - - content - title: ToolResponseMessage - description: >- - A message representing the result of a tool invocation. - URL: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem-Output | TextContentItem] + error_message: + anyOf: + - type: string + - type: 'null' + error_code: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: ToolInvocationResult + description: Result of a tool invocation. + URL: properties: uri: type: string - description: The URL string pointing to the resource - additionalProperties: false + title: Uri + type: object required: - - uri + - uri title: URL description: A URL reference to external content. 
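`InterleavedContent` is now an `anyOf` over a bare string, a single discriminated content item, or a list of items. A sketch of both accepted shapes; the `image` payload assumes `_URLOrData` keeps the `url`/`data` fields of the inline object it replaces, since that schema is not shown in this hunk.

```python
# Two equivalent ways to supply InterleavedContent under the new anyOf.
content_as_string = "What is shown in this image?"

content_as_items = [
    {"type": "text", "text": "Describe the attached chart."},
    # Assumes _URLOrData mirrors the old inline {url, data} object.
    {"type": "image", "image": {"url": {"uri": "https://example.com/chart.png"}}},
]
```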
- UserMessage: - type: object - properties: - role: - type: string - const: user - default: user - description: >- - Must be "user" to identify this as a user message - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the message, which can include text and other media - context: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) This field is used internally by Llama Stack to pass RAG context. - This field may be removed in the API in the future. - additionalProperties: false - required: - - role - - content - title: UserMessage - description: >- - A message from the user in a chat conversation. - SyntheticDataGenerateRequest: - type: object - properties: - dialogs: - type: array - items: - $ref: '#/components/schemas/Message' - description: >- - List of conversation messages to use as input for synthetic data generation - filtering_function: - type: string - enum: - - none - - random - - top_k - - top_p - - top_k_top_p - - sigmoid - description: >- - Type of filtering to apply to generated synthetic data samples - model: - type: string - description: >- - (Optional) The identifier of the model to use. The model must be registered - with Llama Stack and available via the /models endpoint - additionalProperties: false - required: - - dialogs - - filtering_function - title: SyntheticDataGenerateRequest - SyntheticDataGenerationResponse: - type: object - properties: - synthetic_data: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - List of generated synthetic data samples that passed the filtering criteria - statistics: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Statistical information about the generation process and filtering - results - additionalProperties: false - required: - - synthetic_data - title: SyntheticDataGenerationResponse - description: >- - Response from the synthetic data generation. Batch of (prompt, response, score) - tuples that pass the threshold. - InvokeToolRequest: - type: object - properties: - tool_name: - type: string - description: The name of the tool to invoke. - kwargs: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool. - additionalProperties: false - required: - - tool_name - - kwargs - title: InvokeToolRequest - ToolInvocationResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The output content from the tool execution - error_message: - type: string - description: >- - (Optional) Error message if the tool execution failed - error_code: - type: integer - description: >- - (Optional) Numeric error code if the tool execution failed - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool execution - additionalProperties: false - title: ToolInvocationResult - description: Result of a tool invocation. 
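Under the regenerated `ToolInvocationResult` (added above, just before `URL`), every field is nullable and nothing is required, so both a bare success payload and an error payload validate. A sketch:

```python
# Both payloads satisfy the new ToolInvocationResult schema (no required fields).
success_result = {
    "content": "42",                 # string | content items | null
    "metadata": {"elapsed_ms": 12},  # free-form object, nullable
}
error_result = {
    "content": None,
    "error_message": "tool timed out",
    "error_code": 504,
}
```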
ToolDef: - type: object properties: toolgroup_id: - type: string - description: >- - (Optional) ID of the tool group this tool belongs to + anyOf: + - type: string + - type: 'null' name: type: string - description: Name of the tool + title: Name description: - type: string - description: >- - (Optional) Human-readable description of what the tool does + anyOf: + - type: string + - type: 'null' input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool inputs (MCP inputSchema) + anyOf: + - additionalProperties: true + type: object + - type: 'null' output_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON Schema for tool outputs (MCP outputSchema) + anyOf: + - additionalProperties: true + type: object + - type: 'null' metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool - additionalProperties: false - required: - - name - title: ToolDef - description: >- - Tool definition used in runtime contexts. - ListToolDefsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - name + title: ToolDef + description: Tool definition used in runtime contexts. + ListToolDefsResponse: properties: data: - type: array items: $ref: '#/components/schemas/ToolDef' - description: List of tool definitions - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListToolDefsResponse - description: >- - Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. 
- InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. 
- RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. + description: Response containing a list of tool definitions. 
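A sketch of a `ToolDef` and the list response wrapping it, per the new schema where only `name` is required and the schema fields are free-form JSON Schema objects; the weather tool below is a made-up example:

```python
# Hypothetical ToolDef; only `name` is required in the regenerated schema.
tool_def = {
    "name": "get_weather",
    "description": "Look up current weather for a city",
    "input_schema": {                # free-form JSON Schema object (nullable)
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}

list_tool_defs_response = {"data": [tool_def]}  # `data` is the only required field
```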
ToolGroup: - type: object properties: identifier: type: string + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: tool_group + title: Type default: tool_group - description: Type of resource, always 'tool_group' mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - (Optional) Model Context Protocol endpoint for remote tools + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional arguments for the tool group - additionalProperties: false - required: - - identifier - - provider_id - - type - title: ToolGroup - description: >- - A group of related tools managed together. - ListToolGroupsResponse: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + required: + - identifier + - provider_id + title: ToolGroup + description: A group of related tools managed together. + ListToolGroupsResponse: properties: data: - type: array items: $ref: '#/components/schemas/ToolGroup' - description: List of tool groups - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListToolGroupsResponse - description: >- - Response containing a list of tool groups. - RegisterToolGroupRequest: - type: object - properties: - toolgroup_id: - type: string - description: The ID of the tool group to register. - provider_id: - type: string - description: >- - The ID of the provider to use for the tool group. - mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - The MCP endpoint to use for the tool group. - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool group. - additionalProperties: false - required: - - toolgroup_id - - provider_id - title: RegisterToolGroupRequest + description: Response containing a list of tool groups. Chunk: - type: object + description: A chunk of content that can be inserted into a vector database. properties: content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, or other - types. - metadata: - type: object - additionalProperties: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk that will be used in the model context - during inference. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. 
- stored_chunk_id: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + chunk_id: + title: Chunk Id type: string - description: >- - The chunk ID that is stored in the vector database. Used for backend functionality. + metadata: + additionalProperties: true + title: Metadata + type: object + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: >- - Metadata for the chunk that will NOT be used in the context during inference. - The `chunk_metadata` is required backend functionality. - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + nullable: true + title: ChunkMetadata required: - - content - - metadata + - content + - chunk_id title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. - ChunkMetadata: type: object + ChunkMetadata: properties: chunk_id: - type: string - description: >- - The ID of the chunk. If not set, it will be generated based on the document - ID and content. + anyOf: + - type: string + - type: 'null' document_id: - type: string - description: >- - The ID of the document this chunk belongs to. + anyOf: + - type: string + - type: 'null' source: - type: string - description: >- - The source of the content, such as a URL, file path, or other identifier. + anyOf: + - type: string + - type: 'null' created_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was created. + anyOf: + - type: integer + - type: 'null' updated_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was last updated. + anyOf: + - type: integer + - type: 'null' chunk_window: - type: string - description: >- - The window of the chunk, which can be used to group related chunks together. + anyOf: + - type: string + - type: 'null' chunk_tokenizer: - type: string - description: >- - The tokenizer used to create the chunk. Default is Tiktoken. + anyOf: + - type: string + - type: 'null' chunk_embedding_model: - type: string - description: >- - The embedding model used to create the chunk's embedding. + anyOf: + - type: string + - type: 'null' chunk_embedding_dimension: - type: integer - description: >- - The dimension of the embedding vector for the chunk. + anyOf: + - type: integer + - type: 'null' content_token_count: - type: integer - description: >- - The number of tokens in the content of the chunk. + anyOf: + - type: integer + - type: 'null' metadata_token_count: - type: integer - description: >- - The number of tokens in the metadata of the chunk. 
- additionalProperties: false - title: ChunkMetadata - description: >- - `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional - information about the chunk that will not be used in the context during - inference, but is required for backend functionality. The `ChunkMetadata` is - set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not - expected to change after. Use `Chunk.metadata` for metadata that will - be used in the context during inference. - InsertChunksRequest: + anyOf: + - type: integer + - type: 'null' type: object + title: ChunkMetadata + description: |- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that + will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` + is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. + Use `Chunk.metadata` for metadata that will be used in the context during inference. + InsertChunksRequest: properties: - vector_db_id: + vector_store_id: type: string - description: >- - The identifier of the vector database to insert the chunks into. + title: Vector Store Id chunks: - type: array items: - $ref: '#/components/schemas/Chunk' - description: >- - The chunks to insert. Each `Chunk` should contain content which can be - interleaved text, images, or other types. `metadata`: `dict[str, Any]` - and `embedding`: `List[float]` are optional. If `metadata` is provided, - you configure how Llama Stack formats the chunk during generation. If - `embedding` is not provided, it will be computed later. + $ref: '#/components/schemas/Chunk-Input' + type: array + title: Chunks ttl_seconds: - type: integer - description: The time to live of the chunks. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - vector_db_id - - chunks + - vector_store_id + - chunks title: InsertChunksRequest QueryChunksRequest: - type: object properties: - vector_db_id: + vector_store_id: type: string - description: >- - The identifier of the vector database to query. + title: Vector Store Id query: - $ref: '#/components/schemas/InterleavedContent' - description: The query to search for. + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the query. 
- additionalProperties: false + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object required: - - vector_db_id - - query + - vector_store_id + - query title: QueryChunksRequest QueryChunksResponse: - type: object properties: chunks: - type: array items: - $ref: '#/components/schemas/Chunk' - description: >- - List of content chunks returned from the query - scores: + $ref: '#/components/schemas/Chunk-Output' type: array + title: Chunks + scores: items: type: number - description: >- - Relevance scores corresponding to each returned chunk - additionalProperties: false - required: - - chunks - - scores - title: QueryChunksResponse - description: >- - Response from querying chunks in a vector database. - VectorStoreFileCounts: + type: array + title: Scores type: object + required: + - chunks + - scores + title: QueryChunksResponse + description: Response from querying chunks in a vector database. + VectorStoreFileCounts: properties: completed: type: integer - description: >- - Number of files that have been successfully processed + title: Completed cancelled: type: integer - description: >- - Number of files that had their processing cancelled + title: Cancelled failed: type: integer - description: Number of files that failed to process + title: Failed in_progress: type: integer - description: >- - Number of files currently being processed + title: In Progress total: type: integer - description: >- - Total number of files in the vector store - additionalProperties: false - required: - - completed - - cancelled - - failed - - in_progress - - total - title: VectorStoreFileCounts - description: >- - File processing status counts for a vector store. - VectorStoreListResponse: + title: Total type: object + required: + - completed + - cancelled + - failed + - in_progress + - total + title: VectorStoreFileCounts + description: File processing status counts for a vector store. + VectorStoreListResponse: properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreObject' - description: List of vector store objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first vector store in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last vector store in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more vector stores available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreListResponse description: Response from listing vector stores. 
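The chunk APIs above rename `vector_db_id` to `vector_store_id`, and `chunk_id` becomes a required `Chunk` field while `embedding` stays nullable (computed later when omitted). A sketch of the two request bodies with placeholder IDs:

```python
# InsertChunksRequest / QueryChunksRequest under the renamed fields.
insert_chunks_request = {
    "vector_store_id": "vs_123",        # renamed from vector_db_id
    "chunks": [
        {
            "chunk_id": "chunk-0001",   # now required on Chunk
            "content": "Llama Stack supports pluggable vector stores.",
            "metadata": {"source": "docs"},
            # "embedding" omitted: nullable, computed later if absent
        }
    ],
    "ttl_seconds": 3600,                # optional (nullable)
}

query_chunks_request = {
    "vector_store_id": "vs_123",
    "query": "pluggable vector stores",  # string | content items
}
```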
VectorStoreObject: - type: object properties: id: type: string - description: Unique identifier for the vector store + title: Id object: type: string + title: Object default: vector_store - description: >- - Object type identifier, always "vector_store" created_at: type: integer - description: >- - Timestamp when the vector store was created + title: Created At name: - type: string - description: (Optional) Name of the vector store + anyOf: + - type: string + - type: 'null' usage_bytes: type: integer + title: Usage Bytes default: 0 - description: >- - Storage space used by the vector store in bytes file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the vector store status: type: string + title: Status default: completed - description: Current status of the vector store expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store + anyOf: + - additionalProperties: true + type: object + - type: 'null' expires_at: - type: integer - description: >- - (Optional) Timestamp when the vector store will expire + anyOf: + - type: integer + - type: 'null' last_active_at: - type: integer - description: >- - (Optional) Timestamp of last activity on the vector store + anyOf: + - type: integer + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false + title: Metadata + type: object required: - - id - - object - - created_at - - usage_bytes - - file_counts - - status - - metadata + - id + - created_at + - file_counts title: VectorStoreObject description: OpenAI Vector Store object. - "OpenAICreateVectorStoreRequestWithExtraBody": - type: object - properties: - name: - type: string - description: (Optional) A name for the vector store - file_ids: - type: array - items: - type: string - description: >- - List of file IDs to include in the vector store - expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Expiration policy for the vector store - chunking_strategy: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Strategy for splitting files into chunks - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of key-value pairs that can be attached to the vector store - additionalProperties: false - title: >- - OpenAICreateVectorStoreRequestWithExtraBody - description: >- - Request to create a vector store with extra_body support. - OpenaiUpdateVectorStoreRequest: - type: object - properties: - name: - type: string - description: The name of the vector store. - expires_after: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The expiration policy for a vector store. 
- metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Set of 16 key-value pairs that can be attached to an object. - additionalProperties: false - title: OpenaiUpdateVectorStoreRequest - VectorStoreDeleteResponse: - type: object - properties: - id: - type: string - description: >- - Unique identifier of the deleted vector store - object: - type: string - default: vector_store.deleted - description: >- - Object type identifier for the deletion response - deleted: - type: boolean - default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreDeleteResponse - description: Response from deleting a vector store. VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' discriminator: - propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + propertyName: type + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic VectorStoreChunkingStrategyAuto: - type: object properties: type: type: string const: auto + title: Type default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: type: object + title: VectorStoreChunkingStrategyAuto + description: Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: properties: type: type: string const: static + title: Type default: static - description: >- - Strategy type, always "static" for static chunking static: $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. - VectorStoreChunkingStrategyStaticConfig: type: object + required: + - static + title: VectorStoreChunkingStrategyStatic + description: Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: properties: chunk_overlap_tokens: type: integer + title: Chunk Overlap Tokens default: 400 - description: >- - Number of tokens to overlap between adjacent chunks max_chunk_size_tokens: type: integer + maximum: 4096.0 + minimum: 100.0 + title: Max Chunk Size Tokens default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens + type: object title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. - "OpenAICreateVectorStoreFileBatchRequestWithExtraBody": - type: object + description: Configuration for static chunking strategy. 
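The static chunking config now encodes its bounds as schema constraints (`minimum: 100`, `maximum: 4096`) rather than prose, and both fields carry defaults, so an empty config object is also valid. A sketch of both strategy variants:

```python
# VectorStoreChunkingStrategy variants under the new schema.
static_strategy = {
    "type": "static",
    "static": {                       # required for the static variant
        "chunk_overlap_tokens": 400,  # default
        "max_chunk_size_tokens": 800, # must lie within [100, 4096]
    },
}

auto_strategy = {"type": "auto"}      # `type` has a default, so {} also validates
```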
+ OpenAICreateVectorStoreRequestWithExtraBody: properties: + name: + anyOf: + - type: string + - type: 'null' file_ids: - type: array - items: - type: string - description: >- - A list of File IDs that the vector store should use - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes to store with the files + anyOf: + - items: + type: string + type: array + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - (Optional) The chunking strategy used to chunk the file(s). Defaults to - auto - additionalProperties: false - required: - - file_ids - title: >- - OpenAICreateVectorStoreFileBatchRequestWithExtraBody - description: >- - Request to create a vector store file batch with extra_body support. - VectorStoreFileBatchObject: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + additionalProperties: true type: object + title: OpenAICreateVectorStoreRequestWithExtraBody + description: Request to create a vector store with extra_body support. + OpenaiUpdateVectorStoreRequest: + properties: + name: + anyOf: + - type: string + - type: 'null' + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + title: OpenaiUpdateVectorStoreRequest + VectorStoreDeleteResponse: properties: id: type: string - description: Unique identifier for the file batch + title: Id object: type: string + title: Object + default: vector_store.deleted + deleted: + type: boolean + title: Deleted + default: true + type: object + required: + - id + title: VectorStoreDeleteResponse + description: Response from deleting a vector store. + OpenAICreateVectorStoreFileBatchRequestWithExtraBody: + properties: + file_ids: + items: + type: string + type: array + title: File Ids + attributes: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + chunking_strategy: + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + additionalProperties: true + type: object + required: + - file_ids + title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody + description: Request to create a vector store file batch with extra_body support. 
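Both `...WithExtraBody` request schemas now declare `additionalProperties: true`, so unrecognized keys pass through to the provider. In the sketch below, `provider_options` is a hypothetical extra key, not a field from the spec:

```python
# Create-vector-store request with a pass-through extra_body key.
create_vector_store_request = {
    "name": "support-docs",
    "file_ids": ["file_abc", "file_def"],
    "chunking_strategy": {"type": "auto"},
    "metadata": {"team": "support"},
    # Hypothetical provider-specific key, allowed by additionalProperties: true.
    "provider_options": {"note": "passed through via extra_body"},
}
```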
+ VectorStoreFileBatchObject: + properties: + id: + type: string + title: Id + object: + type: string + title: Object default: vector_store.file_batch - description: >- - Object type identifier, always "vector_store.file_batch" created_at: type: integer - description: >- - Timestamp when the file batch was created + title: Created At vector_store_id: type: string - description: >- - ID of the vector store containing the file batch + title: Vector Store Id status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: >- - Current processing status of the file batch + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed file_counts: $ref: '#/components/schemas/VectorStoreFileCounts' - description: >- - File processing status counts for the batch - additionalProperties: false + type: object required: - - id - - object - - created_at - - vector_store_id - - status - - file_counts + - id + - created_at + - vector_store_id + - status + - file_counts title: VectorStoreFileBatchObject description: OpenAI Vector Store File Batch object. VectorStoreFileStatus: - oneOf: - - type: string - const: completed - - type: string - const: in_progress - - type: string - const: cancelled - - type: string - const: failed + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed VectorStoreFileLastError: - type: object properties: code: - oneOf: - - type: string - const: server_error - - type: string - const: rate_limit_exceeded - description: >- - Error code indicating the type of failure + title: Code + type: string + enum: + - server_error + - rate_limit_exceeded + default: server_error message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: VectorStoreFileLastError - description: >- - Error information for failed vector store file processing. - VectorStoreFileObject: + title: Message type: object + required: + - code + - message + title: VectorStoreFileLastError + description: Error information for failed vector store file processing. + VectorStoreFileObject: properties: id: type: string - description: Unique identifier for the file + title: Id object: type: string + title: Object default: vector_store.file - description: >- - Object type identifier, always "vector_store.file" attributes: - type: object additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file + anyOf: + - type: string + maxLength: 512 + - type: number + - type: boolean + title: string | number | boolean + propertyNames: + type: string + maxLength: 64 + type: object + maxProperties: 16 + title: Attributes + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers. 
+ x-oaiTypeLabel: map chunking_strategy: oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic discriminator: propertyName: type mapping: auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - description: >- - Strategy used for splitting the file into chunks created_at: type: integer - description: >- - Timestamp when the file was added to the vector store + title: Created At last_error: - $ref: '#/components/schemas/VectorStoreFileLastError' - description: >- - (Optional) Error information if file processing failed + anyOf: + - $ref: '#/components/schemas/VectorStoreFileLastError' + title: VectorStoreFileLastError + - type: 'null' + title: VectorStoreFileLastError status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: Current processing status of the file + title: Status + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed usage_bytes: type: integer + title: Usage Bytes default: 0 - description: Storage space used by this file in bytes vector_store_id: type: string - description: >- - ID of the vector store containing this file - additionalProperties: false + title: Vector Store Id + type: object required: - - id - - object - - attributes - - chunking_strategy - - created_at - - status - - usage_bytes - - vector_store_id + - id + - chunking_strategy + - created_at + - status + - vector_store_id title: VectorStoreFileObject description: OpenAI Vector Store File object. VectorStoreFilesListInBatchResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: >- - List of vector store file objects in the batch + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false + type: object required: - - object - - data - - has_more + - data title: VectorStoreFilesListInBatchResponse - description: >- - Response from listing files in a vector store file batch. + description: Response from listing files in a vector store file batch. 
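A sketch of a VectorStoreFileBatchObject under these definitions; the ids are hypothetical, and file_counts assumes the per-status counters of the referenced VectorStoreFileCounts schema, which is outside this hunk:

    id: vsfb_123              # hypothetical
    object: vector_store.file_batch
    created_at: 1730000000
    vector_store_id: vs_456   # hypothetical
    status: in_progress
    file_counts:              # assumed VectorStoreFileCounts fields
      completed: 2
      in_progress: 1
      failed: 0
      cancelled: 0
      total: 3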
VectorStoreListFilesResponse: - type: object properties: object: type: string + title: Object default: list - description: Object type identifier, always "list" data: - type: array items: $ref: '#/components/schemas/VectorStoreFileObject' - description: List of vector store file objects + type: array + title: Data first_id: - type: string - description: >- - (Optional) ID of the first file in the list for pagination + anyOf: + - type: string + - type: 'null' last_id: - type: string - description: >- - (Optional) ID of the last file in the list for pagination + anyOf: + - type: string + - type: 'null' has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more files available beyond this page - additionalProperties: false - required: - - object - - data - - has_more - title: VectorStoreListFilesResponse - description: >- - Response from listing files in a vector store. - OpenaiAttachFileToVectorStoreRequest: type: object + required: + - data + title: VectorStoreListFilesResponse + description: Response from listing files in a vector store. + OpenaiAttachFileToVectorStoreRequest: properties: file_id: type: string - description: >- - The ID of the file to attach to the vector store. + title: File Id attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The key-value attributes stored with the file, which can be used for filtering. + anyOf: + - additionalProperties: true + type: object + - type: 'null' chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - The chunking strategy to use for the file. - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + title: VectorStoreChunkingStrategyAuto + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyStatic + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic + - type: 'null' + title: Chunking Strategy + type: object required: - - file_id + - file_id title: OpenaiAttachFileToVectorStoreRequest OpenaiUpdateVectorStoreFileRequest: - type: object properties: attributes: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The updated key-value attributes to store with the file. - additionalProperties: false + title: Attributes + type: object required: - - attributes + - attributes title: OpenaiUpdateVectorStoreFileRequest VectorStoreFileDeleteResponse: - type: object properties: id: type: string - description: Unique identifier of the deleted file + title: Id object: type: string + title: Object default: vector_store.file.deleted - description: >- - Object type identifier for the deletion response deleted: type: boolean + title: Deleted default: true - description: >- - Whether the deletion operation was successful - additionalProperties: false - required: - - id - - object - - deleted - title: VectorStoreFileDeleteResponse - description: >- - Response from deleting a vector store file. 
- VectorStoreContent: type: object + required: + - id + title: VectorStoreFileDeleteResponse + description: Response from deleting a vector store file. + VectorStoreContent: properties: type: type: string const: text - description: >- - Content type, currently only "text" is supported + title: Type text: type: string - description: The actual text content - additionalProperties: false - required: - - type - - text - title: VectorStoreContent - description: >- - Content item from a vector store file or search result. - VectorStoreFileContentsResponse: - type: object - properties: - file_id: - type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file - content: - type: array - items: - $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file - additionalProperties: false - required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse - description: >- - Response from retrieving the contents of a vector store file. - OpenaiSearchVectorStoreRequest: - type: object - properties: - query: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - The query string or array for performing the search. - filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Filters based on file attributes to narrow the search results. - max_num_results: - type: integer - description: >- - Maximum number of results to return (1 to 50 inclusive, default 10). - ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: + title: Text + embedding: + anyOf: + - items: type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - Ranking options for fine-tuning the search results. 
- rewrite_query: - type: boolean - description: >- - Whether to rewrite the natural language query for vector search (default - false) - search_mode: - type: string - description: >- - The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - additionalProperties: false - required: - - query - title: OpenaiSearchVectorStoreRequest - VectorStoreSearchResponse: + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object - properties: - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: Relevance score for this search result - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: number - - type: boolean - description: >- - (Optional) Key-value attributes associated with the file - content: - type: array - items: - $ref: '#/components/schemas/VectorStoreContent' - description: >- - List of content items matching the search query - additionalProperties: false required: - - file_id - - filename - - score - - content - title: VectorStoreSearchResponse - description: Response from searching a vector store. - VectorStoreSearchResponsePage: - type: object + - type + - text + title: VectorStoreContent + description: Content item from a vector store file or search result. + VectorStoreFileContentResponse: properties: object: type: string - default: vector_store.search_results.page - description: >- - Object type identifier for the search results page - search_query: - type: string - description: >- - The original search query that was executed + const: vector_store.file_content.page + title: Object + default: vector_store.file_content.page data: - type: array items: - $ref: '#/components/schemas/VectorStoreSearchResponse' - description: List of search result objects + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Data has_more: type: boolean + title: Has More default: false - description: >- - Whether there are more results available beyond this page next_page: - type: string - description: >- - (Optional) Token for retrieving the next page of results - additionalProperties: false - required: - - object - - search_query - - data - - has_more - title: VectorStoreSearchResponsePage - description: >- - Paginated response from searching a vector store. - VersionInfo: + anyOf: + - type: string + - type: 'null' type: object + required: + - data + title: VectorStoreFileContentResponse + description: Represents the parsed content of a vector store file. 
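The new VectorStoreFileContentResponse replaces the removed VectorStoreFileContentsResponse; a minimal page under the schema above looks like this (only data is required):

    object: vector_store.file_content.page
    data:
      - type: text
        text: First parsed chunk of the file.
    has_more: false
    next_page: null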
+ OpenaiSearchVectorStoreRequest: + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + max_num_results: + anyOf: + - type: integer + - type: 'null' + default: 10 + ranking_options: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions + rewrite_query: + anyOf: + - type: boolean + - type: 'null' + default: false + search_mode: + anyOf: + - type: string + - type: 'null' + default: vector + type: object + required: + - query + title: OpenaiSearchVectorStoreRequest + VectorStoreSearchResponse: + properties: + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + attributes: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: number + - type: boolean + title: string | number | boolean + type: object + - type: 'null' + content: + items: + $ref: '#/components/schemas/VectorStoreContent' + type: array + title: Content + type: object + required: + - file_id + - filename + - score + - content + title: VectorStoreSearchResponse + description: Response from searching a vector store. + VectorStoreSearchResponsePage: + properties: + object: + type: string + title: Object + default: vector_store.search_results.page + search_query: + items: + type: string + type: array + title: Search Query + data: + items: + $ref: '#/components/schemas/VectorStoreSearchResponse' + type: array + title: Data + has_more: + type: boolean + title: Has More + default: false + next_page: + anyOf: + - type: string + - type: 'null' + type: object + required: + - search_query + - data + title: VectorStoreSearchResponsePage + description: Paginated response from searching a vector store. + VersionInfo: properties: version: type: string - description: Version number of the service - additionalProperties: false + title: Version + type: object required: - - version + - version title: VersionInfo description: Version information for the service. + PaginatedResponse: + properties: + data: + items: + additionalProperties: true + type: object + type: array + title: Data + has_more: + type: boolean + title: Has More + url: + anyOf: + - type: string + - type: 'null' + type: object + required: + - data + - has_more + title: PaginatedResponse + description: A generic paginated response that follows a simple format. 
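A minimal OpenaiSearchVectorStoreRequest instance per the schema above; only query is required, and the defaults (max_num_results 10, search_mode vector, rewrite_query false) now live in the schema itself. The filter attribute is hypothetical:

    query: how do I rotate an API key?
    filters:
      category: docs          # hypothetical attribute filter
    max_num_results: 10
    search_mode: vector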
+ Dataset: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: dataset + title: Type + default: dataset + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + discriminator: + propertyName: type + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + metadata: + additionalProperties: true + type: object + title: Metadata + description: Any additional metadata for this dataset + type: object + required: + - identifier + - provider_id + - purpose + - source + title: Dataset + description: Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + properties: + type: + type: string + const: rows + title: Type + default: rows + rows: + items: + additionalProperties: true + type: object + type: array + title: Rows + type: object + required: + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + properties: + type: + type: string + const: uri + title: Type + default: uri + uri: + type: string + title: Uri + type: object + required: + - uri + title: URIDataSource + description: A dataset that can be obtained from a URI. + ListDatasetsResponse: + properties: + data: + items: + $ref: '#/components/schemas/Dataset' + type: array + title: Data + type: object + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + Benchmark: + properties: + identifier: + type: string + title: Identifier + description: Unique identifier for this resource in llama stack + provider_resource_id: + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider + provider_id: + type: string + title: Provider Id + description: ID of the provider that owns this resource + type: + type: string + const: benchmark + title: Type + default: benchmark + dataset_id: + type: string + title: Dataset Id + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + metadata: + additionalProperties: true + type: object + title: Metadata + description: Metadata for this evaluation task + type: object + required: + - identifier + - provider_id + - dataset_id + - scoring_functions + title: Benchmark + description: A benchmark resource for evaluating model performance. 
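A Dataset instance combining the pieces above: a URIDataSource plus one of the DatasetPurpose enum values defined later in this diff. The identifier, provider id, and URI are hypothetical:

    identifier: qa-eval-set   # hypothetical
    provider_id: localfs      # hypothetical
    type: dataset
    purpose: eval/question-answer
    source:
      type: uri
      uri: https://example.com/qa.jsonl
    metadata: {}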
+ ListBenchmarksResponse: + properties: + data: + items: + $ref: '#/components/schemas/Benchmark' + type: array + title: Data + type: object + required: + - data + title: ListBenchmarksResponse + BenchmarkConfig: + properties: + eval_candidate: + $ref: '#/components/schemas/ModelCandidate' + scoring_params: + additionalProperties: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + type: object + title: Scoring Params + description: Map between scoring function id and parameters for each scoring function you want to run + num_examples: + anyOf: + - type: integer + - type: 'null' + description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated + type: object + required: + - eval_candidate + title: BenchmarkConfig + description: A benchmark configuration for evaluation. + GreedySamplingStrategy: + properties: + type: + type: string + const: greedy + title: Type + default: greedy + type: object + title: GreedySamplingStrategy + description: Greedy sampling strategy that selects the highest probability token at each step. + ModelCandidate: + properties: + type: + type: string + const: model + title: Type + default: model + model: + type: string + title: Model + sampling_params: + $ref: '#/components/schemas/SamplingParams' + system_message: + anyOf: + - $ref: '#/components/schemas/SystemMessage' + title: SystemMessage + - type: 'null' + title: SystemMessage + type: object + required: + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. + SamplingParams: + properties: + strategy: + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + max_tokens: + anyOf: + - type: integer + - type: 'null' + repetition_penalty: + anyOf: + - type: number + - type: 'null' + default: 1.0 + stop: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: SamplingParams + description: Sampling parameters. 
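Tying the evaluation schemas together, a hedged sketch of a BenchmarkConfig whose eval_candidate is a ModelCandidate using the greedy strategy; the model id is hypothetical:

    eval_candidate:
      type: model
      model: my-model-id      # hypothetical
      sampling_params:
        strategy:
          type: greedy
    num_examples: 50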
+ SystemMessage: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. + TopKSamplingStrategy: + properties: + type: + type: string + const: top_k + title: Type + default: top_k + top_k: + type: integer + minimum: 1.0 + title: Top K + type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + properties: + type: + type: string + const: top_p + title: Type + default: top_p + temperature: + anyOf: + - type: number + minimum: 0.0 + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + default: 0.95 + type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateResponse: + properties: + generations: + items: + additionalProperties: true + type: object + type: array + title: Generations + scores: + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + type: object + title: Scores + type: object + required: + - generations + - scores + title: EvaluateResponse + description: The response from an evaluation. + Job: + properties: + job_id: + type: string + title: Job Id + status: + $ref: '#/components/schemas/JobStatus' + type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. + RerankData: + properties: + index: + type: integer + title: Index + relevance_score: + type: number + title: Relevance Score + type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. + RerankResponse: + properties: + data: + items: + $ref: '#/components/schemas/RerankData' + type: array + title: Data + type: object + required: + - data + title: RerankResponse + description: Response from a reranking request. 
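A RerankResponse example per the schemas above: each RerankData entry pairs the index of an input document with its relevance score:

    data:
      - index: 2
        relevance_score: 0.91
      - index: 0
        relevance_score: 0.47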
+ Checkpoint: + properties: + identifier: + type: string + title: Identifier + created_at: + type: string + format: date-time + title: Created At + epoch: + type: integer + title: Epoch + post_training_job_id: + type: string + title: Post Training Job Id + path: + type: string + title: Path + training_metrics: + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. + PostTrainingJobArtifactsResponse: + properties: + job_uuid: + type: string + title: Job Uuid + checkpoints: + items: + $ref: '#/components/schemas/Checkpoint' + type: array + title: Checkpoints + type: object + required: + - job_uuid + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + properties: + epoch: + type: integer + title: Epoch + train_loss: + type: number + title: Train Loss + validation_loss: + type: number + title: Validation Loss + perplexity: + type: number + title: Perplexity + type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + PostTrainingJobStatusResponse: + properties: + job_uuid: + type: string + title: Job Uuid + status: + $ref: '#/components/schemas/JobStatus' + scheduled_at: + anyOf: + - type: string + format: date-time + - type: 'null' + started_at: + anyOf: + - type: string + format: date-time + - type: 'null' + completed_at: + anyOf: + - type: string + format: date-time + - type: 'null' + resources_allocated: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + checkpoints: + items: + $ref: '#/components/schemas/Checkpoint' + type: array + title: Checkpoints + type: object + required: + - job_uuid + - status + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. + ListPostTrainingJobsResponse: + properties: + data: + items: + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object + required: + - data + title: ListPostTrainingJobsResponse + DPOAlignmentConfig: + properties: + beta: + type: number + title: Beta + loss_type: + $ref: '#/components/schemas/DPOLossType' + default: sigmoid + type: object + required: + - beta + title: DPOAlignmentConfig + description: Configuration for Direct Preference Optimization (DPO) alignment. + DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + properties: + dataset_id: + type: string + title: Dataset Id + batch_size: + type: integer + title: Batch Size + shuffle: + type: boolean + title: Shuffle + data_format: + $ref: '#/components/schemas/DatasetFormat' + validation_dataset_id: + anyOf: + - type: string + - type: 'null' + packed: + anyOf: + - type: boolean + - type: 'null' + default: false + train_on_input: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset. 
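A minimal DataConfig covering its four required fields, with data_format drawn from the DatasetFormat enum above; the dataset id is hypothetical:

    dataset_id: my-training-set   # hypothetical
    batch_size: 8
    shuffle: true
    data_format: instruct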
+ EfficiencyConfig: + properties: + enable_activation_checkpointing: + anyOf: + - type: boolean + - type: 'null' + default: false + enable_activation_offloading: + anyOf: + - type: boolean + - type: 'null' + default: false + memory_efficient_fsdp_wrap: + anyOf: + - type: boolean + - type: 'null' + default: false + fsdp_cpu_offload: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + lr: + type: number + title: Lr + weight_decay: + type: number + title: Weight Decay + num_warmup_steps: + type: integer + title: Num Warmup Steps + type: object + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: Available optimizer algorithms for training. + TrainingConfig: + properties: + n_epochs: + type: integer + title: N Epochs + max_steps_per_epoch: + type: integer + title: Max Steps Per Epoch + default: 1 + gradient_accumulation_steps: + type: integer + title: Gradient Accumulation Steps + default: 1 + max_validation_steps: + anyOf: + - type: integer + - type: 'null' + default: 1 + data_config: + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig + optimizer_config: + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig + efficiency_config: + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig + dtype: + anyOf: + - type: string + - type: 'null' + default: bf16 + type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PostTrainingJob: + properties: + job_uuid: + type: string + title: Job Uuid + type: object + required: + - job_uuid + title: PostTrainingJob + AlgorithmConfig: + discriminator: + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig + LoraFinetuningConfig: + properties: + type: + type: string + const: LoRA + title: Type + default: LoRA + lora_attn_modules: + items: + type: string + type: array + title: Lora Attn Modules + apply_lora_to_mlp: + type: boolean + title: Apply Lora To Mlp + apply_lora_to_output: + type: boolean + title: Apply Lora To Output + rank: + type: integer + title: Rank + alpha: + type: integer + title: Alpha + use_dora: + anyOf: + - type: boolean + - type: 'null' + default: false + quantize_base: + anyOf: + - type: boolean + - type: 'null' + default: false + type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. 
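A LoraFinetuningConfig instance supplying its five required fields; the attention module names are hypothetical and depend on the target model architecture:

    type: LoRA
    lora_attn_modules:
      - q_proj                # hypothetical module names
      - v_proj
    apply_lora_to_mlp: false
    apply_lora_to_output: false
    rank: 8
    alpha: 16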
+ QATFinetuningConfig: + properties: + type: + type: string + const: QAT + title: Type + default: QAT + quantizer_name: + type: string + title: Quantizer Name + group_size: + type: integer + title: Group Size + type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. + ParamType: + discriminator: + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + propertyName: type + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + DataSource: + discriminator: + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + propertyName: type + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + AllowedToolsFilter: + properties: + tool_names: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: AllowedToolsFilter + description: Filter configuration for restricting which MCP tools can be used. + ApprovalFilter: + properties: + always: + anyOf: + - items: + type: string + type: array + - type: 'null' + never: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: ApprovalFilter + description: Filter configuration for MCP tool approval requirements. 
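An ApprovalFilter sketch under the schema above; both lists are optional, and the tool names are hypothetical (presumably approval is always required for tools under always and waived for tools under never):

    always:
      - delete_file           # hypothetical tool names
    never:
      - read_file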
+ BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Body_openai_upload_file_v1_files_post: + properties: + file: + type: string + format: binary + title: File + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after: + anyOf: + - $ref: '#/components/schemas/ExpiresAfter' + title: ExpiresAfter + - type: 'null' + title: ExpiresAfter + type: object + required: + - file + - purpose + title: Body_openai_upload_file_v1_files_post + Chunk-Input: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. 
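A Chunk instance in its simplest form, using the plain-string content variant; chunk_id and metadata values are hypothetical:

    content: Retrieval-augmented generation combines search with generation.
    chunk_id: chunk-0001      # hypothetical
    metadata:
      source: manual-upload   # hypothetical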
+ Chunk-Output: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + title: string | list[ImageContentItem-Output | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. + ConversationItemInclude: + type: string + enum: + - web_search_call.action.sources + - code_interpreter_call.outputs + - computer_call_output.output.image_url + - file_search_call.results + - message.input_image.image_url + - message.output_text.logprobs + - reasoning.encrypted_content + title: ConversationItemInclude + description: Specify additional output data to include in the model response. + DatasetPurpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + title: DatasetPurpose + description: Purpose of the dataset. Each purpose has a required input data schema. + Errors: + properties: + data: + anyOf: + - items: + $ref: '#/components/schemas/BatchError' + type: array + - type: 'null' + object: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: Errors + HealthStatus: + type: string + enum: + - OK + - Error + - Not Implemented + title: HealthStatus + ImageContentItem-Input: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: A image content item + ImageContentItem-Output: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: A image content item + InputTokensDetails: + properties: + cached_tokens: + type: integer + title: Cached Tokens + additionalProperties: true + type: object + required: + - cached_tokens + title: InputTokensDetails + JobStatus: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + title: JobStatus + description: Status of a job execution. 
+ MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage. + OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. 
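An OpenAIAssistantMessageParam instance using the plain-string content variant; every field other than the fixed role is optional under the schema above:

    role: assistant
    content: The capital of France is Paris.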
+ OpenAIResponseMessage-Input: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
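A hedged sketch of an OpenAIResponseMessage using the output_text content variant from the discriminator mapping above; the inner text field name follows the OpenAI Responses convention and is assumed here, since OpenAIResponseOutputMessageContentOutputText is defined outside this hunk:

    type: message
    role: assistant
    status: completed
    content:
      - type: output_text
        text: Here is the summary you asked for.   # assumed field name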
+ OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + OpenAIResponseOutputMessageFileSearchToolCallResults: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. 
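An OpenAIResponseTextFormat instance selecting structured output; the name and the embedded JSON schema are hypothetical:

    type: json_schema
    name: weather_report      # hypothetical
    schema:
      type: object
      properties:
        temperature:
          type: number
    strict: true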
+ OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. + OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. 
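And the matching OpenAIUserMessageParam in its simplest form, with content as a plain string (the only required field):

    role: user
    content: What is the capital of France?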
+ OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. + _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. + properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + MetricInResponse: + description: A metric value included in API responses. 
+ properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. + properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. + properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: 
UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. + properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. + properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings + type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: + items: + type: string + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + additionalProperties: true + title: Metadata + type: object + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. 
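+      # Editor's note (illustrative, not generator output): a VectorStoreCreateRequest
+      # body per the schema above might look like
+      #   {"name": "support-docs", "file_ids": ["file_123"], "metadata": {"team": "docs"}}
+      # All identifier values here are hypothetical.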
+ properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean + required: + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. + properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. + properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. 
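+        # Editor's note (illustrative): a minimal body with a single user message,
+        # within the 20-item limit noted above, might look like
+        #   {"items": [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "Hello"}]}]}
+        # The message shape follows OpenAIResponseMessage (defined elsewhere in this
+        # file); treat the exact content-part fields as an assumption.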
+ items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. + properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. 
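+      # Editor's note (illustrative, not generator output): a ToolGroupInput entry
+      # per the schema above might be written as
+      #   {"toolgroup_id": "builtin::websearch", "provider_id": "tavily-search"}
+      # The identifiers are examples only and assume a distribution that ships them.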
+ enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. 
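+      # Editor's note (illustrative, not generator output): a RemoteProviderSpec
+      # using the `module` hook documented above might look like
+      #   api: inference
+      #   provider_type: remote::ramalama
+      #   adapter_type: ramalama
+      #   config_class: ramalama_stack.config.RamalamaImplConfig
+      #   module: ramalama_stack
+      # Only `module: ramalama_stack` comes from the spec text; the other values,
+      # including the config class path, are assumptions. The named module must
+      # expose `get_adapter_impl(config, deps)`.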
+      properties:
+        job_uuid:
+          title: Job Uuid
+          type: string
+        log_lines:
+          items:
+            type: string
+          title: Log Lines
+          type: array
+      required:
+      - job_uuid
+      - log_lines
+      title: PostTrainingJobLogStream
+      type: object
+    RLHFAlgorithm:
+      description: Available reinforcement learning from human feedback algorithms.
+      enum:
+      - dpo
+      title: RLHFAlgorithm
+      type: string
+    PostTrainingRLHFRequest:
+      description: Request to finetune a model using reinforcement learning from human feedback.
+      properties:
+        job_uuid:
+          title: Job Uuid
+          type: string
+        finetuned_model:
+          $ref: '#/components/schemas/URL'
+        dataset_id:
+          title: Dataset Id
+          type: string
+        validation_dataset_id:
+          title: Validation Dataset Id
+          type: string
+        algorithm:
+          $ref: '#/components/schemas/RLHFAlgorithm'
+        algorithm_config:
+          $ref: '#/components/schemas/DPOAlignmentConfig'
+        optimizer_config:
+          $ref: '#/components/schemas/OptimizerConfig'
+        training_config:
+          $ref: '#/components/schemas/TrainingConfig'
+        hyperparam_search_config:
+          additionalProperties: true
+          title: Hyperparam Search Config
+          type: object
+        logger_config:
+          additionalProperties: true
+          title: Logger Config
+          type: object
+      required:
+      - job_uuid
+      - finetuned_model
+      - dataset_id
+      - validation_dataset_id
+      - algorithm
+      - algorithm_config
+      - optimizer_config
+      - training_config
+      - hyperparam_search_config
+      - logger_config
+      title: PostTrainingRLHFRequest
+      type: object
   responses:
     BadRequest400:
       description: The request was invalid or malformed
@@ -10060,8 +11473,7 @@ components:
           title: Bad Request
           detail: The request was invalid or malformed
     TooManyRequests429:
-      description: >-
-        The client has sent too many requests in a given amount of time
+      description: The client has sent too many requests in a given amount of time
       content:
         application/json:
           schema:
@@ -10069,11 +11481,9 @@ components:
           example:
             status: 429
            title: Too Many Requests
-          detail: >-
-            You have exceeded the rate limit. Please try again later.
+          detail: You have exceeded the rate limit. Please try again later.
     InternalServerError500:
-      description: >-
-        The server encountered an unexpected error
+      description: The server encountered an unexpected error
       content:
         application/json:
           schema:
@@ -10081,171 +11491,101 @@ components:
           example:
            status: 500
            title: Internal Server Error
-          detail: >-
-            An unexpected error occurred. Our team has been notified.
+          detail: An unexpected error occurred
     DefaultError:
-      description: An unexpected error occurred
+      description: An error occurred
       content:
         application/json:
           schema:
            $ref: '#/components/schemas/Error'
-      example:
-        status: 0
-        title: Error
-        detail: An unexpected error occurred
-security:
-  - Default: []
 tags:
-  - name: Agents
-    description: >-
-      APIs for creating and interacting with agentic systems.
+- description: APIs for creating and interacting with agentic systems.
+  name: Agents
+  x-displayName: Agents
+- description: |-
+    The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.
+
+    The API is designed to allow use of openai client libraries for seamless integration.
+    This API provides the following extensions:
+     - idempotent batch creation
+
+    Note: This API is currently under active development and may undergo changes.
+  name: Batches
+  x-displayName: Batches
+- description: ''
+  name: Benchmarks
+- description: Protocol for conversation management operations.
+ name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. - - The Responses API provides OpenAI-compatible functionality with enhanced capabilities - for dynamic, stateful interactions. - - - > **✅ STABLE**: This API is production-ready with backward compatibility guarantees. - Recommended for production applications. - - - ### ✅ Supported Tools - - - The Responses API supports the following tool types: - - - - **`web_search`**: Search the web for current information and real-time data - - - **`file_search`**: Search through uploaded files and vector stores - - Supports dynamic `vector_store_ids` per call - - Compatible with OpenAI file search patterns - - **`function`**: Call custom functions with JSON schema validation - - - **`mcp_tool`**: Model Context Protocol integration - - - ### ✅ Supported Fields & Features - - - **Core Capabilities:** - - - **Dynamic Configuration**: Switch models, vector stores, and tools per request - without pre-configuration - - - **Conversation Branching**: Use `previous_response_id` to branch conversations - and explore different paths - - - **Rich Annotations**: Automatic file citations, URL citations, and container - file citations - - - **Status Tracking**: Monitor tool call execution status and handle failures - gracefully - - - ### 🚧 Work in Progress - - - - Full real-time response streaming support - - - `tool_choice` parameter - - - `max_tool_calls` parameter - - - Built-in tools (code interpreter, containers API) - - - Safety & guardrails - - - `reasoning` capabilities - - - `service_tier` - - - `logprobs` - - - `max_output_tokens` - - - `metadata` handling - - - `instructions` - - - `incomplete_details` - - - `background` - x-displayName: Agents - - name: Conversations - description: >- - Protocol for conversation management operations. - x-displayName: Conversations - - name: Files - description: >- - This API is used to upload documents that can be used with other Llama Stack - APIs. - x-displayName: Files - - name: Inference - description: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. - - - This API provides the raw interface to the underlying models. Two kinds of models - are supported: - - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - - Embedding models: these models generate embeddings to be used for semantic - search. - x-displayName: Inference - - name: Inspect - description: >- - APIs for inspecting the Llama Stack service, including health status, available - API routes with methods and implementing providers. - x-displayName: Inspect - - name: Models - description: '' - - name: Prompts - description: >- - Protocol for prompt management operations. - x-displayName: Prompts - - name: Providers - description: >- - Providers API for inspecting, listing, and modifying providers and their configurations. - x-displayName: Providers - - name: Safety - description: OpenAI-compatible Moderations API. 
- x-displayName: Safety - - name: Scoring - description: '' - - name: ScoringFunctions - description: '' - - name: Shields - description: '' - - name: SyntheticDataGeneration (Coming Soon) - description: '' - - name: ToolGroups - description: '' - - name: ToolRuntime - description: '' - - name: VectorIO - description: '' + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. + name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Agents - - Conversations - - Files - - Inference - - Inspect - - Models - - Prompts - - Providers - - Safety - - Scoring - - ScoringFunctions - - Shields - - SyntheticDataGeneration (Coming Soon) - - ToolGroups - - ToolRuntime - - VectorIO +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/docs/static/openai-spec-2.3.0.yml b/docs/static/openai-spec-2.3.0.yml new file mode 100644 index 000000000..8cdfaaf1f --- /dev/null +++ b/docs/static/openai-spec-2.3.0.yml @@ -0,0 +1,66741 @@ +openapi: 3.1.0 +info: + title: OpenAI API + description: The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details. + version: 2.3.0 + termsOfService: https://openai.com/policies/terms-of-use + contact: + name: OpenAI Support + url: https://help.openai.com/ + license: + name: MIT + url: https://github.com/openai/openai-openapi/blob/master/LICENSE +servers: + - url: https://api.openai.com/v1 +security: + - ApiKeyAuth: [] +tags: + - name: Assistants + description: Build Assistants that can call models and use tools. + - name: Audio + description: Turn audio into text or text into audio. + - name: Chat + description: Given a list of messages comprising a conversation, the model will return a response. + - name: Conversations + description: Manage conversations and conversation items. + - name: Completions + description: >- + Given a prompt, the model will return one or more predicted completions, and can also return the + probabilities of alternative tokens at each position. 
+ - name: Embeddings + description: >- + Get a vector representation of a given input that can be easily consumed by machine learning models and + algorithms. + - name: Evals + description: Manage and run evals in the OpenAI platform. + - name: Fine-tuning + description: Manage fine-tuning jobs to tailor a model to your specific training data. + - name: Graders + description: Manage and run graders in the OpenAI platform. + - name: Batch + description: Create large batches of API requests to run asynchronously. + - name: Files + description: Files are used to upload documents that can be used with features like Assistants and Fine-tuning. + - name: Uploads + description: Use Uploads to upload large files in multiple parts. + - name: Images + description: Given a prompt and/or an input image, the model will generate a new image. + - name: Models + description: List and describe the various models available in the API. + - name: Moderations + description: Given text and/or image inputs, classifies if those inputs are potentially harmful. + - name: Audit Logs + description: List user actions and configuration changes within this organization. +paths: + /assistants: + get: + operationId: listAssistants + tags: + - Assistants + summary: List assistants + parameters: + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: > + Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for + descending order. + schema: + type: string + default: desc + enum: + - asc + - desc + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + schema: + type: string + - name: before + in: query + description: > + A cursor for use in pagination. `before` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, starting with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page of the list. + schema: + type: string + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListAssistantsResponse' + x-oaiMeta: + name: List assistants + group: assistants + beta: true + returns: A list of [assistant](https://platform.openai.com/docs/api-reference/assistants/object) objects. 
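+        # Editor's note (illustrative): cursor pagination uses the `after`/`before`
+        # parameters documented above; to fetch the page following the example
+        # response below, pass its `last_id` as the cursor, e.g.
+        #   curl "https://api.openai.com/v1/assistants?limit=20&after=asst_abc789" \
+        #     -H "Authorization: Bearer $OPENAI_API_KEY" \
+        #     -H "OpenAI-Beta: assistants=v2"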
+ examples: + response: | + { + "object": "list", + "data": [ + { + "id": "asst_abc123", + "object": "assistant", + "created_at": 1698982736, + "name": "Coding Tutor", + "description": null, + "model": "gpt-4o", + "instructions": "You are a helpful assistant designed to make me better at coding!", + "tools": [], + "tool_resources": {}, + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + }, + { + "id": "asst_abc456", + "object": "assistant", + "created_at": 1698982718, + "name": "My Assistant", + "description": null, + "model": "gpt-4o", + "instructions": "You are a helpful assistant designed to make me better at coding!", + "tools": [], + "tool_resources": {}, + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + }, + { + "id": "asst_abc789", + "object": "assistant", + "created_at": 1698982643, + "name": null, + "description": null, + "model": "gpt-4o", + "instructions": null, + "tools": [], + "tool_resources": {}, + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + } + ], + "first_id": "asst_abc123", + "last_id": "asst_abc789", + "has_more": false + } + request: + curl: | + curl "https://api.openai.com/v1/assistants?order=desc&limit=20" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.beta.assistants.list() + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const assistant of client.beta.assistants.list()) { + console.log(assistant.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Beta.Assistants.List(context.TODO(), openai.BetaAssistantListParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.assistants.AssistantListPage; + import com.openai.models.beta.assistants.AssistantListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + AssistantListPage page = client.beta().assistants().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.beta.assistants.list + + puts(page) + description: Returns a list of assistants. + post: + operationId: createAssistant + tags: + - Assistants + summary: Create assistant + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateAssistantRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/AssistantObject' + x-oaiMeta: + name: Create assistant + group: assistants + beta: true + returns: An [assistant](https://platform.openai.com/docs/api-reference/assistants/object) object. 
+ examples: + - title: Code Interpreter + request: + curl: | + curl "https://api.openai.com/v1/assistants" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "instructions": "You are a personal math tutor. When asked a question, write and run Python code to answer the question.", + "name": "Math Tutor", + "tools": [{"type": "code_interpreter"}], + "model": "gpt-4o" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + assistant = client.beta.assistants.create( + model="gpt-4o", + ) + print(assistant.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const assistant = await client.beta.assistants.create({ model: 'gpt-4o' }); + + console.log(assistant.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/shared" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + assistant, err := client.Beta.Assistants.New(context.TODO(), openai.BetaAssistantNewParams{ + Model: shared.ChatModelGPT5_1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", assistant.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.ChatModel; + import com.openai.models.beta.assistants.Assistant; + import com.openai.models.beta.assistants.AssistantCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + AssistantCreateParams params = AssistantCreateParams.builder() + .model(ChatModel.GPT_5_1) + .build(); + Assistant assistant = client.beta().assistants().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + assistant = openai.beta.assistants.create(model: :"gpt-5.1") + + puts(assistant) + response: | + { + "id": "asst_abc123", + "object": "assistant", + "created_at": 1698984975, + "name": "Math Tutor", + "description": null, + "model": "gpt-4o", + "instructions": "You are a personal math tutor. 
When asked a question, write and run Python code to answer the question.", + "tools": [ + { + "type": "code_interpreter" + } + ], + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + } + - title: Files + request: + curl: | + curl https://api.openai.com/v1/assistants \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "instructions": "You are an HR bot, and you have access to files to answer employee questions about company policies.", + "tools": [{"type": "file_search"}], + "tool_resources": {"file_search": {"vector_store_ids": ["vs_123"]}}, + "model": "gpt-4o" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + assistant = client.beta.assistants.create( + model="gpt-4o", + ) + print(assistant.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const assistant = await client.beta.assistants.create({ model: 'gpt-4o' }); + + console.log(assistant.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/shared" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + assistant, err := client.Beta.Assistants.New(context.TODO(), openai.BetaAssistantNewParams{ + Model: shared.ChatModelGPT5_1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", assistant.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.ChatModel; + import com.openai.models.beta.assistants.Assistant; + import com.openai.models.beta.assistants.AssistantCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + AssistantCreateParams params = AssistantCreateParams.builder() + .model(ChatModel.GPT_5_1) + .build(); + Assistant assistant = client.beta().assistants().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + assistant = openai.beta.assistants.create(model: :"gpt-5.1") + + puts(assistant) + response: | + { + "id": "asst_abc123", + "object": "assistant", + "created_at": 1699009403, + "name": "HR Helper", + "description": null, + "model": "gpt-4o", + "instructions": "You are an HR bot, and you have access to files to answer employee questions about company policies.", + "tools": [ + { + "type": "file_search" + } + ], + "tool_resources": { + "file_search": { + "vector_store_ids": ["vs_123"] + } + }, + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + } + description: Create an assistant with a model and instructions. + /assistants/{assistant_id}: + get: + operationId: getAssistant + tags: + - Assistants + summary: Retrieve assistant + parameters: + - in: path + name: assistant_id + required: true + schema: + type: string + description: The ID of the assistant to retrieve. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/AssistantObject' + x-oaiMeta: + name: Retrieve assistant + group: assistants + beta: true + returns: >- + The [assistant](https://platform.openai.com/docs/api-reference/assistants/object) object matching + the specified ID. 
+ examples: + response: | + { + "id": "asst_abc123", + "object": "assistant", + "created_at": 1699009709, + "name": "HR Helper", + "description": null, + "model": "gpt-4o", + "instructions": "You are an HR bot, and you have access to files to answer employee questions about company policies.", + "tools": [ + { + "type": "file_search" + } + ], + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + } + request: + curl: | + curl https://api.openai.com/v1/assistants/asst_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + assistant = client.beta.assistants.retrieve( + "assistant_id", + ) + print(assistant.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const assistant = await client.beta.assistants.retrieve('assistant_id'); + + console.log(assistant.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + assistant, err := client.Beta.Assistants.Get(context.TODO(), "assistant_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", assistant.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.assistants.Assistant; + import com.openai.models.beta.assistants.AssistantRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Assistant assistant = client.beta().assistants().retrieve("assistant_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + assistant = openai.beta.assistants.retrieve("assistant_id") + + puts(assistant) + description: Retrieves an assistant. + post: + operationId: modifyAssistant + tags: + - Assistants + summary: Modify assistant + parameters: + - in: path + name: assistant_id + required: true + schema: + type: string + description: The ID of the assistant to modify. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModifyAssistantRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/AssistantObject' + x-oaiMeta: + name: Modify assistant + group: assistants + beta: true + returns: The modified [assistant](https://platform.openai.com/docs/api-reference/assistants/object) object. + examples: + response: | + { + "id": "asst_123", + "object": "assistant", + "created_at": 1699009709, + "name": "HR Helper", + "description": null, + "model": "gpt-4o", + "instructions": "You are an HR bot, and you have access to files to answer employee questions about company policies. 
Always response with info from either of the files.", + "tools": [ + { + "type": "file_search" + } + ], + "tool_resources": { + "file_search": { + "vector_store_ids": [] + } + }, + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + } + request: + curl: | + curl https://api.openai.com/v1/assistants/asst_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "instructions": "You are an HR bot, and you have access to files to answer employee questions about company policies. Always response with info from either of the files.", + "tools": [{"type": "file_search"}], + "model": "gpt-4o" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + assistant = client.beta.assistants.update( + assistant_id="assistant_id", + ) + print(assistant.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const assistant = await client.beta.assistants.update('assistant_id'); + + console.log(assistant.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + assistant, err := client.Beta.Assistants.Update( + context.TODO(), + "assistant_id", + openai.BetaAssistantUpdateParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", assistant.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.assistants.Assistant; + import com.openai.models.beta.assistants.AssistantUpdateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Assistant assistant = client.beta().assistants().update("assistant_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + assistant = openai.beta.assistants.update("assistant_id") + + puts(assistant) + description: Modifies an assistant. + delete: + operationId: deleteAssistant + tags: + - Assistants + summary: Delete assistant + parameters: + - in: path + name: assistant_id + required: true + schema: + type: string + description: The ID of the assistant to delete. 
+ responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteAssistantResponse' + x-oaiMeta: + name: Delete assistant + group: assistants + beta: true + returns: Deletion status + examples: + response: | + { + "id": "asst_abc123", + "object": "assistant.deleted", + "deleted": true + } + request: + curl: | + curl https://api.openai.com/v1/assistants/asst_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -X DELETE + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + assistant_deleted = client.beta.assistants.delete( + "assistant_id", + ) + print(assistant_deleted.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const assistantDeleted = await client.beta.assistants.delete('assistant_id'); + + console.log(assistantDeleted.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + assistantDeleted, err := client.Beta.Assistants.Delete(context.TODO(), "assistant_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", assistantDeleted.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.assistants.AssistantDeleteParams; + import com.openai.models.beta.assistants.AssistantDeleted; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + AssistantDeleted assistantDeleted = client.beta().assistants().delete("assistant_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + assistant_deleted = openai.beta.assistants.delete("assistant_id") + + puts(assistant_deleted) + description: Delete an assistant. + /audio/speech: + post: + operationId: createSpeech + tags: + - Audio + summary: Create speech + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateSpeechRequest' + responses: + '200': + description: OK + headers: + Transfer-Encoding: + schema: + type: string + description: chunked + content: + application/octet-stream: + schema: + type: string + format: binary + text/event-stream: + schema: + $ref: '#/components/schemas/CreateSpeechResponseStreamEvent' + x-oaiMeta: + name: Create speech + group: audio + returns: >- + The audio file content or a [stream of audio + events](https://platform.openai.com/docs/api-reference/audio/speech-audio-delta-event). 
+ examples: + - title: Default + request: + curl: | + curl https://api.openai.com/v1/audio/speech \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-mini-tts", + "input": "The quick brown fox jumped over the lazy dog.", + "voice": "alloy" + }' \ + --output speech.mp3 + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + speech = client.audio.speech.create( + input="input", + model="string", + voice="ash", + ) + print(speech) + content = speech.read() + print(content) + javascript: | + import fs from "fs"; + import path from "path"; + import OpenAI from "openai"; + + const openai = new OpenAI(); + + const speechFile = path.resolve("./speech.mp3"); + + async function main() { + const mp3 = await openai.audio.speech.create({ + model: "gpt-4o-mini-tts", + voice: "alloy", + input: "Today is a wonderful day to build something people love!", + }); + console.log(speechFile); + const buffer = Buffer.from(await mp3.arrayBuffer()); + await fs.promises.writeFile(speechFile, buffer); + } + main(); + csharp: | + using System; + using System.IO; + + using OpenAI.Audio; + + AudioClient client = new( + model: "gpt-4o-mini-tts", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + BinaryData speech = client.GenerateSpeech( + text: "The quick brown fox jumped over the lazy dog.", + voice: GeneratedSpeechVoice.Alloy + ); + + using FileStream stream = File.OpenWrite("speech.mp3"); + speech.ToStream().CopyTo(stream); + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const speech = await client.audio.speech.create({ input: 'input', model: 'string', voice: + 'ash' }); + + + console.log(speech); + + + const content = await speech.blob(); + + console.log(content); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + speech, err := client.Audio.Speech.New(context.TODO(), openai.AudioSpeechNewParams{ + Input: "input", + Model: openai.SpeechModelTTS1, + Voice: openai.AudioSpeechNewParamsVoiceAlloy, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", speech) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.http.HttpResponse; + import com.openai.models.audio.speech.SpeechCreateParams; + import com.openai.models.audio.speech.SpeechModel; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + SpeechCreateParams params = SpeechCreateParams.builder() + .input("input") + .model(SpeechModel.TTS_1) + .voice(SpeechCreateParams.Voice.ALLOY) + .build(); + HttpResponse speech = client.audio().speech().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + speech = openai.audio.speech.create(input: "input", model: :"tts-1", voice: :alloy) + + puts(speech) + - title: SSE Stream Format + request: + curl: | + curl https://api.openai.com/v1/audio/speech \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-mini-tts", + "input": "The quick brown fox jumped over the lazy dog.", + "voice": "alloy", + "stream_format": "sse" + }' + node.js: >- 
+ import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const speech = await client.audio.speech.create({ input: 'input', model: 'string', voice: + 'ash' }); + + + console.log(speech); + + + const content = await speech.blob(); + + console.log(content); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + speech = client.audio.speech.create( + input="input", + model="string", + voice="ash", + ) + print(speech) + content = speech.read() + print(content) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + speech, err := client.Audio.Speech.New(context.TODO(), openai.AudioSpeechNewParams{ + Input: "input", + Model: openai.SpeechModelTTS1, + Voice: openai.AudioSpeechNewParamsVoiceAlloy, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", speech) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.http.HttpResponse; + import com.openai.models.audio.speech.SpeechCreateParams; + import com.openai.models.audio.speech.SpeechModel; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + SpeechCreateParams params = SpeechCreateParams.builder() + .input("input") + .model(SpeechModel.TTS_1) + .voice(SpeechCreateParams.Voice.ALLOY) + .build(); + HttpResponse speech = client.audio().speech().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + speech = openai.audio.speech.create(input: "input", model: :"tts-1", voice: :alloy) + + puts(speech) + description: Generates audio from the input text. + /audio/transcriptions: + post: + operationId: createTranscription + tags: + - Audio + summary: Create transcription + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateTranscriptionRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + anyOf: + - $ref: '#/components/schemas/CreateTranscriptionResponseJson' + - $ref: '#/components/schemas/CreateTranscriptionResponseDiarizedJson' + x-stainless-skip: + - go + - $ref: '#/components/schemas/CreateTranscriptionResponseVerboseJson' + discriminator: + propertyName: task + text/event-stream: + schema: + $ref: '#/components/schemas/CreateTranscriptionResponseStreamEvent' + x-oaiMeta: + name: Create transcription + group: audio + returns: >- + The [transcription object](https://platform.openai.com/docs/api-reference/audio/json-object), a + [diarized transcription + object](https://platform.openai.com/docs/api-reference/audio/diarized-json-object), a [verbose + transcription object](https://platform.openai.com/docs/api-reference/audio/verbose-json-object), or + a [stream of transcript + events](https://platform.openai.com/docs/api-reference/audio/transcript-text-delta-event). 
+ examples: + - title: Default + request: + curl: | + curl https://api.openai.com/v1/audio/transcriptions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: multipart/form-data" \ + -F file="@/path/to/file/audio.mp3" \ + -F model="gpt-4o-transcribe" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + transcription = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + ) + print(transcription) + javascript: | + import fs from "fs"; + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const transcription = await openai.audio.transcriptions.create({ + file: fs.createReadStream("audio.mp3"), + model: "gpt-4o-transcribe", + }); + + console.log(transcription.text); + } + main(); + csharp: | + using System; + + using OpenAI.Audio; + string audioFilePath = "audio.mp3"; + + AudioClient client = new( + model: "gpt-4o-transcribe", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + AudioTranscription transcription = client.TranscribeAudio(audioFilePath); + + Console.WriteLine($"{transcription.Text}"); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const transcription = await client.audio.transcriptions.create({ + file: fs.createReadStream('speech.mp3'), + model: 'gpt-4o-transcribe', + }); + + console.log(transcription); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + transcription, err := client.Audio.Transcriptions.New(context.TODO(), openai.AudioTranscriptionNewParams{ + File: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + Model: openai.AudioModelWhisper1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", transcription) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.audio.AudioModel; + import com.openai.models.audio.transcriptions.TranscriptionCreateParams; + import com.openai.models.audio.transcriptions.TranscriptionCreateResponse; + import java.io.ByteArrayInputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + TranscriptionCreateParams params = TranscriptionCreateParams.builder() + .file(ByteArrayInputStream("some content".getBytes())) + .model(AudioModel.WHISPER_1) + .build(); + TranscriptionCreateResponse transcription = client.audio().transcriptions().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + transcription = openai.audio.transcriptions.create(file: Pathname(__FILE__), model: + :"whisper-1") + + + puts(transcription) + response: | + { + "text": "Imagine the wildest idea that you've ever had, and you're curious about how it might scale to something that's a 100, a 1,000 times bigger. 
This is a place where you can get to do that.", + "usage": { + "type": "tokens", + "input_tokens": 14, + "input_token_details": { + "text_tokens": 0, + "audio_tokens": 14 + }, + "output_tokens": 45, + "total_tokens": 59 + } + } + - title: Diarization + request: + curl: | + curl https://api.openai.com/v1/audio/transcriptions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: multipart/form-data" \ + -F file="@/path/to/file/meeting.wav" \ + -F model="gpt-4o-transcribe-diarize" \ + -F response_format="diarized_json" \ + -F chunking_strategy=auto \ + -F 'known_speaker_names[]=agent' \ + -F 'known_speaker_references[]=data:audio/wav;base64,AAA...' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + transcription = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + ) + print(transcription) + javascript: | + import fs from "fs"; + import OpenAI from "openai"; + + const openai = new OpenAI(); + + const speakerRef = fs.readFileSync("agent.wav").toString("base64"); + + const transcript = await openai.audio.transcriptions.create({ + file: fs.createReadStream("meeting.wav"), + model: "gpt-4o-transcribe-diarize", + response_format: "diarized_json", + chunking_strategy: "auto", + extra_body: { + known_speaker_names: ["agent"], + known_speaker_references: [`data:audio/wav;base64,${speakerRef}`], + }, + }); + + console.log(transcript.segments); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const transcription = await client.audio.transcriptions.create({ + file: fs.createReadStream('speech.mp3'), + model: 'gpt-4o-transcribe', + }); + + console.log(transcription); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + transcription, err := client.Audio.Transcriptions.New(context.TODO(), openai.AudioTranscriptionNewParams{ + File: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + Model: openai.AudioModelWhisper1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", transcription) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.audio.AudioModel; + import com.openai.models.audio.transcriptions.TranscriptionCreateParams; + import com.openai.models.audio.transcriptions.TranscriptionCreateResponse; + import java.io.ByteArrayInputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + TranscriptionCreateParams params = TranscriptionCreateParams.builder() + .file(ByteArrayInputStream("some content".getBytes())) + .model(AudioModel.WHISPER_1) + .build(); + TranscriptionCreateResponse transcription = client.audio().transcriptions().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + transcription = openai.audio.transcriptions.create(file: Pathname(__FILE__), model: + :"whisper-1") + + + puts(transcription) + response: | + { + "task": "transcribe", + "duration": 27.4, + "text": "Agent: Thanks for calling OpenAI support.\nA: Hi, I'm trying to enable diarization.\nAgent: Happy to walk you through the steps.", + "segments": [ + { + "type": 
"transcript.text.segment", + "id": "seg_001", + "start": 0.0, + "end": 4.7, + "text": "Thanks for calling OpenAI support.", + "speaker": "agent" + }, + { + "type": "transcript.text.segment", + "id": "seg_002", + "start": 4.7, + "end": 11.8, + "text": "Hi, I'm trying to enable diarization.", + "speaker": "A" + }, + { + "type": "transcript.text.segment", + "id": "seg_003", + "start": 12.1, + "end": 18.5, + "text": "Happy to walk you through the steps.", + "speaker": "agent" + } + ], + "usage": { + "type": "duration", + "seconds": 27 + } + } + - title: Streaming + request: + curl: | + curl https://api.openai.com/v1/audio/transcriptions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: multipart/form-data" \ + -F file="@/path/to/file/audio.mp3" \ + -F model="gpt-4o-mini-transcribe" \ + -F stream=true + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + transcription = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + ) + print(transcription) + javascript: | + import fs from "fs"; + import OpenAI from "openai"; + + const openai = new OpenAI(); + + const stream = await openai.audio.transcriptions.create({ + file: fs.createReadStream("audio.mp3"), + model: "gpt-4o-mini-transcribe", + stream: true, + }); + + for await (const event of stream) { + console.log(event); + } + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const transcription = await client.audio.transcriptions.create({ + file: fs.createReadStream('speech.mp3'), + model: 'gpt-4o-transcribe', + }); + + console.log(transcription); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + transcription, err := client.Audio.Transcriptions.New(context.TODO(), openai.AudioTranscriptionNewParams{ + File: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + Model: openai.AudioModelWhisper1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", transcription) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.audio.AudioModel; + import com.openai.models.audio.transcriptions.TranscriptionCreateParams; + import com.openai.models.audio.transcriptions.TranscriptionCreateResponse; + import java.io.ByteArrayInputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + TranscriptionCreateParams params = TranscriptionCreateParams.builder() + .file(ByteArrayInputStream("some content".getBytes())) + .model(AudioModel.WHISPER_1) + .build(); + TranscriptionCreateResponse transcription = client.audio().transcriptions().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + transcription = openai.audio.transcriptions.create(file: Pathname(__FILE__), model: + :"whisper-1") + + + puts(transcription) + response: > + data: + {"type":"transcript.text.delta","delta":"I","logprobs":[{"token":"I","logprob":-0.00007588794,"bytes":[73]}]} + + + data: {"type":"transcript.text.delta","delta":" see","logprobs":[{"token":" + see","logprob":-3.1281633e-7,"bytes":[32,115,101,101]}]} + + + data: 
{"type":"transcript.text.delta","delta":" skies","logprobs":[{"token":" + skies","logprob":-2.3392786e-6,"bytes":[32,115,107,105,101,115]}]} + + + data: {"type":"transcript.text.delta","delta":" of","logprobs":[{"token":" + of","logprob":-3.1281633e-7,"bytes":[32,111,102]}]} + + + data: {"type":"transcript.text.delta","delta":" blue","logprobs":[{"token":" + blue","logprob":-1.0280384e-6,"bytes":[32,98,108,117,101]}]} + + + data: {"type":"transcript.text.delta","delta":" and","logprobs":[{"token":" + and","logprob":-0.0005108566,"bytes":[32,97,110,100]}]} + + + data: {"type":"transcript.text.delta","delta":" clouds","logprobs":[{"token":" + clouds","logprob":-1.9361265e-7,"bytes":[32,99,108,111,117,100,115]}]} + + + data: {"type":"transcript.text.delta","delta":" of","logprobs":[{"token":" + of","logprob":-1.9361265e-7,"bytes":[32,111,102]}]} + + + data: {"type":"transcript.text.delta","delta":" white","logprobs":[{"token":" + white","logprob":-7.89631e-7,"bytes":[32,119,104,105,116,101]}]} + + + data: + {"type":"transcript.text.delta","delta":",","logprobs":[{"token":",","logprob":-0.0014890312,"bytes":[44]}]} + + + data: {"type":"transcript.text.delta","delta":" the","logprobs":[{"token":" + the","logprob":-0.0110956915,"bytes":[32,116,104,101]}]} + + + data: {"type":"transcript.text.delta","delta":" bright","logprobs":[{"token":" + bright","logprob":0.0,"bytes":[32,98,114,105,103,104,116]}]} + + + data: {"type":"transcript.text.delta","delta":" blessed","logprobs":[{"token":" + blessed","logprob":-0.000045848617,"bytes":[32,98,108,101,115,115,101,100]}]} + + + data: {"type":"transcript.text.delta","delta":" days","logprobs":[{"token":" + days","logprob":-0.000010802739,"bytes":[32,100,97,121,115]}]} + + + data: + {"type":"transcript.text.delta","delta":",","logprobs":[{"token":",","logprob":-0.00001700133,"bytes":[44]}]} + + + data: {"type":"transcript.text.delta","delta":" the","logprobs":[{"token":" + the","logprob":-0.0000118755715,"bytes":[32,116,104,101]}]} + + + data: {"type":"transcript.text.delta","delta":" dark","logprobs":[{"token":" + dark","logprob":-5.5122365e-7,"bytes":[32,100,97,114,107]}]} + + + data: {"type":"transcript.text.delta","delta":" sacred","logprobs":[{"token":" + sacred","logprob":-5.4385737e-6,"bytes":[32,115,97,99,114,101,100]}]} + + + data: {"type":"transcript.text.delta","delta":" nights","logprobs":[{"token":" + nights","logprob":-4.00813e-6,"bytes":[32,110,105,103,104,116,115]}]} + + + data: + {"type":"transcript.text.delta","delta":",","logprobs":[{"token":",","logprob":-0.0036910512,"bytes":[44]}]} + + + data: {"type":"transcript.text.delta","delta":" and","logprobs":[{"token":" + and","logprob":-0.0031903093,"bytes":[32,97,110,100]}]} + + + data: {"type":"transcript.text.delta","delta":" I","logprobs":[{"token":" + I","logprob":-1.504853e-6,"bytes":[32,73]}]} + + + data: {"type":"transcript.text.delta","delta":" think","logprobs":[{"token":" + think","logprob":-4.3202e-7,"bytes":[32,116,104,105,110,107]}]} + + + data: {"type":"transcript.text.delta","delta":" to","logprobs":[{"token":" + to","logprob":-1.9361265e-7,"bytes":[32,116,111]}]} + + + data: {"type":"transcript.text.delta","delta":" myself","logprobs":[{"token":" + myself","logprob":-1.7432603e-6,"bytes":[32,109,121,115,101,108,102]}]} + + + data: + {"type":"transcript.text.delta","delta":",","logprobs":[{"token":",","logprob":-0.29254505,"bytes":[44]}]} + + + data: {"type":"transcript.text.delta","delta":" what","logprobs":[{"token":" + 
what","logprob":-0.016815351,"bytes":[32,119,104,97,116]}]} + + + data: {"type":"transcript.text.delta","delta":" a","logprobs":[{"token":" + a","logprob":-3.1281633e-7,"bytes":[32,97]}]} + + + data: {"type":"transcript.text.delta","delta":" wonderful","logprobs":[{"token":" + wonderful","logprob":-2.1008714e-6,"bytes":[32,119,111,110,100,101,114,102,117,108]}]} + + + data: {"type":"transcript.text.delta","delta":" world","logprobs":[{"token":" + world","logprob":-8.180258e-6,"bytes":[32,119,111,114,108,100]}]} + + + data: + {"type":"transcript.text.delta","delta":".","logprobs":[{"token":".","logprob":-0.014231676,"bytes":[46]}]} + + + data: {"type":"transcript.text.done","text":"I see skies of blue and clouds of white, the bright + blessed days, the dark sacred nights, and I think to myself, what a wonderful + world.","logprobs":[{"token":"I","logprob":-0.00007588794,"bytes":[73]},{"token":" + see","logprob":-3.1281633e-7,"bytes":[32,115,101,101]},{"token":" + skies","logprob":-2.3392786e-6,"bytes":[32,115,107,105,101,115]},{"token":" + of","logprob":-3.1281633e-7,"bytes":[32,111,102]},{"token":" + blue","logprob":-1.0280384e-6,"bytes":[32,98,108,117,101]},{"token":" + and","logprob":-0.0005108566,"bytes":[32,97,110,100]},{"token":" + clouds","logprob":-1.9361265e-7,"bytes":[32,99,108,111,117,100,115]},{"token":" + of","logprob":-1.9361265e-7,"bytes":[32,111,102]},{"token":" + white","logprob":-7.89631e-7,"bytes":[32,119,104,105,116,101]},{"token":",","logprob":-0.0014890312,"bytes":[44]},{"token":" + the","logprob":-0.0110956915,"bytes":[32,116,104,101]},{"token":" + bright","logprob":0.0,"bytes":[32,98,114,105,103,104,116]},{"token":" + blessed","logprob":-0.000045848617,"bytes":[32,98,108,101,115,115,101,100]},{"token":" + days","logprob":-0.000010802739,"bytes":[32,100,97,121,115]},{"token":",","logprob":-0.00001700133,"bytes":[44]},{"token":" + the","logprob":-0.0000118755715,"bytes":[32,116,104,101]},{"token":" + dark","logprob":-5.5122365e-7,"bytes":[32,100,97,114,107]},{"token":" + sacred","logprob":-5.4385737e-6,"bytes":[32,115,97,99,114,101,100]},{"token":" + nights","logprob":-4.00813e-6,"bytes":[32,110,105,103,104,116,115]},{"token":",","logprob":-0.0036910512,"bytes":[44]},{"token":" + and","logprob":-0.0031903093,"bytes":[32,97,110,100]},{"token":" + I","logprob":-1.504853e-6,"bytes":[32,73]},{"token":" + think","logprob":-4.3202e-7,"bytes":[32,116,104,105,110,107]},{"token":" + to","logprob":-1.9361265e-7,"bytes":[32,116,111]},{"token":" + myself","logprob":-1.7432603e-6,"bytes":[32,109,121,115,101,108,102]},{"token":",","logprob":-0.29254505,"bytes":[44]},{"token":" + what","logprob":-0.016815351,"bytes":[32,119,104,97,116]},{"token":" + a","logprob":-3.1281633e-7,"bytes":[32,97]},{"token":" + wonderful","logprob":-2.1008714e-6,"bytes":[32,119,111,110,100,101,114,102,117,108]},{"token":" + world","logprob":-8.180258e-6,"bytes":[32,119,111,114,108,100]},{"token":".","logprob":-0.014231676,"bytes":[46]}],"usage":{"input_tokens":14,"input_token_details":{"text_tokens":0,"audio_tokens":14},"output_tokens":45,"total_tokens":59}} + - title: Logprobs + request: + curl: | + curl https://api.openai.com/v1/audio/transcriptions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: multipart/form-data" \ + -F file="@/path/to/file/audio.mp3" \ + -F "include[]=logprobs" \ + -F model="gpt-4o-transcribe" \ + -F response_format="json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + transcription = 
client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + ) + print(transcription) + javascript: | + import fs from "fs"; + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const transcription = await openai.audio.transcriptions.create({ + file: fs.createReadStream("audio.mp3"), + model: "gpt-4o-transcribe", + response_format: "json", + include: ["logprobs"] + }); + + console.log(transcription); + } + main(); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const transcription = await client.audio.transcriptions.create({ + file: fs.createReadStream('speech.mp3'), + model: 'gpt-4o-transcribe', + }); + + console.log(transcription); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + transcription, err := client.Audio.Transcriptions.New(context.TODO(), openai.AudioTranscriptionNewParams{ + File: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + Model: openai.AudioModelWhisper1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", transcription) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.audio.AudioModel; + import com.openai.models.audio.transcriptions.TranscriptionCreateParams; + import com.openai.models.audio.transcriptions.TranscriptionCreateResponse; + import java.io.ByteArrayInputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + TranscriptionCreateParams params = TranscriptionCreateParams.builder() + .file(ByteArrayInputStream("some content".getBytes())) + .model(AudioModel.WHISPER_1) + .build(); + TranscriptionCreateResponse transcription = client.audio().transcriptions().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + transcription = openai.audio.transcriptions.create(file: Pathname(__FILE__), model: + :"whisper-1") + + + puts(transcription) + response: | + { + "text": "Hey, my knee is hurting and I want to see the doctor tomorrow ideally.", + "logprobs": [ + { "token": "Hey", "logprob": -1.0415299, "bytes": [72, 101, 121] }, + { "token": ",", "logprob": -9.805982e-5, "bytes": [44] }, + { "token": " my", "logprob": -0.00229799, "bytes": [32, 109, 121] }, + { + "token": " knee", + "logprob": -4.7159858e-5, + "bytes": [32, 107, 110, 101, 101] + }, + { "token": " is", "logprob": -0.043909557, "bytes": [32, 105, 115] }, + { + "token": " hurting", + "logprob": -1.1041146e-5, + "bytes": [32, 104, 117, 114, 116, 105, 110, 103] + }, + { "token": " and", "logprob": -0.011076359, "bytes": [32, 97, 110, 100] }, + { "token": " I", "logprob": -5.3193703e-6, "bytes": [32, 73] }, + { + "token": " want", + "logprob": -0.0017156356, + "bytes": [32, 119, 97, 110, 116] + }, + { "token": " to", "logprob": -7.89631e-7, "bytes": [32, 116, 111] }, + { "token": " see", "logprob": -5.5122365e-7, "bytes": [32, 115, 101, 101] }, + { "token": " the", "logprob": -0.0040786397, "bytes": [32, 116, 104, 101] }, + { + "token": " doctor", + "logprob": -2.3392786e-6, + "bytes": [32, 100, 111, 99, 116, 111, 114] + }, + { + "token": " tomorrow", + "logprob": -7.89631e-7, + 
"bytes": [32, 116, 111, 109, 111, 114, 114, 111, 119] + }, + { + "token": " ideally", + "logprob": -0.5800861, + "bytes": [32, 105, 100, 101, 97, 108, 108, 121] + }, + { "token": ".", "logprob": -0.00011093382, "bytes": [46] } + ], + "usage": { + "type": "tokens", + "input_tokens": 14, + "input_token_details": { + "text_tokens": 0, + "audio_tokens": 14 + }, + "output_tokens": 45, + "total_tokens": 59 + } + } + - title: Word timestamps + request: + curl: | + curl https://api.openai.com/v1/audio/transcriptions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: multipart/form-data" \ + -F file="@/path/to/file/audio.mp3" \ + -F "timestamp_granularities[]=word" \ + -F model="whisper-1" \ + -F response_format="verbose_json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + transcription = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + ) + print(transcription) + javascript: | + import fs from "fs"; + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const transcription = await openai.audio.transcriptions.create({ + file: fs.createReadStream("audio.mp3"), + model: "whisper-1", + response_format: "verbose_json", + timestamp_granularities: ["word"] + }); + + console.log(transcription.text); + } + main(); + csharp: | + using System; + + using OpenAI.Audio; + + string audioFilePath = "audio.mp3"; + + AudioClient client = new( + model: "whisper-1", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + AudioTranscriptionOptions options = new() + { + ResponseFormat = AudioTranscriptionFormat.Verbose, + TimestampGranularities = AudioTimestampGranularities.Word, + }; + + AudioTranscription transcription = client.TranscribeAudio(audioFilePath, options); + + Console.WriteLine($"{transcription.Text}"); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const transcription = await client.audio.transcriptions.create({ + file: fs.createReadStream('speech.mp3'), + model: 'gpt-4o-transcribe', + }); + + console.log(transcription); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + transcription, err := client.Audio.Transcriptions.New(context.TODO(), openai.AudioTranscriptionNewParams{ + File: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + Model: openai.AudioModelWhisper1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", transcription) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.audio.AudioModel; + import com.openai.models.audio.transcriptions.TranscriptionCreateParams; + import com.openai.models.audio.transcriptions.TranscriptionCreateResponse; + import java.io.ByteArrayInputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + TranscriptionCreateParams params = TranscriptionCreateParams.builder() + .file(ByteArrayInputStream("some content".getBytes())) + .model(AudioModel.WHISPER_1) + .build(); + TranscriptionCreateResponse transcription = client.audio().transcriptions().create(params); + } + } + ruby: >- + require "openai" + + + openai = 
OpenAI::Client.new(api_key: "My API Key") + + + transcription = openai.audio.transcriptions.create(file: Pathname(__FILE__), model: + :"whisper-1") + + + puts(transcription) + response: | + { + "task": "transcribe", + "language": "english", + "duration": 8.470000267028809, + "text": "The beach was a popular spot on a hot summer day. People were swimming in the ocean, building sandcastles, and playing beach volleyball.", + "words": [ + { + "word": "The", + "start": 0.0, + "end": 0.23999999463558197 + }, + ... + { + "word": "volleyball", + "start": 7.400000095367432, + "end": 7.900000095367432 + } + ], + "usage": { + "type": "duration", + "seconds": 9 + } + } + - title: Segment timestamps + request: + curl: | + curl https://api.openai.com/v1/audio/transcriptions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: multipart/form-data" \ + -F file="@/path/to/file/audio.mp3" \ + -F "timestamp_granularities[]=segment" \ + -F model="whisper-1" \ + -F response_format="verbose_json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + transcription = client.audio.transcriptions.create( + file=b"raw file contents", + model="gpt-4o-transcribe", + ) + print(transcription) + javascript: | + import fs from "fs"; + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const transcription = await openai.audio.transcriptions.create({ + file: fs.createReadStream("audio.mp3"), + model: "whisper-1", + response_format: "verbose_json", + timestamp_granularities: ["segment"] + }); + + console.log(transcription.text); + } + main(); + csharp: | + using System; + + using OpenAI.Audio; + + string audioFilePath = "audio.mp3"; + + AudioClient client = new( + model: "whisper-1", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + AudioTranscriptionOptions options = new() + { + ResponseFormat = AudioTranscriptionFormat.Verbose, + TimestampGranularities = AudioTimestampGranularities.Segment, + }; + + AudioTranscription transcription = client.TranscribeAudio(audioFilePath, options); + + Console.WriteLine($"{transcription.Text}"); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const transcription = await client.audio.transcriptions.create({ + file: fs.createReadStream('speech.mp3'), + model: 'gpt-4o-transcribe', + }); + + console.log(transcription); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + transcription, err := client.Audio.Transcriptions.New(context.TODO(), openai.AudioTranscriptionNewParams{ + File: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + Model: openai.AudioModelWhisper1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", transcription) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.audio.AudioModel; + import com.openai.models.audio.transcriptions.TranscriptionCreateParams; + import com.openai.models.audio.transcriptions.TranscriptionCreateResponse; + import java.io.ByteArrayInputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + TranscriptionCreateParams params = 
TranscriptionCreateParams.builder()
+                            .file(new ByteArrayInputStream("some content".getBytes()))
+                            .model(AudioModel.WHISPER_1)
+                            .build();
+                        TranscriptionCreateResponse transcription = client.audio().transcriptions().create(params);
+                    }
+                }
+              ruby: >-
+                require "openai"
+
+
+                openai = OpenAI::Client.new(api_key: "My API Key")
+
+
+                transcription = openai.audio.transcriptions.create(file: Pathname(__FILE__), model:
+                :"whisper-1")
+
+
+                puts(transcription)
+            response: |
+              {
+                "task": "transcribe",
+                "language": "english",
+                "duration": 8.470000267028809,
+                "text": "The beach was a popular spot on a hot summer day. People were swimming in the ocean, building sandcastles, and playing beach volleyball.",
+                "segments": [
+                  {
+                    "id": 0,
+                    "seek": 0,
+                    "start": 0.0,
+                    "end": 3.319999933242798,
+                    "text": " The beach was a popular spot on a hot summer day.",
+                    "tokens": [
+                      50364, 440, 7534, 390, 257, 3743, 4008, 322, 257, 2368, 4266, 786, 13, 50530
+                    ],
+                    "temperature": 0.0,
+                    "avg_logprob": -0.2860786020755768,
+                    "compression_ratio": 1.2363636493682861,
+                    "no_speech_prob": 0.00985979475080967
+                  },
+                  ...
+                ],
+                "usage": {
+                  "type": "duration",
+                  "seconds": 9
+                }
+              }
+      description: Transcribes audio into the input language.
+  /audio/translations:
+    post:
+      operationId: createTranslation
+      tags:
+        - Audio
+      summary: Create translation
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/CreateTranslationRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                anyOf:
+                  - $ref: '#/components/schemas/CreateTranslationResponseJson'
+                  - $ref: '#/components/schemas/CreateTranslationResponseVerboseJson'
+      x-stainless-skip:
+        - go
+      x-oaiMeta:
+        name: Create translation
+        group: audio
+        returns: The translated text.
+        examples:
+          response: |
+            {
+              "text": "Hello, my name is Wolfgang and I come from Germany. Where are you heading today?"
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/audio/translations \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: multipart/form-data" \
+                -F file="@/path/to/file/german.m4a" \
+                -F model="whisper-1"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              translation = client.audio.translations.create(
+                  file=b"raw file contents",
+                  model="whisper-1",
+              )
+              print(translation)
+            javascript: |
+              import fs from "fs";
+              import OpenAI from "openai";
+
+              const openai = new OpenAI();
+
+              async function main() {
+                const translation = await openai.audio.translations.create({
+                  file: fs.createReadStream("speech.mp3"),
+                  model: "whisper-1",
+                });
+
+                console.log(translation.text);
+              }
+              main();
+            csharp: |
+              using System;
+
+              using OpenAI.Audio;
+
+              string audioFilePath = "audio.mp3";
+
+              AudioClient client = new(
+                  model: "whisper-1",
+                  apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+              );
+
+              AudioTranscription transcription = client.TranscribeAudio(audioFilePath);
+
+              Console.WriteLine($"{transcription.Text}");
+            node.js: |-
+              import fs from 'fs';
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const translation = await client.audio.translations.create({
+                file: fs.createReadStream('speech.mp3'),
+                model: 'whisper-1',
+              });
+
+              console.log(translation);
+            go: |
+              package main
+
+              import (
+                  "bytes"
+                  "context"
+                  "fmt"
+                  "io"
+
+                  "github.com/openai/openai-go"
+                  "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                  client := openai.NewClient(
+                      option.WithAPIKey("My API Key"),
+                  )
+                  translation, err := client.Audio.Translations.New(context.TODO(), openai.AudioTranslationNewParams{
+                      File: io.Reader(bytes.NewBuffer([]byte("some file contents"))),
+                      Model: openai.AudioModelWhisper1,
+                  })
+                  if err != nil {
+                      panic(err.Error())
+                  }
+                  fmt.Printf("%+v\n", translation)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.audio.AudioModel;
+              import com.openai.models.audio.translations.TranslationCreateParams;
+              import com.openai.models.audio.translations.TranslationCreateResponse;
+              import java.io.ByteArrayInputStream;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      TranslationCreateParams params = TranslationCreateParams.builder()
+                          .file(new ByteArrayInputStream("some content".getBytes()))
+                          .model(AudioModel.WHISPER_1)
+                          .build();
+                      TranslationCreateResponse translation = client.audio().translations().create(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              translation = openai.audio.translations.create(file: Pathname(__FILE__), model: :"whisper-1")
+
+              puts(translation)
+      description: Translates audio into English.
+  /batches:
+    post:
+      summary: Create batch
+      operationId: createBatch
+      tags:
+        - Batch
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - input_file_id
+                - endpoint
+                - completion_window
+              properties:
+                input_file_id:
+                  type: string
+                  description: >
+                    The ID of an uploaded file that contains requests for the new batch.
+
+
+                    See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to
+                    upload a file.
+ + + Your input file must be formatted as a [JSONL + file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be + uploaded with the purpose `batch`. The file can contain up to 50,000 requests, and can be + up to 200 MB in size. + endpoint: + type: string + enum: + - /v1/responses + - /v1/chat/completions + - /v1/embeddings + - /v1/completions + - /v1/moderations + description: >- + The endpoint to be used for all requests in the batch. Currently `/v1/responses`, + `/v1/chat/completions`, `/v1/embeddings`, `/v1/completions`, and `/v1/moderations` are + supported. Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 + embedding inputs across all requests in the batch. + completion_window: + type: string + enum: + - 24h + description: >- + The time frame within which the batch should be processed. Currently only `24h` is + supported. + metadata: + $ref: '#/components/schemas/Metadata' + output_expires_after: + $ref: '#/components/schemas/BatchFileExpirationAfter' + responses: + '200': + description: Batch created successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + x-oaiMeta: + name: Create batch + group: batch + returns: The created [Batch](https://platform.openai.com/docs/api-reference/batch/object) object. + examples: + response: | + { + "id": "batch_abc123", + "object": "batch", + "endpoint": "/v1/chat/completions", + "errors": null, + "input_file_id": "file-abc123", + "completion_window": "24h", + "status": "validating", + "output_file_id": null, + "error_file_id": null, + "created_at": 1711471533, + "in_progress_at": null, + "expires_at": null, + "finalizing_at": null, + "completed_at": null, + "failed_at": null, + "expired_at": null, + "cancelling_at": null, + "cancelled_at": null, + "request_counts": { + "total": 0, + "completed": 0, + "failed": 0 + }, + "metadata": { + "customer_id": "user_123456789", + "batch_description": "Nightly eval job", + } + } + request: + curl: | + curl https://api.openai.com/v1/batches \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "input_file_id": "file-abc123", + "endpoint": "/v1/chat/completions", + "completion_window": "24h" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + batch = client.batches.create( + completion_window="24h", + endpoint="/v1/responses", + input_file_id="input_file_id", + ) + print(batch.id) + node: | + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const batch = await openai.batches.create({ + input_file_id: "file-abc123", + endpoint: "/v1/chat/completions", + completion_window: "24h" + }); + + console.log(batch); + } + + main(); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const batch = await client.batches.create({ + completion_window: '24h', + endpoint: '/v1/responses', + input_file_id: 'input_file_id', + }); + + console.log(batch.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + batch, err := client.Batches.New(context.TODO(), openai.BatchNewParams{ + CompletionWindow: openai.BatchNewParamsCompletionWindow24h, + Endpoint: openai.BatchNewParamsEndpointV1Responses, + InputFileID: "input_file_id", + }) + if err != nil { + panic(err.Error()) + } + 
fmt.Printf("%+v\n", batch.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.batches.Batch; + import com.openai.models.batches.BatchCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + BatchCreateParams params = BatchCreateParams.builder() + .completionWindow(BatchCreateParams.CompletionWindow._24H) + .endpoint(BatchCreateParams.Endpoint.V1_RESPONSES) + .inputFileId("input_file_id") + .build(); + Batch batch = client.batches().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + batch = openai.batches.create( + completion_window: :"24h", + endpoint: :"/v1/responses", + input_file_id: "input_file_id" + ) + + puts(batch) + description: Creates and executes a batch from an uploaded file of requests + get: + operationId: listBatches + tags: + - Batch + summary: List batch + parameters: + - in: query + name: after + required: false + schema: + type: string + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + responses: + '200': + description: Batch listed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBatchesResponse' + x-oaiMeta: + name: List batch + group: batch + returns: A list of paginated [Batch](https://platform.openai.com/docs/api-reference/batch/object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "batch_abc123", + "object": "batch", + "endpoint": "/v1/chat/completions", + "errors": null, + "input_file_id": "file-abc123", + "completion_window": "24h", + "status": "completed", + "output_file_id": "file-cvaTdG", + "error_file_id": "file-HOWS94", + "created_at": 1711471533, + "in_progress_at": 1711471538, + "expires_at": 1711557933, + "finalizing_at": 1711493133, + "completed_at": 1711493163, + "failed_at": null, + "expired_at": null, + "cancelling_at": null, + "cancelled_at": null, + "request_counts": { + "total": 100, + "completed": 95, + "failed": 5 + }, + "metadata": { + "customer_id": "user_123456789", + "batch_description": "Nightly job", + } + }, + { ... }, + ], + "first_id": "batch_abc123", + "last_id": "batch_abc456", + "has_more": true + } + request: + curl: | + curl https://api.openai.com/v1/batches?limit=2 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.batches.list() + page = page.data[0] + print(page.id) + node: | + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const list = await openai.batches.list(); + + for await (const batch of list) { + console.log(batch); + } + } + + main(); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. 
+              for await (const batch of client.batches.list()) {
+                console.log(batch.id);
+              }
+            go: |
+              package main
+
+              import (
+                  "context"
+                  "fmt"
+
+                  "github.com/openai/openai-go"
+                  "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                  client := openai.NewClient(
+                      option.WithAPIKey("My API Key"),
+                  )
+                  page, err := client.Batches.List(context.TODO(), openai.BatchListParams{
+
+                  })
+                  if err != nil {
+                      panic(err.Error())
+                  }
+                  fmt.Printf("%+v\n", page)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.batches.BatchListPage;
+              import com.openai.models.batches.BatchListParams;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      BatchListPage page = client.batches().list();
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              page = openai.batches.list
+
+              puts(page)
+      description: List your organization's batches.
+  /batches/{batch_id}:
+    get:
+      operationId: retrieveBatch
+      tags:
+        - Batch
+      summary: Retrieve batch
+      parameters:
+        - in: path
+          name: batch_id
+          required: true
+          schema:
+            type: string
+          description: The ID of the batch to retrieve.
+      responses:
+        '200':
+          description: Batch retrieved successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+      x-oaiMeta:
+        name: Retrieve batch
+        group: batch
+        returns: >-
+          The [Batch](https://platform.openai.com/docs/api-reference/batch/object) object matching the
+          specified ID.
+        examples:
+          response: |
+            {
+              "id": "batch_abc123",
+              "object": "batch",
+              "endpoint": "/v1/completions",
+              "errors": null,
+              "input_file_id": "file-abc123",
+              "completion_window": "24h",
+              "status": "completed",
+              "output_file_id": "file-cvaTdG",
+              "error_file_id": "file-HOWS94",
+              "created_at": 1711471533,
+              "in_progress_at": 1711471538,
+              "expires_at": 1711557933,
+              "finalizing_at": 1711493133,
+              "completed_at": 1711493163,
+              "failed_at": null,
+              "expired_at": null,
+              "cancelling_at": null,
+              "cancelled_at": null,
+              "request_counts": {
+                "total": 100,
+                "completed": 95,
+                "failed": 5
+              },
+              "metadata": {
+                "customer_id": "user_123456789",
+                "batch_description": "Nightly eval job"
+              }
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/batches/batch_abc123 \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              batch = client.batches.retrieve(
+                  "batch_id",
+              )
+              print(batch.id)
+            node: |
+              import OpenAI from "openai";
+
+              const openai = new OpenAI();
+
+              async function main() {
+                const batch = await openai.batches.retrieve("batch_abc123");
+
+                console.log(batch);
+              }
+
+              main();
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const batch = await client.batches.retrieve('batch_id');
+
+              console.log(batch.id);
+            go: |
+              package main
+
+              import (
+                  "context"
+                  "fmt"
+
+                  "github.com/openai/openai-go"
+                  "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                  client := openai.NewClient(
+                      option.WithAPIKey("My API Key"),
+                  )
+                  batch, err := client.Batches.Get(context.TODO(), "batch_id")
+                  if err != nil {
+                      panic(err.Error())
+                  }
+                  fmt.Printf("%+v\n", batch.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import
com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.batches.Batch; + import com.openai.models.batches.BatchRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Batch batch = client.batches().retrieve("batch_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + batch = openai.batches.retrieve("batch_id") + + puts(batch) + description: Retrieves a batch. + /batches/{batch_id}/cancel: + post: + operationId: cancelBatch + tags: + - Batch + summary: Cancel batch + parameters: + - in: path + name: batch_id + required: true + schema: + type: string + description: The ID of the batch to cancel. + responses: + '200': + description: Batch is cancelling. Returns the cancelling batch's details. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + x-oaiMeta: + name: Cancel batch + group: batch + returns: >- + The [Batch](https://platform.openai.com/docs/api-reference/batch/object) object matching the + specified ID. + examples: + response: | + { + "id": "batch_abc123", + "object": "batch", + "endpoint": "/v1/chat/completions", + "errors": null, + "input_file_id": "file-abc123", + "completion_window": "24h", + "status": "cancelling", + "output_file_id": null, + "error_file_id": null, + "created_at": 1711471533, + "in_progress_at": 1711471538, + "expires_at": 1711557933, + "finalizing_at": null, + "completed_at": null, + "failed_at": null, + "expired_at": null, + "cancelling_at": 1711475133, + "cancelled_at": null, + "request_counts": { + "total": 100, + "completed": 23, + "failed": 1 + }, + "metadata": { + "customer_id": "user_123456789", + "batch_description": "Nightly eval job", + } + } + request: + curl: | + curl https://api.openai.com/v1/batches/batch_abc123/cancel \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -X POST + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + batch = client.batches.cancel( + "batch_id", + ) + print(batch.id) + node: | + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const batch = await openai.batches.cancel("batch_abc123"); + + console.log(batch); + } + + main(); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const batch = await client.batches.cancel('batch_id'); + + console.log(batch.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + batch, err := client.Batches.Cancel(context.TODO(), "batch_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", batch.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.batches.Batch; + import com.openai.models.batches.BatchCancelParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Batch batch = client.batches().cancel("batch_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + batch = openai.batches.cancel("batch_id") + + puts(batch) + description: >- + Cancels an in-progress 
batch. The batch will be in status `cancelling` for up to 10 minutes, before + changing to `cancelled`, where it will have partial results (if any) available in the output file. + /chat/completions: + get: + operationId: listChatCompletions + tags: + - Chat + summary: List Chat Completions + parameters: + - name: model + in: query + description: The model used to generate the Chat Completions. + required: false + schema: + type: string + - name: metadata + in: query + description: | + A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + required: false + schema: + $ref: '#/components/schemas/Metadata' + - name: after + in: query + description: Identifier for the last chat completion from the previous pagination request. + required: false + schema: + type: string + - name: limit + in: query + description: Number of Chat Completions to retrieve. + required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: >- + Sort order for Chat Completions by timestamp. Use `asc` for ascending order or `desc` for + descending order. Defaults to `asc`. + required: false + schema: + type: string + enum: + - asc + - desc + default: asc + responses: + '200': + description: A list of Chat Completions + content: + application/json: + schema: + $ref: '#/components/schemas/ChatCompletionList' + x-oaiMeta: + name: List Chat Completions + group: chat + returns: >- + A list of [Chat Completions](https://platform.openai.com/docs/api-reference/chat/list-object) + matching the specified filters. + path: list + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "chat.completion", + "id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2", + "model": "gpt-4.1-2025-04-14", + "created": 1738960610, + "request_id": "req_ded8ab984ec4bf840f37566c1011c417", + "tool_choice": null, + "usage": { + "total_tokens": 31, + "completion_tokens": 18, + "prompt_tokens": 13 + }, + "seed": 4944116822809979520, + "top_p": 1.0, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "system_fingerprint": "fp_50cad350e4", + "input_user": null, + "service_tier": "default", + "tools": null, + "metadata": {}, + "choices": [ + { + "index": 0, + "message": { + "content": "Mind of circuits hum, \nLearning patterns in silence— \nFuture's quiet spark.", + "role": "assistant", + "tool_calls": null, + "function_call": null + }, + "finish_reason": "stop", + "logprobs": null + } + ], + "response_format": null + } + ], + "first_id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2", + "last_id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.chat.completions.list() + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. 
+ for await (const chatCompletion of client.chat.completions.list()) { + console.log(chatCompletion.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Chat.Completions.List(context.TODO(), openai.ChatCompletionListParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.chat.completions.ChatCompletionListPage; + import com.openai.models.chat.completions.ChatCompletionListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatCompletionListPage page = client.chat().completions().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.chat.completions.list + + puts(page) + description: | + List stored Chat Completions. Only Chat Completions that have been stored + with the `store` parameter set to `true` will be returned. + post: + operationId: createChatCompletion + tags: + - Chat + summary: Create chat completion + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateChatCompletionRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateChatCompletionResponse' + text/event-stream: + schema: + $ref: '#/components/schemas/CreateChatCompletionStreamResponse' + x-oaiMeta: + name: Create chat completion + group: chat + returns: > + Returns a [chat completion](https://platform.openai.com/docs/api-reference/chat/object) object, or a + streamed sequence of [chat completion + chunk](https://platform.openai.com/docs/api-reference/chat/streaming) objects if the request is + streamed. + path: create + examples: + - title: Default + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_chat_model_id", + "messages": [ + { + "role": "developer", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello!" 
+                    }
+                  ]
+                }'
+              python: |-
+                from openai import OpenAI
+
+                client = OpenAI(
+                    api_key="My API Key",
+                )
+                chat_completion = client.chat.completions.create(
+                    messages=[{
+                        "content": "string",
+                        "role": "developer",
+                    }],
+                    model="gpt-4o",
+                )
+                print(chat_completion)
+              node.js: |-
+                import OpenAI from 'openai';
+
+                const client = new OpenAI({
+                  apiKey: 'My API Key',
+                });
+
+                const chatCompletion = await client.chat.completions.create({
+                  messages: [{ content: 'string', role: 'developer' }],
+                  model: 'gpt-4o',
+                });
+
+                console.log(chatCompletion);
+              csharp: |
+                using System;
+                using System.Collections.Generic;
+
+                using OpenAI.Chat;
+
+                ChatClient client = new(
+                    model: "gpt-4.1",
+                    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+                );
+
+                List<ChatMessage> messages =
+                [
+                    new SystemChatMessage("You are a helpful assistant."),
+                    new UserChatMessage("Hello!")
+                ];
+
+                ChatCompletion completion = client.CompleteChat(messages);
+
+                Console.WriteLine(completion.Content[0].Text);
+              go: |
+                package main
+
+                import (
+                    "context"
+                    "fmt"
+
+                    "github.com/openai/openai-go"
+                    "github.com/openai/openai-go/option"
+                    "github.com/openai/openai-go/shared"
+                )
+
+                func main() {
+                    client := openai.NewClient(
+                        option.WithAPIKey("My API Key"),
+                    )
+                    chatCompletion, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
+                        Messages: []openai.ChatCompletionMessageParamUnion{openai.ChatCompletionMessageParamUnion{
+                            OfDeveloper: &openai.ChatCompletionDeveloperMessageParam{
+                                Content: openai.ChatCompletionDeveloperMessageParamContentUnion{
+                                    OfString: openai.String("string"),
+                                },
+                            },
+                        }},
+                        Model: shared.ChatModelGPT5_1,
+                    })
+                    if err != nil {
+                        panic(err.Error())
+                    }
+                    fmt.Printf("%+v\n", chatCompletion)
+                }
+              java: |-
+                package com.openai.example;
+
+                import com.openai.client.OpenAIClient;
+                import com.openai.client.okhttp.OpenAIOkHttpClient;
+                import com.openai.models.ChatModel;
+                import com.openai.models.chat.completions.ChatCompletion;
+                import com.openai.models.chat.completions.ChatCompletionCreateParams;
+
+                public final class Main {
+                    private Main() {}
+
+                    public static void main(String[] args) {
+                        OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                        ChatCompletionCreateParams params = ChatCompletionCreateParams.builder()
+                            .addDeveloperMessage("string")
+                            .model(ChatModel.GPT_5_1)
+                            .build();
+                        ChatCompletion chatCompletion = client.chat().completions().create(params);
+                    }
+                }
+              ruby: >-
+                require "openai"
+
+
+                openai = OpenAI::Client.new(api_key: "My API Key")
+
+
+                chat_completion = openai.chat.completions.create(messages: [{content: "string", role:
+                :developer}], model: :"gpt-5.1")
+
+
+                puts(chat_completion)
+            response: |
+              {
+                "id": "chatcmpl-B9MBs8CjcvOU2jLn4n570S5qMJKcT",
+                "object": "chat.completion",
+                "created": 1741569952,
+                "model": "gpt-4.1-2025-04-14",
+                "choices": [
+                  {
+                    "index": 0,
+                    "message": {
+                      "role": "assistant",
+                      "content": "Hello!
How can I assist you today?",
+                      "refusal": null,
+                      "annotations": []
+                    },
+                    "logprobs": null,
+                    "finish_reason": "stop"
+                  }
+                ],
+                "usage": {
+                  "prompt_tokens": 19,
+                  "completion_tokens": 10,
+                  "total_tokens": 29,
+                  "prompt_tokens_details": {
+                    "cached_tokens": 0,
+                    "audio_tokens": 0
+                  },
+                  "completion_tokens_details": {
+                    "reasoning_tokens": 0,
+                    "audio_tokens": 0,
+                    "accepted_prediction_tokens": 0,
+                    "rejected_prediction_tokens": 0
+                  }
+                },
+                "service_tier": "default"
+              }
+          - title: Image input
+            request:
+              curl: |
+                curl https://api.openai.com/v1/chat/completions \
+                  -H "Content-Type: application/json" \
+                  -H "Authorization: Bearer $OPENAI_API_KEY" \
+                  -d '{
+                    "model": "gpt-4.1",
+                    "messages": [
+                      {
+                        "role": "user",
+                        "content": [
+                          {
+                            "type": "text",
+                            "text": "What is in this image?"
+                          },
+                          {
+                            "type": "image_url",
+                            "image_url": {
+                              "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+                            }
+                          }
+                        ]
+                      }
+                    ],
+                    "max_tokens": 300
+                  }'
+              python: |-
+                from openai import OpenAI
+
+                client = OpenAI(
+                    api_key="My API Key",
+                )
+                chat_completion = client.chat.completions.create(
+                    messages=[{
+                        "content": "string",
+                        "role": "developer",
+                    }],
+                    model="gpt-4o",
+                )
+                print(chat_completion)
+              node.js: |-
+                import OpenAI from 'openai';
+
+                const client = new OpenAI({
+                  apiKey: 'My API Key',
+                });
+
+                const chatCompletion = await client.chat.completions.create({
+                  messages: [{ content: 'string', role: 'developer' }],
+                  model: 'gpt-4o',
+                });
+
+                console.log(chatCompletion);
+              csharp: |
+                using System;
+                using System.Collections.Generic;
+
+                using OpenAI.Chat;
+
+                ChatClient client = new(
+                    model: "gpt-4.1",
+                    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+                );
+
+                List<ChatMessage> messages =
+                [
+                    new UserChatMessage(
+                    [
+                        ChatMessageContentPart.CreateTextPart("What's in this image?"),
+                        ChatMessageContentPart.CreateImagePart(new Uri("https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"))
+                    ])
+                ];
+
+                ChatCompletion completion = client.CompleteChat(messages);
+
+                Console.WriteLine(completion.Content[0].Text);
+              go: |
+                package main
+
+                import (
+                    "context"
+                    "fmt"
+
+                    "github.com/openai/openai-go"
+                    "github.com/openai/openai-go/option"
+                    "github.com/openai/openai-go/shared"
+                )
+
+                func main() {
+                    client := openai.NewClient(
+                        option.WithAPIKey("My API Key"),
+                    )
+                    chatCompletion, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
+                        Messages: []openai.ChatCompletionMessageParamUnion{openai.ChatCompletionMessageParamUnion{
+                            OfDeveloper: &openai.ChatCompletionDeveloperMessageParam{
+                                Content: openai.ChatCompletionDeveloperMessageParamContentUnion{
+                                    OfString: openai.String("string"),
+                                },
+                            },
+                        }},
+                        Model: shared.ChatModelGPT5_1,
+                    })
+                    if err != nil {
+                        panic(err.Error())
+                    }
+                    fmt.Printf("%+v\n", chatCompletion)
+                }
+              java: |-
+                package com.openai.example;
+
+                import com.openai.client.OpenAIClient;
+                import com.openai.client.okhttp.OpenAIOkHttpClient;
+                import com.openai.models.ChatModel;
+                import com.openai.models.chat.completions.ChatCompletion;
+                import com.openai.models.chat.completions.ChatCompletionCreateParams;
+
+                public final class Main {
+                    private Main() {}
+
+                    public static void main(String[] args) {
+                        OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                        ChatCompletionCreateParams params =
ChatCompletionCreateParams.builder()
+                            .addDeveloperMessage("string")
+                            .model(ChatModel.GPT_5_1)
+                            .build();
+                        ChatCompletion chatCompletion = client.chat().completions().create(params);
+                    }
+                }
+              ruby: >-
+                require "openai"
+
+
+                openai = OpenAI::Client.new(api_key: "My API Key")
+
+
+                chat_completion = openai.chat.completions.create(messages: [{content: "string", role:
+                :developer}], model: :"gpt-5.1")
+
+
+                puts(chat_completion)
+            response: |
+              {
+                "id": "chatcmpl-B9MHDbslfkBeAs8l4bebGdFOJ6PeG",
+                "object": "chat.completion",
+                "created": 1741570283,
+                "model": "gpt-4.1-2025-04-14",
+                "choices": [
+                  {
+                    "index": 0,
+                    "message": {
+                      "role": "assistant",
+                      "content": "The image shows a wooden boardwalk path running through a lush green field or meadow. The sky is bright blue with some scattered clouds, giving the scene a serene and peaceful atmosphere. Trees and shrubs are visible in the background.",
+                      "refusal": null,
+                      "annotations": []
+                    },
+                    "logprobs": null,
+                    "finish_reason": "stop"
+                  }
+                ],
+                "usage": {
+                  "prompt_tokens": 1117,
+                  "completion_tokens": 46,
+                  "total_tokens": 1163,
+                  "prompt_tokens_details": {
+                    "cached_tokens": 0,
+                    "audio_tokens": 0
+                  },
+                  "completion_tokens_details": {
+                    "reasoning_tokens": 0,
+                    "audio_tokens": 0,
+                    "accepted_prediction_tokens": 0,
+                    "rejected_prediction_tokens": 0
+                  }
+                },
+                "service_tier": "default"
+              }
+          - title: Streaming
+            request:
+              curl: |
+                curl https://api.openai.com/v1/chat/completions \
+                  -H "Content-Type: application/json" \
+                  -H "Authorization: Bearer $OPENAI_API_KEY" \
+                  -d '{
+                    "model": "VAR_chat_model_id",
+                    "messages": [
+                      {
+                        "role": "developer",
+                        "content": "You are a helpful assistant."
+                      },
+                      {
+                        "role": "user",
+                        "content": "Hello!"
+                      }
+                    ],
+                    "stream": true
+                  }'
+              python: |-
+                from openai import OpenAI
+
+                client = OpenAI(
+                    api_key="My API Key",
+                )
+                chat_completion = client.chat.completions.create(
+                    messages=[{
+                        "content": "string",
+                        "role": "developer",
+                    }],
+                    model="gpt-4o",
+                )
+                print(chat_completion)
+              node.js: |-
+                import OpenAI from 'openai';
+
+                const client = new OpenAI({
+                  apiKey: 'My API Key',
+                });
+
+                const chatCompletion = await client.chat.completions.create({
+                  messages: [{ content: 'string', role: 'developer' }],
+                  model: 'gpt-4o',
+                });
+
+                console.log(chatCompletion);
+              csharp: >
+                using System;
+
+                using System.ClientModel;
+
+                using System.Collections.Generic;
+
+                using System.Threading.Tasks;
+
+
+                using OpenAI.Chat;
+
+
+                ChatClient client = new(
+                    model: "gpt-4.1",
+                    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+                );
+
+
+                List<ChatMessage> messages =
+
+                [
+                    new SystemChatMessage("You are a helpful assistant."),
+                    new UserChatMessage("Hello!")
+                ];
+
+
+                AsyncCollectionResult<StreamingChatCompletionUpdate> completionUpdates =
+                    client.CompleteChatStreamingAsync(messages);
+
+
+                await foreach (StreamingChatCompletionUpdate completionUpdate in completionUpdates)
+
+                {
+                    if (completionUpdate.ContentUpdate.Count > 0)
+                    {
+                        Console.Write(completionUpdate.ContentUpdate[0].Text);
+                    }
+                }
+              go: |
+                package main
+
+                import (
+                    "context"
+                    "fmt"
+
+                    "github.com/openai/openai-go"
+                    "github.com/openai/openai-go/option"
+                    "github.com/openai/openai-go/shared"
+                )
+
+                func main() {
+                    client := openai.NewClient(
+                        option.WithAPIKey("My API Key"),
+                    )
+                    chatCompletion, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{
+                        Messages: []openai.ChatCompletionMessageParamUnion{openai.ChatCompletionMessageParamUnion{
+                            OfDeveloper: &openai.ChatCompletionDeveloperMessageParam{
+                                Content:
openai.ChatCompletionDeveloperMessageParamContentUnion{ + OfString: openai.String("string"), + }, + }, + }}, + Model: shared.ChatModelGPT5_1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatCompletion) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.ChatModel; + import com.openai.models.chat.completions.ChatCompletion; + import com.openai.models.chat.completions.ChatCompletionCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatCompletionCreateParams params = ChatCompletionCreateParams.builder() + .addDeveloperMessage("string") + .model(ChatModel.GPT_5_1) + .build(); + ChatCompletion chatCompletion = client.chat().completions().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + chat_completion = openai.chat.completions.create(messages: [{content: "string", role: + :developer}], model: :"gpt-5.1") + + + puts(chat_completion) + response: > + {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", + "system_fingerprint": "fp_44709d6fcb", + "choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]} + + + {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", + "system_fingerprint": "fp_44709d6fcb", + "choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + + + .... + + + {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", + "system_fingerprint": "fp_44709d6fcb", + "choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + - title: Functions + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4.1", + "messages": [ + { + "role": "user", + "content": "What is the weather like in Boston today?" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ], + "tool_choice": "auto" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + chat_completion = client.chat.completions.create( + messages=[{ + "content": "string", + "role": "developer", + }], + model="gpt-4o", + ) + print(chat_completion) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const chatCompletion = await client.chat.completions.create({ + messages: [{ content: 'string', role: 'developer' }], + model: 'gpt-4o', + }); + + console.log(chatCompletion); + csharp: | + using System; + using System.Collections.Generic; + + using OpenAI.Chat; + + ChatClient client = new( + model: "gpt-4.1", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + ChatTool getCurrentWeatherTool = ChatTool.CreateFunctionTool( + functionName: "get_current_weather", + functionDescription: "Get the current weather in a given location", + functionParameters: BinaryData.FromString(""" + { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": [ "celsius", "fahrenheit" ] + } + }, + "required": [ "location" ] + } + """) + ); + + List messages = + [ + new UserChatMessage("What's the weather like in Boston today?"), + ]; + + ChatCompletionOptions options = new() + { + Tools = + { + getCurrentWeatherTool + }, + ToolChoice = ChatToolChoice.CreateAutoChoice(), + }; + + ChatCompletion completion = client.CompleteChat(messages, options); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/shared" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + chatCompletion, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{ + Messages: []openai.ChatCompletionMessageParamUnion{openai.ChatCompletionMessageParamUnion{ + OfDeveloper: &openai.ChatCompletionDeveloperMessageParam{ + Content: openai.ChatCompletionDeveloperMessageParamContentUnion{ + OfString: openai.String("string"), + }, + }, + }}, + Model: shared.ChatModelGPT5_1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatCompletion) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.ChatModel; + import com.openai.models.chat.completions.ChatCompletion; + import com.openai.models.chat.completions.ChatCompletionCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatCompletionCreateParams params = ChatCompletionCreateParams.builder() + .addDeveloperMessage("string") + .model(ChatModel.GPT_5_1) + .build(); + ChatCompletion chatCompletion = client.chat().completions().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + chat_completion = openai.chat.completions.create(messages: [{content: "string", role: + :developer}], model: :"gpt-5.1") + + + puts(chat_completion) + response: | + { + "id": "chatcmpl-abc123", + "object": "chat.completion", + "created": 1699896916, + "model": "gpt-4o-mini", + "choices": 
[ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\n\"location\": \"Boston, MA\"\n}" + } + } + ] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 82, + "completion_tokens": 17, + "total_tokens": 99, + "completion_tokens_details": { + "reasoning_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + } + } + - title: Logprobs + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_chat_model_id", + "messages": [ + { + "role": "user", + "content": "Hello!" + } + ], + "logprobs": true, + "top_logprobs": 2 + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + chat_completion = client.chat.completions.create( + messages=[{ + "content": "string", + "role": "developer", + }], + model="gpt-4o", + ) + print(chat_completion) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const chatCompletion = await client.chat.completions.create({ + messages: [{ content: 'string', role: 'developer' }], + model: 'gpt-4o', + }); + + console.log(chatCompletion); + csharp: | + using System; + using System.Collections.Generic; + + using OpenAI.Chat; + + ChatClient client = new( + model: "gpt-4.1", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + List messages = + [ + new UserChatMessage("Hello!") + ]; + + ChatCompletionOptions options = new() + { + IncludeLogProbabilities = true, + TopLogProbabilityCount = 2 + }; + + ChatCompletion completion = client.CompleteChat(messages, options); + + Console.WriteLine(completion.Content[0].Text); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/shared" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + chatCompletion, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{ + Messages: []openai.ChatCompletionMessageParamUnion{openai.ChatCompletionMessageParamUnion{ + OfDeveloper: &openai.ChatCompletionDeveloperMessageParam{ + Content: openai.ChatCompletionDeveloperMessageParamContentUnion{ + OfString: openai.String("string"), + }, + }, + }}, + Model: shared.ChatModelGPT5_1, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatCompletion) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.ChatModel; + import com.openai.models.chat.completions.ChatCompletion; + import com.openai.models.chat.completions.ChatCompletionCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatCompletionCreateParams params = ChatCompletionCreateParams.builder() + .addDeveloperMessage("string") + .model(ChatModel.GPT_5_1) + .build(); + ChatCompletion chatCompletion = client.chat().completions().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + chat_completion = openai.chat.completions.create(messages: [{content: "string", role: + 
:developer}], model: :"gpt-5.1") + + + puts(chat_completion) + response: | + { + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1702685778, + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" + }, + "logprobs": { + "content": [ + { + "token": "Hello", + "logprob": -0.31725305, + "bytes": [72, 101, 108, 108, 111], + "top_logprobs": [ + { + "token": "Hello", + "logprob": -0.31725305, + "bytes": [72, 101, 108, 108, 111] + }, + { + "token": "Hi", + "logprob": -1.3190403, + "bytes": [72, 105] + } + ] + }, + { + "token": "!", + "logprob": -0.02380986, + "bytes": [ + 33 + ], + "top_logprobs": [ + { + "token": "!", + "logprob": -0.02380986, + "bytes": [33] + }, + { + "token": " there", + "logprob": -3.787621, + "bytes": [32, 116, 104, 101, 114, 101] + } + ] + }, + { + "token": " How", + "logprob": -0.000054669687, + "bytes": [32, 72, 111, 119], + "top_logprobs": [ + { + "token": " How", + "logprob": -0.000054669687, + "bytes": [32, 72, 111, 119] + }, + { + "token": "<|end|>", + "logprob": -10.953937, + "bytes": null + } + ] + }, + { + "token": " can", + "logprob": -0.015801601, + "bytes": [32, 99, 97, 110], + "top_logprobs": [ + { + "token": " can", + "logprob": -0.015801601, + "bytes": [32, 99, 97, 110] + }, + { + "token": " may", + "logprob": -4.161023, + "bytes": [32, 109, 97, 121] + } + ] + }, + { + "token": " I", + "logprob": -3.7697225e-6, + "bytes": [ + 32, + 73 + ], + "top_logprobs": [ + { + "token": " I", + "logprob": -3.7697225e-6, + "bytes": [32, 73] + }, + { + "token": " assist", + "logprob": -13.596657, + "bytes": [32, 97, 115, 115, 105, 115, 116] + } + ] + }, + { + "token": " assist", + "logprob": -0.04571125, + "bytes": [32, 97, 115, 115, 105, 115, 116], + "top_logprobs": [ + { + "token": " assist", + "logprob": -0.04571125, + "bytes": [32, 97, 115, 115, 105, 115, 116] + }, + { + "token": " help", + "logprob": -3.1089056, + "bytes": [32, 104, 101, 108, 112] + } + ] + }, + { + "token": " you", + "logprob": -5.4385737e-6, + "bytes": [32, 121, 111, 117], + "top_logprobs": [ + { + "token": " you", + "logprob": -5.4385737e-6, + "bytes": [32, 121, 111, 117] + }, + { + "token": " today", + "logprob": -12.807695, + "bytes": [32, 116, 111, 100, 97, 121] + } + ] + }, + { + "token": " today", + "logprob": -0.0040071653, + "bytes": [32, 116, 111, 100, 97, 121], + "top_logprobs": [ + { + "token": " today", + "logprob": -0.0040071653, + "bytes": [32, 116, 111, 100, 97, 121] + }, + { + "token": "?", + "logprob": -5.5247097, + "bytes": [63] + } + ] + }, + { + "token": "?", + "logprob": -0.0008108172, + "bytes": [63], + "top_logprobs": [ + { + "token": "?", + "logprob": -0.0008108172, + "bytes": [63] + }, + { + "token": "?\n", + "logprob": -7.184561, + "bytes": [63, 10] + } + ] + } + ] + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 9, + "completion_tokens": 9, + "total_tokens": 18, + "completion_tokens_details": { + "reasoning_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "system_fingerprint": null + } + description: > + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) + + to take advantage of the latest OpenAI platform features. Compare + + [Chat Completions with + Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + + --- + + + Creates a model response for the given chat conversation. 
Learn more in the + + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), + + and [audio](https://platform.openai.com/docs/guides/audio) guides. + + + Parameter support can differ depending on the model used to generate the + + response, particularly for newer reasoning models. Parameters that are only + + supported for reasoning models are noted below. For the current state of + + unsupported parameters in reasoning models, + + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + /chat/completions/{completion_id}: + get: + operationId: getChatCompletion + tags: + - Chat + summary: Get chat completion + parameters: + - in: path + name: completion_id + required: true + schema: + type: string + description: The ID of the chat completion to retrieve. + responses: + '200': + description: A chat completion + content: + application/json: + schema: + $ref: '#/components/schemas/CreateChatCompletionResponse' + x-oaiMeta: + name: Get chat completion + group: chat + returns: >- + The [ChatCompletion](https://platform.openai.com/docs/api-reference/chat/object) object matching the + specified ID. + examples: + response: | + { + "object": "chat.completion", + "id": "chatcmpl-abc123", + "model": "gpt-4o-2024-08-06", + "created": 1738960610, + "request_id": "req_ded8ab984ec4bf840f37566c1011c417", + "tool_choice": null, + "usage": { + "total_tokens": 31, + "completion_tokens": 18, + "prompt_tokens": 13 + }, + "seed": 4944116822809979520, + "top_p": 1.0, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "system_fingerprint": "fp_50cad350e4", + "input_user": null, + "service_tier": "default", + "tools": null, + "metadata": {}, + "choices": [ + { + "index": 0, + "message": { + "content": "Mind of circuits hum, \nLearning patterns in silence— \nFuture's quiet spark.", + "role": "assistant", + "tool_calls": null, + "function_call": null + }, + "finish_reason": "stop", + "logprobs": null + } + ], + "response_format": null + } + request: + curl: | + curl https://api.openai.com/v1/chat/completions/chatcmpl-abc123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + chat_completion = client.chat.completions.retrieve( + "completion_id", + ) + print(chat_completion.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const chatCompletion = await client.chat.completions.retrieve('completion_id'); + + console.log(chatCompletion.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + chatCompletion, err := client.Chat.Completions.Get(context.TODO(), "completion_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatCompletion.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.chat.completions.ChatCompletion; + import com.openai.models.chat.completions.ChatCompletionRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatCompletion chatCompletion = 
client.chat().completions().retrieve("completion_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + chat_completion = openai.chat.completions.retrieve("completion_id") + + puts(chat_completion) + description: | + Get a stored chat completion. Only Chat Completions that have been created + with the `store` parameter set to `true` will be returned. + post: + operationId: updateChatCompletion + tags: + - Chat + summary: Update chat completion + parameters: + - in: path + name: completion_id + required: true + schema: + type: string + description: The ID of the chat completion to update. + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - metadata + properties: + metadata: + $ref: '#/components/schemas/Metadata' + responses: + '200': + description: A chat completion + content: + application/json: + schema: + $ref: '#/components/schemas/CreateChatCompletionResponse' + x-oaiMeta: + name: Update chat completion + group: chat + returns: >- + The [ChatCompletion](https://platform.openai.com/docs/api-reference/chat/object) object matching the + specified ID. + examples: + response: | + { + "object": "chat.completion", + "id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2", + "model": "gpt-4o-2024-08-06", + "created": 1738960610, + "request_id": "req_ded8ab984ec4bf840f37566c1011c417", + "tool_choice": null, + "usage": { + "total_tokens": 31, + "completion_tokens": 18, + "prompt_tokens": 13 + }, + "seed": 4944116822809979520, + "top_p": 1.0, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "system_fingerprint": "fp_50cad350e4", + "input_user": null, + "service_tier": "default", + "tools": null, + "metadata": { + "foo": "bar" + }, + "choices": [ + { + "index": 0, + "message": { + "content": "Mind of circuits hum, \nLearning patterns in silence— \nFuture's quiet spark.", + "role": "assistant", + "tool_calls": null, + "function_call": null + }, + "finish_reason": "stop", + "logprobs": null + } + ], + "response_format": null + } + request: + curl: | + curl -X POST https://api.openai.com/v1/chat/completions/chat_abc123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"metadata": {"foo": "bar"}}' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + chat_completion = client.chat.completions.update( + completion_id="completion_id", + metadata={ + "foo": "string" + }, + ) + print(chat_completion.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const chatCompletion = await client.chat.completions.update('completion_id', { metadata: { foo: + 'string' } }); + + + console.log(chatCompletion.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/shared" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + chatCompletion, err := client.Chat.Completions.Update( + context.TODO(), + "completion_id", + openai.ChatCompletionUpdateParams{ + Metadata: shared.Metadata{ + "foo": "string", + }, + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatCompletion.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.JsonValue; + import 
com.openai.models.chat.completions.ChatCompletion; + import com.openai.models.chat.completions.ChatCompletionUpdateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatCompletionUpdateParams params = ChatCompletionUpdateParams.builder() + .completionId("completion_id") + .metadata(ChatCompletionUpdateParams.Metadata.builder() + .putAdditionalProperty("foo", JsonValue.from("string")) + .build()) + .build(); + ChatCompletion chatCompletion = client.chat().completions().update(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + chat_completion = openai.chat.completions.update("completion_id", metadata: {foo: "string"}) + + puts(chat_completion) + description: | + Modify a stored chat completion. Only Chat Completions that have been + created with the `store` parameter set to `true` can be modified. Currently, + the only supported modification is to update the `metadata` field. + delete: + operationId: deleteChatCompletion + tags: + - Chat + summary: Delete chat completion + parameters: + - in: path + name: completion_id + required: true + schema: + type: string + description: The ID of the chat completion to delete. + responses: + '200': + description: The chat completion was deleted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ChatCompletionDeleted' + x-oaiMeta: + name: Delete chat completion + group: chat + returns: A deletion confirmation object. + examples: + response: | + { + "object": "chat.completion.deleted", + "id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2", + "deleted": true + } + request: + curl: | + curl -X DELETE https://api.openai.com/v1/chat/completions/chat_abc123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + chat_completion_deleted = client.chat.completions.delete( + "completion_id", + ) + print(chat_completion_deleted.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const chatCompletionDeleted = await client.chat.completions.delete('completion_id'); + + console.log(chatCompletionDeleted.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + chatCompletionDeleted, err := client.Chat.Completions.Delete(context.TODO(), "completion_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatCompletionDeleted.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.chat.completions.ChatCompletionDeleteParams; + import com.openai.models.chat.completions.ChatCompletionDeleted; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatCompletionDeleted chatCompletionDeleted = client.chat().completions().delete("completion_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + chat_completion_deleted = openai.chat.completions.delete("completion_id") + + puts(chat_completion_deleted) + description: | + Delete a stored chat completion. 
Only Chat Completions that have been + created with the `store` parameter set to `true` can be deleted. + /chat/completions/{completion_id}/messages: + get: + operationId: getChatCompletionMessages + tags: + - Chat + summary: Get chat messages + parameters: + - in: path + name: completion_id + required: true + schema: + type: string + description: The ID of the chat completion to retrieve messages from. + - name: after + in: query + description: Identifier for the last message from the previous pagination request. + required: false + schema: + type: string + - name: limit + in: query + description: Number of messages to retrieve. + required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: >- + Sort order for messages by timestamp. Use `asc` for ascending order or `desc` for descending + order. Defaults to `asc`. + required: false + schema: + type: string + enum: + - asc + - desc + default: asc + responses: + '200': + description: A list of messages + content: + application/json: + schema: + $ref: '#/components/schemas/ChatCompletionMessageList' + x-oaiMeta: + name: Get chat messages + group: chat + returns: >- + A list of [messages](https://platform.openai.com/docs/api-reference/chat/message-list) for the + specified chat completion. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2-0", + "role": "user", + "content": "write a haiku about ai", + "name": null, + "content_parts": null + } + ], + "first_id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2-0", + "last_id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2-0", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/chat/completions/chat_abc123/messages \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.chat.completions.messages.list( + completion_id="completion_id", + ) + page = page.data[0] + print(page) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + // Automatically fetches more pages as needed. 
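+              // Editorial note, not generated SDK output: to page manually instead, pass options mirroring the query parameters above, e.g.
+              // client.chat.completions.messages.list('completion_id', { after: 'chatcmpl-abc123-0', limit: 20, order: 'asc' })
+              // (option names are an assumption, matching this endpoint's `after`/`limit`/`order` parameters).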
+ + for await (const chatCompletionStoreMessage of + client.chat.completions.messages.list('completion_id')) { + console.log(chatCompletionStoreMessage); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Chat.Completions.Messages.List( + context.TODO(), + "completion_id", + openai.ChatCompletionMessageListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.chat.completions.messages.MessageListPage; + import com.openai.models.chat.completions.messages.MessageListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + MessageListPage page = client.chat().completions().messages().list("completion_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.chat.completions.messages.list("completion_id") + + puts(page) + description: | + Get the messages in a stored chat completion. Only Chat Completions that + have been created with the `store` parameter set to `true` will be + returned. + /completions: + post: + operationId: createCompletion + tags: + - Completions + summary: Create completion + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateCompletionRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateCompletionResponse' + x-oaiMeta: + name: Create completion + group: completions + returns: > + Returns a [completion](https://platform.openai.com/docs/api-reference/completions/object) object, or + a sequence of completion objects if the request is streamed. + legacy: true + examples: + - title: No streaming + request: + curl: | + curl https://api.openai.com/v1/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_completion_model_id", + "prompt": "Say this is a test", + "max_tokens": 7, + "temperature": 0 + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + completion = client.completions.create( + model="string", + prompt="This is a test.", + ) + print(completion) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const completion = await client.completions.create({ model: 'string', prompt: 'This is a + test.' 
}); + + + console.log(completion); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + completion, err := client.Completions.New(context.TODO(), openai.CompletionNewParams{ + Model: openai.CompletionNewParamsModelGPT3_5TurboInstruct, + Prompt: openai.CompletionNewParamsPromptUnion{ + OfString: openai.String("This is a test."), + }, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", completion) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.completions.Completion; + import com.openai.models.completions.CompletionCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + CompletionCreateParams params = CompletionCreateParams.builder() + .model(CompletionCreateParams.Model.GPT_3_5_TURBO_INSTRUCT) + .prompt("This is a test.") + .build(); + Completion completion = client.completions().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + completion = openai.completions.create(model: :"gpt-3.5-turbo-instruct", prompt: "This is a + test.") + + + puts(completion) + response: | + { + "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7", + "object": "text_completion", + "created": 1589478378, + "model": "VAR_completion_model_id", + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "text": "\n\nThis is indeed a test", + "index": 0, + "logprobs": null, + "finish_reason": "length" + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 7, + "total_tokens": 12 + } + } + - title: Streaming + request: + curl: | + curl https://api.openai.com/v1/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_completion_model_id", + "prompt": "Say this is a test", + "max_tokens": 7, + "temperature": 0, + "stream": true + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + completion = client.completions.create( + model="string", + prompt="This is a test.", + ) + print(completion) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const completion = await client.completions.create({ model: 'string', prompt: 'This is a + test.' 
});
+
+
+                console.log(completion);
+              go: |
+                package main
+
+                import (
+                  "context"
+                  "fmt"
+
+                  "github.com/openai/openai-go"
+                  "github.com/openai/openai-go/option"
+                )
+
+                func main() {
+                  client := openai.NewClient(
+                    option.WithAPIKey("My API Key"),
+                  )
+                  completion, err := client.Completions.New(context.TODO(), openai.CompletionNewParams{
+                    Model: openai.CompletionNewParamsModelGPT3_5TurboInstruct,
+                    Prompt: openai.CompletionNewParamsPromptUnion{
+                      OfString: openai.String("This is a test."),
+                    },
+                  })
+                  if err != nil {
+                    panic(err.Error())
+                  }
+                  fmt.Printf("%+v\n", completion)
+                }
+              java: |-
+                package com.openai.example;
+
+                import com.openai.client.OpenAIClient;
+                import com.openai.client.okhttp.OpenAIOkHttpClient;
+                import com.openai.models.completions.Completion;
+                import com.openai.models.completions.CompletionCreateParams;
+
+                public final class Main {
+                    private Main() {}
+
+                    public static void main(String[] args) {
+                        OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                        CompletionCreateParams params = CompletionCreateParams.builder()
+                            .model(CompletionCreateParams.Model.GPT_3_5_TURBO_INSTRUCT)
+                            .prompt("This is a test.")
+                            .build();
+                        Completion completion = client.completions().create(params);
+                    }
+                }
+              ruby: >-
+                require "openai"
+
+
+                openai = OpenAI::Client.new(api_key: "My API Key")
+
+
+                completion = openai.completions.create(model: :"gpt-3.5-turbo-instruct", prompt: "This is a
+                test.")
+
+
+                puts(completion)
+            response: |
+              {
+                "id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe",
+                "object": "text_completion",
+                "created": 1690759702,
+                "choices": [
+                  {
+                    "text": "This",
+                    "index": 0,
+                    "logprobs": null,
+                    "finish_reason": null
+                  }
+                ],
+                "model": "gpt-3.5-turbo-instruct",
+                "system_fingerprint": "fp_44709d6fcb"
+              }
+      description: Creates a completion for the provided prompt and parameters.
+  /containers:
+    get:
+      summary: List containers
+      description: List Containers
+      operationId: ListContainers
+      parameters:
+        - name: limit
+          in: query
+          description: >
+            A limit on the number of objects to be returned. Limit can range between 1 and 100, and the
+            default is 20.
+          required: false
+          schema:
+            type: integer
+            default: 20
+        - name: order
+          in: query
+          description: >
+            Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for
+            descending order.
+          schema:
+            type: string
+            default: desc
+            enum:
+              - asc
+              - desc
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent
+            call can include after=obj_foo in order to fetch the next page of the list.
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Success
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ContainerListResource'
+      x-oaiMeta:
+        name: List containers
+        group: containers
+        returns: a list of [container](https://platform.openai.com/docs/api-reference/containers/object) objects.
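+        # Editorial, illustrative sketch (not generated spec content): to page
+        # through containers, pass the previous response's `last_id` as `after`
+        # on the next request, e.g.
+        #   curl "https://api.openai.com/v1/containers?limit=20&after=container_123" \
+        #     -H "Authorization: Bearer $OPENAI_API_KEY"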
+ path: get + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "cntr_682dfebaacac8198bbfe9c2474fb6f4a085685cbe3cb5863", + "object": "container", + "created_at": 1747844794, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747844794, + "name": "My Container" + } + ], + "first_id": "container_123", + "last_id": "container_123", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/containers \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const containerListResponse of client.containers.list()) { + console.log(containerListResponse.id); + } + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.containers.list() + page = page.data[0] + print(page.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Containers.List(context.TODO(), openai.ContainerListParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.containers.ContainerListPage; + import com.openai.models.containers.ContainerListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ContainerListPage page = client.containers().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.containers.list + + puts(page) + post: + summary: Create container + description: Create Container + operationId: CreateContainer + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateContainerBody' + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ContainerResource' + x-oaiMeta: + name: Create container + group: containers + returns: The created [container](https://platform.openai.com/docs/api-reference/containers/object) object. 
+ path: post + examples: + response: | + { + "id": "cntr_682e30645a488191b6363a0cbefc0f0a025ec61b66250591", + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": "My Container" + } + request: + curl: | + curl https://api.openai.com/v1/containers \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "My Container" + }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const container = await client.containers.create({ name: 'name' }); + + console.log(container.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + container = client.containers.create( + name="name", + ) + print(container.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + container, err := client.Containers.New(context.TODO(), openai.ContainerNewParams{ + Name: "name", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", container.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.containers.ContainerCreateParams; + import com.openai.models.containers.ContainerCreateResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ContainerCreateParams params = ContainerCreateParams.builder() + .name("name") + .build(); + ContainerCreateResponse container = client.containers().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + container = openai.containers.create(name: "name") + + puts(container) + /containers/{container_id}: + get: + summary: Retrieve container + description: Retrieve Container + operationId: RetrieveContainer + parameters: + - name: container_id + in: path + required: true + schema: + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ContainerResource' + x-oaiMeta: + name: Retrieve container + group: containers + returns: The [container](https://platform.openai.com/docs/api-reference/containers/object) object. 
+ path: get + examples: + response: | + { + "id": "cntr_682dfebaacac8198bbfe9c2474fb6f4a085685cbe3cb5863", + "object": "container", + "created_at": 1747844794, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747844794, + "name": "My Container" + } + request: + curl: > + curl https://api.openai.com/v1/containers/cntr_682dfebaacac8198bbfe9c2474fb6f4a085685cbe3cb5863 + \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const container = await client.containers.retrieve('container_id'); + + console.log(container.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + container = client.containers.retrieve( + "container_id", + ) + print(container.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + container, err := client.Containers.Get(context.TODO(), "container_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", container.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.containers.ContainerRetrieveParams; + import com.openai.models.containers.ContainerRetrieveResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ContainerRetrieveResponse container = client.containers().retrieve("container_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + container = openai.containers.retrieve("container_id") + + puts(container) + delete: + operationId: DeleteContainer + summary: Delete a container + description: Delete Container + parameters: + - name: container_id + in: path + description: The ID of the container to delete. 
+ required: true + schema: + type: string + responses: + '200': + description: OK + x-oaiMeta: + name: Delete a container + group: containers + returns: Deletion Status + path: delete + examples: + response: | + { + "id": "cntr_682dfebaacac8198bbfe9c2474fb6f4a085685cbe3cb5863", + "object": "container.deleted", + "deleted": true + } + request: + curl: > + curl -X DELETE + https://api.openai.com/v1/containers/cntr_682dfebaacac8198bbfe9c2474fb6f4a085685cbe3cb5863 \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + await client.containers.delete('container_id'); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + client.containers.delete( + "container_id", + ) + go: | + package main + + import ( + "context" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + err := client.Containers.Delete(context.TODO(), "container_id") + if err != nil { + panic(err.Error()) + } + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.containers.ContainerDeleteParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + client.containers().delete("container_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + result = openai.containers.delete("container_id") + + puts(result) + /containers/{container_id}/files: + post: + summary: Create container file + description: > + Create a Container File + + + You can send either a multipart/form-data request with the raw file content, or a JSON request with a + file ID. + operationId: CreateContainerFile + parameters: + - name: container_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateContainerFileBody' + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ContainerFileResource' + x-oaiMeta: + name: Create container file + group: containers + returns: >- + The created [container file](https://platform.openai.com/docs/api-reference/container-files/object) + object. 
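+        # Editorial, illustrative sketch (not generated spec content): the JSON
+        # variant mentioned in the description references an already-uploaded
+        # File by ID rather than sending raw bytes; the exact body shape is
+        # defined by CreateContainerFileBody, and the `file_id` field name used
+        # here is an assumption.
+        #   curl https://api.openai.com/v1/containers/cntr_682e0e7318108198aa783fd921ff305e08e78805b9fdbb04/files \
+        #     -H "Authorization: Bearer $OPENAI_API_KEY" \
+        #     -H "Content-Type: application/json" \
+        #     -d '{"file_id": "file-abc123"}'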
+ path: post + examples: + response: | + { + "id": "cfile_682e0e8a43c88191a7978f477a09bdf5", + "object": "container.file", + "created_at": 1747848842, + "bytes": 880, + "container_id": "cntr_682e0e7318108198aa783fd921ff305e08e78805b9fdbb04", + "path": "/mnt/data/88e12fa445d32636f190a0b33daed6cb-tsconfig.json", + "source": "user" + } + request: + curl: > + curl + https://api.openai.com/v1/containers/cntr_682e0e7318108198aa783fd921ff305e08e78805b9fdbb04/files + \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -F file="@example.txt" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const file = await client.containers.files.create('container_id'); + + console.log(file.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + file = client.containers.files.create( + container_id="container_id", + ) + print(file.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + file, err := client.Containers.Files.New( + context.TODO(), + "container_id", + openai.ContainerFileNewParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", file.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.containers.files.FileCreateParams; + import com.openai.models.containers.files.FileCreateResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileCreateResponse file = client.containers().files().create("container_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + file = openai.containers.files.create("container_id") + + puts(file) + get: + summary: List container files + description: List Container files + operationId: ListContainerFiles + parameters: + - name: container_id + in: path + required: true + schema: + type: string + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: > + Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for + descending order. + schema: + type: string + default: desc + enum: + - asc + - desc + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + schema: + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ContainerFileListResource' + x-oaiMeta: + name: List container files + group: containers + returns: >- + a list of [container file](https://platform.openai.com/docs/api-reference/container-files/object) + objects. 
+ path: get + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "cfile_682e0e8a43c88191a7978f477a09bdf5", + "object": "container.file", + "created_at": 1747848842, + "bytes": 880, + "container_id": "cntr_682e0e7318108198aa783fd921ff305e08e78805b9fdbb04", + "path": "/mnt/data/88e12fa445d32636f190a0b33daed6cb-tsconfig.json", + "source": "user" + } + ], + "first_id": "cfile_682e0e8a43c88191a7978f477a09bdf5", + "has_more": false, + "last_id": "cfile_682e0e8a43c88191a7978f477a09bdf5" + } + request: + curl: > + curl + https://api.openai.com/v1/containers/cntr_682e0e7318108198aa783fd921ff305e08e78805b9fdbb04/files + \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const fileListResponse of client.containers.files.list('container_id')) { + console.log(fileListResponse.id); + } + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.containers.files.list( + container_id="container_id", + ) + page = page.data[0] + print(page.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Containers.Files.List( + context.TODO(), + "container_id", + openai.ContainerFileListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.containers.files.FileListPage; + import com.openai.models.containers.files.FileListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileListPage page = client.containers().files().list("container_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.containers.files.list("container_id") + + puts(page) + /containers/{container_id}/files/{file_id}: + get: + summary: Retrieve container file + description: Retrieve Container File + operationId: RetrieveContainerFile + parameters: + - name: container_id + in: path + required: true + schema: + type: string + - name: file_id + in: path + required: true + schema: + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ContainerFileResource' + x-oaiMeta: + name: Retrieve container file + group: containers + returns: The [container file](https://platform.openai.com/docs/api-reference/container-files/object) object. 
+ path: get + examples: + response: | + { + "id": "cfile_682e0e8a43c88191a7978f477a09bdf5", + "object": "container.file", + "created_at": 1747848842, + "bytes": 880, + "container_id": "cntr_682e0e7318108198aa783fd921ff305e08e78805b9fdbb04", + "path": "/mnt/data/88e12fa445d32636f190a0b33daed6cb-tsconfig.json", + "source": "user" + } + request: + curl: | + curl https://api.openai.com/v1/containers/container_123/files/file_456 \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const file = await client.containers.files.retrieve('file_id', { container_id: 'container_id' + }); + + + console.log(file.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + file = client.containers.files.retrieve( + file_id="file_id", + container_id="container_id", + ) + print(file.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + file, err := client.Containers.Files.Get( + context.TODO(), + "container_id", + "file_id", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", file.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.containers.files.FileRetrieveParams; + import com.openai.models.containers.files.FileRetrieveResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileRetrieveParams params = FileRetrieveParams.builder() + .containerId("container_id") + .fileId("file_id") + .build(); + FileRetrieveResponse file = client.containers().files().retrieve(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + file = openai.containers.files.retrieve("file_id", container_id: "container_id") + + puts(file) + delete: + operationId: DeleteContainerFile + summary: Delete a container file + description: Delete Container File + parameters: + - name: container_id + in: path + required: true + schema: + type: string + - name: file_id + in: path + required: true + schema: + type: string + responses: + '200': + description: OK + x-oaiMeta: + name: Delete a container file + group: containers + returns: Deletion Status + path: delete + examples: + response: | + { + "id": "cfile_682e0e8a43c88191a7978f477a09bdf5", + "object": "container.file.deleted", + "deleted": true + } + request: + curl: > + curl -X DELETE + https://api.openai.com/v1/containers/cntr_682dfebaacac8198bbfe9c2474fb6f4a085685cbe3cb5863/files/cfile_682e0e8a43c88191a7978f477a09bdf5 + \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + await client.containers.files.delete('file_id', { container_id: 'container_id' }); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + client.containers.files.delete( + file_id="file_id", + container_id="container_id", + ) + go: | + package main + + import ( + "context" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + err := client.Containers.Files.Delete( + 
context.TODO(), + "container_id", + "file_id", + ) + if err != nil { + panic(err.Error()) + } + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.containers.files.FileDeleteParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileDeleteParams params = FileDeleteParams.builder() + .containerId("container_id") + .fileId("file_id") + .build(); + client.containers().files().delete(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + result = openai.containers.files.delete("file_id", container_id: "container_id") + + puts(result) + /containers/{container_id}/files/{file_id}/content: + get: + summary: Retrieve container file content + description: Retrieve Container File Content + operationId: RetrieveContainerFileContent + parameters: + - name: container_id + in: path + required: true + schema: + type: string + - name: file_id + in: path + required: true + schema: + type: string + responses: + '200': + description: Success + x-oaiMeta: + name: Retrieve container file content + group: containers + returns: The contents of the container file. + path: get + examples: + response: | + + request: + curl: | + curl https://api.openai.com/v1/containers/container_123/files/cfile_456/content \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const content = await client.containers.files.content.retrieve('file_id', { container_id: + 'container_id' }); + + + console.log(content); + + + const data = await content.blob(); + + console.log(data); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + content = client.containers.files.content.retrieve( + file_id="file_id", + container_id="container_id", + ) + print(content) + data = content.read() + print(data) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + content, err := client.Containers.Files.Content.Get( + context.TODO(), + "container_id", + "file_id", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", content) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.http.HttpResponse; + import com.openai.models.containers.files.content.ContentRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ContentRetrieveParams params = ContentRetrieveParams.builder() + .containerId("container_id") + .fileId("file_id") + .build(); + HttpResponse content = client.containers().files().content().retrieve(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + content = openai.containers.files.content.retrieve("file_id", container_id: "container_id") + + puts(content) + /conversations/{conversation_id}/items: + post: + operationId: createConversationItems + tags: + - Conversations + summary: Create items + parameters: + - in: path + name: conversation_id + required: true + schema: + type: 
string + example: conv_123 + description: The ID of the conversation to add the item to. + - name: include + in: query + required: false + schema: + type: array + items: + $ref: '#/components/schemas/IncludeEnum' + description: > + Additional fields to include in the response. See the `include` + + parameter for [listing Conversation items + above](https://platform.openai.com/docs/api-reference/conversations/list-items#conversations_list_items-include) + for more information. + requestBody: + required: true + content: + application/json: + schema: + properties: + items: + type: array + description: | + The items to add to the conversation. You may add up to 20 items at a time. + items: + $ref: '#/components/schemas/InputItem' + maxItems: 20 + required: + - items + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationItemList' + x-oaiMeta: + name: Create items + group: conversations + returns: > + Returns the list of added + [items](https://platform.openai.com/docs/api-reference/conversations/list-items-object). + path: create-item + examples: + - title: Add a user message to a conversation + request: + curl: | + curl https://api.openai.com/v1/conversations/conv_123/items \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "items": [ + { + "type": "message", + "role": "user", + "content": [ + {"type": "input_text", "text": "Hello!"} + ] + }, + { + "type": "message", + "role": "user", + "content": [ + {"type": "input_text", "text": "How are you?"} + ] + } + ] + }' + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const items = await client.conversations.items.create( + "conv_123", + { + items: [ + { + type: "message", + role: "user", + content: [{ type: "input_text", text: "Hello!" }], + }, + { + type: "message", + role: "user", + content: [{ type: "input_text", text: "How are you?" }], + }, + ], + } + ); + console.log(items.data); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + conversation_item_list = client.conversations.items.create( + conversation_id="conv_123", + items=[{ + "content": "string", + "role": "user", + "type": "message", + }], + ) + print(conversation_item_list.first_id) + csharp: | + using System; + using System.Collections.Generic; + using OpenAI.Conversations; + + OpenAIConversationClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + ConversationItemList created = client.ConversationItems.Create( + conversationId: "conv_123", + new CreateConversationItemsOptions + { + Items = new List + { + new ConversationMessage + { + Role = "user", + Content = + { + new ConversationInputText { Text = "Hello!" } + } + }, + new ConversationMessage + { + Role = "user", + Content = + { + new ConversationInputText { Text = "How are you?" 
} + } + } + } + } + ); + Console.WriteLine(created.Data.Count); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const conversationItemList = await client.conversations.items.create('conv_123', { + items: [{ content: 'string', role: 'user', type: 'message' }], + }); + + console.log(conversationItemList.first_id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/conversations" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/responses" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + conversationItemList, err := client.Conversations.Items.New( + context.TODO(), + "conv_123", + conversations.ItemNewParams{ + Items: []responses.ResponseInputItemUnionParam{responses.ResponseInputItemUnionParam{ + OfMessage: &responses.EasyInputMessageParam{ + Content: responses.EasyInputMessageContentUnionParam{ + OfString: openai.String("string"), + }, + Role: responses.EasyInputMessageRoleUser, + Type: responses.EasyInputMessageTypeMessage, + }, + }}, + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", conversationItemList.FirstID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.conversations.items.ConversationItemList; + import com.openai.models.conversations.items.ItemCreateParams; + import com.openai.models.responses.EasyInputMessage; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ItemCreateParams params = ItemCreateParams.builder() + .conversationId("conv_123") + .addItem(EasyInputMessage.builder() + .content("string") + .role(EasyInputMessage.Role.USER) + .type(EasyInputMessage.Type.MESSAGE) + .build()) + .build(); + ConversationItemList conversationItemList = client.conversations().items().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + conversation_item_list = openai.conversations.items.create("conv_123", items: [{content: + "string", role: :user, type: :message}]) + + + puts(conversation_item_list) + response: | + { + "object": "list", + "data": [ + { + "type": "message", + "id": "msg_abc", + "status": "completed", + "role": "user", + "content": [ + {"type": "input_text", "text": "Hello!"} + ] + }, + { + "type": "message", + "id": "msg_def", + "status": "completed", + "role": "user", + "content": [ + {"type": "input_text", "text": "How are you?"} + ] + } + ], + "first_id": "msg_abc", + "last_id": "msg_def", + "has_more": false + } + description: Create items in a conversation with the given ID. + get: + operationId: listConversationItems + tags: + - Conversations + summary: List items + parameters: + - in: path + name: conversation_id + required: true + schema: + type: string + example: conv_123 + description: The ID of the conversation to list items for. + - name: limit + in: query + description: | + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + default: 20 + - in: query + name: order + schema: + type: string + enum: + - asc + - desc + description: | + The order to return the input items in. Default is `desc`. + - `asc`: Return the input items in ascending order. 
+ - `desc`: Return the input items in descending order. + - in: query + name: after + schema: + type: string + description: | + An item ID to list items after, used in pagination. + - name: include + in: query + required: false + schema: + type: array + items: + $ref: '#/components/schemas/IncludeEnum' + description: >- + Specify additional output data to include in the model response. Currently supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool call. + + - `code_interpreter_call.outputs`: Includes the outputs of python code execution in code + interpreter tool call items. + + - `computer_call_output.output.image_url`: Include image urls from the computer call output. + + - `file_search_call.results`: Include the search results of the file search tool call. + + - `message.input_image.image_url`: Include image urls from the input message. + + - `message.output_text.logprobs`: Include logprobs with assistant messages. + + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning tokens in reasoning + item outputs. This enables reasoning items to be used in multi-turn conversations when using the + Responses API statelessly (like when the `store` parameter is set to `false`, or when an + organization is enrolled in the zero data retention program). + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationItemList' + x-oaiMeta: + name: List items + group: conversations + returns: > + Returns a [list + object](https://platform.openai.com/docs/api-reference/conversations/list-items-object) containing + Conversation items. + path: list-items + examples: + - title: List items in a conversation + request: + curl: | + curl "https://api.openai.com/v1/conversations/conv_123/items?limit=10" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const items = await client.conversations.items.list("conv_123", { limit: 10 }); + console.log(items.data); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.conversations.items.list( + conversation_id="conv_123", + ) + page = page.data[0] + print(page) + csharp: | + using System; + using OpenAI.Conversations; + + OpenAIConversationClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + ConversationItemList items = client.ConversationItems.List( + conversationId: "conv_123", + new ListConversationItemsOptions { Limit = 10 } + ); + Console.WriteLine(items.Data.Count); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. 
+ for await (const conversationItem of client.conversations.items.list('conv_123')) { + console.log(conversationItem); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/conversations" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Conversations.Items.List( + context.TODO(), + "conv_123", + conversations.ItemListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.conversations.items.ItemListPage; + import com.openai.models.conversations.items.ItemListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ItemListPage page = client.conversations().items().list("conv_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.conversations.items.list("conv_123") + + puts(page) + response: | + { + "object": "list", + "data": [ + { + "type": "message", + "id": "msg_abc", + "status": "completed", + "role": "user", + "content": [ + {"type": "input_text", "text": "Hello!"} + ] + } + ], + "first_id": "msg_abc", + "last_id": "msg_abc", + "has_more": false + } + description: List all items for a conversation with the given ID. + /conversations/{conversation_id}/items/{item_id}: + get: + operationId: getConversationItem + tags: + - Conversations + summary: Retrieve an item + parameters: + - in: path + name: conversation_id + required: true + schema: + type: string + example: conv_123 + description: The ID of the conversation that contains the item. + - in: path + name: item_id + required: true + schema: + type: string + example: msg_abc + description: The ID of the item to retrieve. + - name: include + in: query + required: false + schema: + type: array + items: + $ref: '#/components/schemas/IncludeEnum' + description: > + Additional fields to include in the response. See the `include` + + parameter for [listing Conversation items + above](https://platform.openai.com/docs/api-reference/conversations/list-items#conversations_list_items-include) + for more information. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationItem' + x-oaiMeta: + name: Retrieve an item + group: conversations + returns: > + Returns a [Conversation + Item](https://platform.openai.com/docs/api-reference/conversations/item-object). 
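+        # A compact pagination sketch (a comment aside, not part of the spec):
+        # iterating the list endpoint above with the official openai-python
+        # SDK, whose page object fetches further pages on demand, mirroring
+        # the node.js example; the conversation ID is a placeholder.
+        #
+        #   from openai import OpenAI
+        #   client = OpenAI()
+        #   for item in client.conversations.items.list(conversation_id="conv_123", limit=10):
+        #       print(item.id)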
+ path: get-item + examples: + - title: Retrieve an item + request: + curl: | + curl https://api.openai.com/v1/conversations/conv_123/items/msg_abc \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const item = await client.conversations.items.retrieve( + "conv_123", + "msg_abc" + ); + console.log(item); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + conversation_item = client.conversations.items.retrieve( + item_id="msg_abc", + conversation_id="conv_123", + ) + print(conversation_item) + csharp: | + using System; + using OpenAI.Conversations; + + OpenAIConversationClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + ConversationItem item = client.ConversationItems.Get( + conversationId: "conv_123", + itemId: "msg_abc" + ); + Console.WriteLine(item.Id); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const conversationItem = await client.conversations.items.retrieve('msg_abc', { + conversation_id: 'conv_123', + }); + + console.log(conversationItem); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/conversations" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + conversationItem, err := client.Conversations.Items.Get( + context.TODO(), + "conv_123", + "msg_abc", + conversations.ItemGetParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", conversationItem) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.conversations.items.ConversationItem; + import com.openai.models.conversations.items.ItemRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ItemRetrieveParams params = ItemRetrieveParams.builder() + .conversationId("conv_123") + .itemId("msg_abc") + .build(); + ConversationItem conversationItem = client.conversations().items().retrieve(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + conversation_item = openai.conversations.items.retrieve("msg_abc", conversation_id: + "conv_123") + + + puts(conversation_item) + response: | + { + "type": "message", + "id": "msg_abc", + "status": "completed", + "role": "user", + "content": [ + {"type": "input_text", "text": "Hello!"} + ] + } + description: Get a single item from a conversation with the given IDs. + delete: + operationId: deleteConversationItem + tags: + - Conversations + summary: Delete an item + parameters: + - in: path + name: conversation_id + required: true + schema: + type: string + example: conv_123 + description: The ID of the conversation that contains the item. + - in: path + name: item_id + required: true + schema: + type: string + example: msg_abc + description: The ID of the item to delete. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationResource' + x-oaiMeta: + name: Delete an item + group: conversations + returns: > + Returns the updated + [Conversation](https://platform.openai.com/docs/api-reference/conversations/object) object. 
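+        # A hedged sketch (comment aside) combining the list and delete calls
+        # shown in this file's Python examples: remove every item from a
+        # conversation. Note that delete returns the updated Conversation
+        # object, not the deleted item. IDs are placeholders.
+        #
+        #   from openai import OpenAI
+        #   client = OpenAI()
+        #   item_ids = [item.id for item in client.conversations.items.list(conversation_id="conv_123")]
+        #   for item_id in item_ids:
+        #       conversation = client.conversations.items.delete(
+        #           item_id=item_id, conversation_id="conv_123"
+        #       )
+        #   print(conversation.id)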
+ path: delete-item + examples: + - title: Delete an item + request: + curl: | + curl -X DELETE https://api.openai.com/v1/conversations/conv_123/items/msg_abc \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const conversation = await client.conversations.items.delete( + "conv_123", + "msg_abc" + ); + console.log(conversation); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + conversation = client.conversations.items.delete( + item_id="msg_abc", + conversation_id="conv_123", + ) + print(conversation.id) + csharp: | + using System; + using OpenAI.Conversations; + + OpenAIConversationClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + Conversation conversation = client.ConversationItems.Delete( + conversationId: "conv_123", + itemId: "msg_abc" + ); + Console.WriteLine(conversation.Id); + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const conversation = await client.conversations.items.delete('msg_abc', { conversation_id: + 'conv_123' }); + + + console.log(conversation.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + conversation, err := client.Conversations.Items.Delete( + context.TODO(), + "conv_123", + "msg_abc", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", conversation.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.conversations.Conversation; + import com.openai.models.conversations.items.ItemDeleteParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ItemDeleteParams params = ItemDeleteParams.builder() + .conversationId("conv_123") + .itemId("msg_abc") + .build(); + Conversation conversation = client.conversations().items().delete(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + conversation = openai.conversations.items.delete("msg_abc", conversation_id: "conv_123") + + puts(conversation) + response: | + { + "id": "conv_123", + "object": "conversation", + "created_at": 1741900000, + "metadata": {"topic": "demo"} + } + description: Delete an item from a conversation with the given IDs. + /embeddings: + post: + operationId: createEmbedding + tags: + - Embeddings + summary: Create embeddings + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEmbeddingRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEmbeddingResponse' + x-oaiMeta: + name: Create embeddings + group: embeddings + returns: A list of [embedding](https://platform.openai.com/docs/api-reference/embeddings/object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "embedding", + "embedding": [ + 0.0023064255, + -0.009327292, + .... 
              (1536 floats total for ada-002)
+              -0.0028842222,
+            ],
+            "index": 0
+          }
+        ],
+        "model": "text-embedding-ada-002",
+        "usage": {
+          "prompt_tokens": 8,
+          "total_tokens": 8
+        }
+      }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/embeddings \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "input": "The food was delicious and the waiter...",
+                  "model": "text-embedding-ada-002",
+                  "encoding_format": "float"
+                }'
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              create_embedding_response = client.embeddings.create(
+                  input="The quick brown fox jumped over the lazy dog",
+                  model="text-embedding-3-small",
+              )
+              print(create_embedding_response.data)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const createEmbeddingResponse = await client.embeddings.create({
+                input: 'The quick brown fox jumped over the lazy dog',
+                model: 'text-embedding-3-small',
+              });
+
+              console.log(createEmbeddingResponse.data);
+            csharp: >
+              using System;
+
+              using OpenAI.Embeddings;
+
+              EmbeddingClient client = new(
+                  model: "text-embedding-3-small",
+                  apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+              );
+
+              OpenAIEmbedding embedding = client.GenerateEmbedding(input: "The quick brown fox jumped over the
+              lazy dog");
+
+              ReadOnlyMemory<float> vector = embedding.ToFloats();
+
+              for (int i = 0; i < vector.Length; i++)
+
+              {
+                  Console.WriteLine($" [{i,4}] = {vector.Span[i]}");
+              }
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                createEmbeddingResponse, err := client.Embeddings.New(context.TODO(), openai.EmbeddingNewParams{
+                  Input: openai.EmbeddingNewParamsInputUnion{
+                    OfString: openai.String("The quick brown fox jumped over the lazy dog"),
+                  },
+                  Model: openai.EmbeddingModelTextEmbeddingAda002,
+                })
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", createEmbeddingResponse.Data)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.embeddings.CreateEmbeddingResponse;
+              import com.openai.models.embeddings.EmbeddingCreateParams;
+              import com.openai.models.embeddings.EmbeddingModel;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      EmbeddingCreateParams params = EmbeddingCreateParams.builder()
+                          .input("The quick brown fox jumped over the lazy dog")
+                          .model(EmbeddingModel.TEXT_EMBEDDING_ADA_002)
+                          .build();
+                      CreateEmbeddingResponse createEmbeddingResponse = client.embeddings().create(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              create_embedding_response = openai.embeddings.create(
+                input: "The quick brown fox jumped over the lazy dog",
+                model: :"text-embedding-ada-002"
+              )
+
+              puts(create_embedding_response)
+      description: Creates an embedding vector representing the input text.
+  /evals:
+    get:
+      operationId: listEvals
+      tags:
+        - Evals
+      summary: List evals
+      parameters:
+        - name: after
+          in: query
+          description: Identifier for the last eval from the previous pagination request.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: Number of evals to retrieve.
+ required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for descending order. + required: false + schema: + type: string + enum: + - asc + - desc + default: asc + - name: order_by + in: query + description: | + Evals can be ordered by creation time or last updated time. Use + `created_at` for creation time or `updated_at` for last updated time. + required: false + schema: + type: string + enum: + - created_at + - updated_at + default: created_at + responses: + '200': + description: A list of evals + content: + application/json: + schema: + $ref: '#/components/schemas/EvalList' + x-oaiMeta: + name: List evals + group: evals + returns: >- + A list of [evals](https://platform.openai.com/docs/api-reference/evals/object) matching the + specified filters. + path: list + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "eval_67abd54d9b0081909a86353f6fb9317a", + "object": "eval", + "data_source_config": { + "type": "stored_completions", + "metadata": { + "usecase": "push_notifications_summarizer" + }, + "schema": { + "type": "object", + "properties": { + "item": { + "type": "object" + }, + "sample": { + "type": "object" + } + }, + "required": [ + "item", + "sample" + ] + } + }, + "testing_criteria": [ + { + "name": "Push Notification Summary Grader", + "id": "Push Notification Summary Grader-9b876f24-4762-4be9-aff4-db7a9b31c673", + "type": "label_model", + "model": "o3-mini", + "input": [ + { + "type": "message", + "role": "developer", + "content": { + "type": "input_text", + "text": "\nLabel the following push notification summary as either correct or incorrect.\nThe push notification and the summary will be provided below.\nA good push notificiation summary is concise and snappy.\nIf it is good, then label it as correct, if not, then incorrect.\n" + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "\nPush notifications: {{item.input}}\nSummary: {{sample.output_text}}\n" + } + } + ], + "passing_labels": [ + "correct" + ], + "labels": [ + "correct", + "incorrect" + ], + "sampling_params": null + } + ], + "name": "Push Notification Summary Grader", + "created_at": 1739314509, + "metadata": { + "description": "A stored completions eval for push notification summaries" + } + } + ], + "first_id": "eval_67abd54d9b0081909a86353f6fb9317a", + "last_id": "eval_67aa884cf6688190b58f657d4441c8b7", + "has_more": true + } + request: + curl: | + curl https://api.openai.com/v1/evals?limit=1 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.evals.list() + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. 
+ for await (const evalListResponse of client.evals.list()) { + console.log(evalListResponse.id); + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.evals.EvalListPage; + import com.openai.models.evals.EvalListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + EvalListPage page = client.evals().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.evals.list + + puts(page) + description: | + List evaluations for a project. + post: + operationId: createEval + tags: + - Evals + summary: Create eval + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEvalRequest' + responses: + '201': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Eval' + x-oaiMeta: + name: Create eval + group: evals + returns: The created [Eval](https://platform.openai.com/docs/api-reference/evals/object) object. + path: post + examples: + response: | + { + "object": "eval", + "id": "eval_67b7fa9a81a88190ab4aa417e397ea21", + "data_source_config": { + "type": "stored_completions", + "metadata": { + "usecase": "chatbot" + }, + "schema": { + "type": "object", + "properties": { + "item": { + "type": "object" + }, + "sample": { + "type": "object" + } + }, + "required": [ + "item", + "sample" + ] + }, + "testing_criteria": [ + { + "name": "Example label grader", + "type": "label_model", + "model": "o3-mini", + "input": [ + { + "type": "message", + "role": "developer", + "content": { + "type": "input_text", + "text": "Classify the sentiment of the following statement as one of positive, neutral, or negative" + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "Statement: {{item.input}}" + } + } + ], + "passing_labels": [ + "positive" + ], + "labels": [ + "positive", + "neutral", + "negative" + ] + } + ], + "name": "Sentiment", + "created_at": 1740110490, + "metadata": { + "description": "An eval for sentiment analysis" + } + } + request: + curl: | + curl https://api.openai.com/v1/evals \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Sentiment", + "data_source_config": { + "type": "stored_completions", + "metadata": { + "usecase": "chatbot" + } + }, + "testing_criteria": [ + { + "type": "label_model", + "model": "o3-mini", + "input": [ + { + "role": "developer", + "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'" + }, + { + "role": "user", + "content": "Statement: {{item.input}}" + } + ], + "passing_labels": [ + "positive" + ], + "labels": [ + "positive", + "neutral", + "negative" + ], + "name": "Example label grader" + } + ] + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + eval = client.evals.create( + data_source_config={ + "item_schema": { + "foo": "bar" + }, + "type": "custom", + }, + testing_criteria=[{ + "input": [{ + "content": "content", + "role": "role", + }], + "labels": ["string"], + "model": "model", + "name": "name", + "passing_labels": ["string"], + "type": "label_model", + }], + ) + print(eval.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const _eval = 
await client.evals.create({ + data_source_config: { item_schema: { foo: 'bar' }, type: 'custom' }, + testing_criteria: [ + { + input: [{ content: 'content', role: 'role' }], + labels: ['string'], + model: 'model', + name: 'name', + passing_labels: ['string'], + type: 'label_model', + }, + ], + }); + + console.log(_eval.id); + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.JsonValue; + import com.openai.models.evals.EvalCreateParams; + import com.openai.models.evals.EvalCreateResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + EvalCreateParams params = EvalCreateParams.builder() + .customDataSourceConfig(EvalCreateParams.DataSourceConfig.Custom.ItemSchema.builder() + .putAdditionalProperty("foo", JsonValue.from("bar")) + .build()) + .addTestingCriterion(EvalCreateParams.TestingCriterion.LabelModel.builder() + .addInput(EvalCreateParams.TestingCriterion.LabelModel.Input.SimpleInputMessage.builder() + .content("content") + .role("role") + .build()) + .addLabel("string") + .model("model") + .name("name") + .addPassingLabel("string") + .build()) + .build(); + EvalCreateResponse eval = client.evals().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + eval_ = openai.evals.create( + data_source_config: {item_schema: {foo: "bar"}, type: :custom}, + testing_criteria: [ + { + input: [{content: "content", role: "role"}], + labels: ["string"], + model: "model", + name: "name", + passing_labels: ["string"], + type: :label_model + } + ] + ) + + puts(eval_) + description: > + Create the structure of an evaluation that can be used to test a model's performance. + + An evaluation is a set of testing criteria and the config for a data source, which dictates the schema + of the data used in the evaluation. After creating an evaluation, you can run it on different models + and model parameters. We support several types of graders and datasources. + + For more information, see the [Evals guide](https://platform.openai.com/docs/guides/evals). + /evals/{eval_id}: + get: + operationId: getEval + tags: + - Evals + summary: Get an eval + parameters: + - name: eval_id + in: path + required: true + schema: + type: string + description: The ID of the evaluation to retrieve. + responses: + '200': + description: The evaluation + content: + application/json: + schema: + $ref: '#/components/schemas/Eval' + x-oaiMeta: + name: Get an eval + group: evals + returns: >- + The [Eval](https://platform.openai.com/docs/api-reference/evals/object) object matching the + specified ID. 
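+        # A hedged sketch (comment aside) of creating the eval returned by
+        # this endpoint's example response below; the string_check grader
+        # fields and the custom item_schema are taken from that response and
+        # from the create examples above (openai-python SDK):
+        #
+        #   from openai import OpenAI
+        #   client = OpenAI()
+        #   eval_obj = client.evals.create(
+        #       name="External Data Eval",
+        #       data_source_config={
+        #           "type": "custom",
+        #           "item_schema": {
+        #               "type": "object",
+        #               "properties": {
+        #                   "input": {"type": "string"},
+        #                   "ground_truth": {"type": "string"},
+        #               },
+        #               "required": ["input", "ground_truth"],
+        #           },
+        #       },
+        #       testing_criteria=[{
+        #           "type": "string_check",
+        #           "name": "String check",
+        #           "input": "{{item.input}}",
+        #           "reference": "{{item.ground_truth}}",
+        #           "operation": "eq",
+        #       }],
+        #   )
+        #   print(eval_obj.id)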
+        path: get
+        examples:
+          response: |
+            {
+              "object": "eval",
+              "id": "eval_67abd54d9b0081909a86353f6fb9317a",
+              "data_source_config": {
+                "type": "custom",
+                "schema": {
+                  "type": "object",
+                  "properties": {
+                    "item": {
+                      "type": "object",
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        },
+                        "ground_truth": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "input",
+                        "ground_truth"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item"
+                  ]
+                }
+              },
+              "testing_criteria": [
+                {
+                  "name": "String check",
+                  "id": "String check-2eaf2d8d-d649-4335-8148-9535a7ca73c2",
+                  "type": "string_check",
+                  "input": "{{item.input}}",
+                  "reference": "{{item.ground_truth}}",
+                  "operation": "eq"
+                }
+              ],
+              "name": "External Data Eval",
+              "created_at": 1739314509,
+              "metadata": {}
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/evals/eval_67abd54d9b0081909a86353f6fb9317a \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              eval = client.evals.retrieve(
+                  "eval_id",
+              )
+              print(eval.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const _eval = await client.evals.retrieve('eval_id');
+
+              console.log(_eval.id);
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.evals.EvalRetrieveParams;
+              import com.openai.models.evals.EvalRetrieveResponse;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      EvalRetrieveResponse eval = client.evals().retrieve("eval_id");
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              eval_ = openai.evals.retrieve("eval_id")
+
+              puts(eval_)
+      description: |
+        Get an evaluation by ID.
+    post:
+      operationId: updateEval
+      tags:
+        - Evals
+      summary: Update an eval
+      parameters:
+        - name: eval_id
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the evaluation to update.
+      requestBody:
+        description: Request to update an evaluation
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                name:
+                  type: string
+                  description: Rename the evaluation.
+                metadata:
+                  $ref: '#/components/schemas/Metadata'
+      responses:
+        '200':
+          description: The updated evaluation
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Eval'
+      x-oaiMeta:
+        name: Update an eval
+        group: evals
+        returns: >-
+          The [Eval](https://platform.openai.com/docs/api-reference/evals/object) object matching the updated
+          version.
+        path: update
+        examples:
+          response: |
+            {
+              "object": "eval",
+              "id": "eval_67abd54d9b0081909a86353f6fb9317a",
+              "data_source_config": {
+                "type": "custom",
+                "schema": {
+                  "type": "object",
+                  "properties": {
+                    "item": {
+                      "type": "object",
+                      "properties": {
+                        "input": {
+                          "type": "string"
+                        },
+                        "ground_truth": {
+                          "type": "string"
+                        }
+                      },
+                      "required": [
+                        "input",
+                        "ground_truth"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "item"
+                  ]
+                }
+              },
+              "testing_criteria": [
+                {
+                  "name": "String check",
+                  "id": "String check-2eaf2d8d-d649-4335-8148-9535a7ca73c2",
+                  "type": "string_check",
+                  "input": "{{item.input}}",
+                  "reference": "{{item.ground_truth}}",
+                  "operation": "eq"
+                }
+              ],
+              "name": "Updated Eval",
+              "created_at": 1739314509,
+              "metadata": {"description": "Updated description"}
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/evals/eval_67abd54d9b0081909a86353f6fb9317a \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{"name": "Updated Eval", "metadata": {"description": "Updated description"}}'
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              eval = client.evals.update(
+                  eval_id="eval_id",
+              )
+              print(eval.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const _eval = await client.evals.update('eval_id');
+
+              console.log(_eval.id);
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.evals.EvalUpdateParams;
+              import com.openai.models.evals.EvalUpdateResponse;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      EvalUpdateResponse eval = client.evals().update("eval_id");
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              eval_ = openai.evals.update("eval_id")
+
+              puts(eval_)
+      description: |
+        Update certain properties of an evaluation.
+    delete:
+      operationId: deleteEval
+      tags:
+        - Evals
+      summary: Delete an eval
+      parameters:
+        - name: eval_id
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the evaluation to delete.
+      responses:
+        '200':
+          description: Successfully deleted the evaluation.
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  object:
+                    type: string
+                    example: eval.deleted
+                  deleted:
+                    type: boolean
+                    example: true
+                  eval_id:
+                    type: string
+                    example: eval_abc123
+                required:
+                  - object
+                  - deleted
+                  - eval_id
+        '404':
+          description: Evaluation not found.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Error'
+      x-oaiMeta:
+        name: Delete an eval
+        group: evals
+        returns: A deletion confirmation object.
+ examples: + response: | + { + "object": "eval.deleted", + "deleted": true, + "eval_id": "eval_abc123" + } + request: + curl: | + curl https://api.openai.com/v1/evals/eval_abc123 \ + -X DELETE \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + eval = client.evals.delete( + "eval_id", + ) + print(eval.eval_id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const _eval = await client.evals.delete('eval_id'); + + console.log(_eval.eval_id); + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.evals.EvalDeleteParams; + import com.openai.models.evals.EvalDeleteResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + EvalDeleteResponse eval = client.evals().delete("eval_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + eval_ = openai.evals.delete("eval_id") + + puts(eval_) + description: | + Delete an evaluation. + /evals/{eval_id}/runs: + get: + operationId: getEvalRuns + tags: + - Evals + summary: Get eval runs + parameters: + - name: eval_id + in: path + required: true + schema: + type: string + description: The ID of the evaluation to retrieve runs for. + - name: after + in: query + description: Identifier for the last run from the previous pagination request. + required: false + schema: + type: string + - name: limit + in: query + description: Number of runs to retrieve. + required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: >- + Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for descending order. + Defaults to `asc`. + required: false + schema: + type: string + enum: + - asc + - desc + default: asc + - name: status + in: query + description: Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed` | `canceled`. + required: false + schema: + type: string + enum: + - queued + - in_progress + - completed + - canceled + - failed + responses: + '200': + description: A list of runs for the evaluation + content: + application/json: + schema: + $ref: '#/components/schemas/EvalRunList' + x-oaiMeta: + name: Get eval runs + group: evals + returns: >- + A list of [EvalRun](https://platform.openai.com/docs/api-reference/evals/run-object) objects + matching the specified ID. 
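+        # A filtering sketch (comment aside): listing only failed runs via the
+        # `status` query parameter defined above. This assumes the
+        # openai-python SDK exposes `status` as a keyword argument, which is
+        # not verified here; the list call itself mirrors the Python example
+        # below.
+        #
+        #   from openai import OpenAI
+        #   client = OpenAI()
+        #   for run in client.evals.runs.list(eval_id="eval_id", status="failed"):
+        #       print(run.id, run.status)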
+ path: get-runs + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "eval.run", + "id": "evalrun_67e0c7d31560819090d60c0780591042", + "eval_id": "eval_67e0c726d560819083f19a957c4c640b", + "report_url": "https://platform.openai.com/evaluations/eval_67e0c726d560819083f19a957c4c640b", + "status": "completed", + "model": "o3-mini", + "name": "bulk_with_negative_examples_o3-mini", + "created_at": 1742784467, + "result_counts": { + "total": 1, + "errored": 0, + "failed": 0, + "passed": 1 + }, + "per_model_usage": [ + { + "model_name": "o3-mini", + "invocation_count": 1, + "prompt_tokens": 563, + "completion_tokens": 874, + "total_tokens": 1437, + "cached_tokens": 0 + } + ], + "per_testing_criteria_results": [ + { + "testing_criteria": "Push Notification Summary Grader-1808cd0b-eeec-4e0b-a519-337e79f4f5d1", + "passed": 1, + "failed": 0 + } + ], + "data_source": { + "type": "completions", + "source": { + "type": "file_content", + "content": [ + { + "item": { + "notifications": "\n- New message from Sarah: \"Can you call me later?\"\n- Your package has been delivered!\n- Flash sale: 20% off electronics for the next 2 hours!\n" + } + } + ] + }, + "input_messages": { + "type": "template", + "template": [ + { + "type": "message", + "role": "developer", + "content": { + "type": "input_text", + "text": "\n\n\n\nYou are a helpful assistant that takes in an array of push notifications and returns a collapsed summary of them.\nThe push notification will be provided as follows:\n\n...notificationlist...\n\n\nYou should return just the summary and nothing else.\n\n\nYou should return a summary that is concise and snappy.\n\n\nHere is an example of a good summary:\n\n- Traffic alert: Accident reported on Main Street.- Package out for delivery: Expected by 5 PM.- New friend suggestion: Connect with Emma.\n\n\nTraffic alert, package expected by 5pm, suggestion for new friend (Emily).\n\n\n\nHere is an example of a bad summary:\n\n- Traffic alert: Accident reported on Main Street.- Package out for delivery: Expected by 5 PM.- New friend suggestion: Connect with Emma.\n\n\nTraffic alert reported on main street. You have a package that will arrive by 5pm, Emily is a new friend suggested for you.\n\n" + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "{{item.notifications}}" + } + } + ] + }, + "model": "o3-mini", + "sampling_params": null + }, + "error": null, + "metadata": {} + } + ], + "first_id": "evalrun_67e0c7d31560819090d60c0780591042", + "last_id": "evalrun_67e0c7d31560819090d60c0780591042", + "has_more": true + } + request: + curl: | + curl https://api.openai.com/v1/evals/egroup_67abd54d9b0081909a86353f6fb9317a/runs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.evals.runs.list( + eval_id="eval_id", + ) + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. 
+ for await (const runListResponse of client.evals.runs.list('eval_id')) { + console.log(runListResponse.id); + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.evals.runs.RunListPage; + import com.openai.models.evals.runs.RunListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunListPage page = client.evals().runs().list("eval_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.evals.runs.list("eval_id") + + puts(page) + description: | + Get a list of runs for an evaluation. + post: + operationId: createEvalRun + tags: + - Evals + summary: Create eval run + parameters: + - in: path + name: eval_id + required: true + schema: + type: string + description: The ID of the evaluation to create a run for. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEvalRunRequest' + responses: + '201': + description: Successfully created a run for the evaluation + content: + application/json: + schema: + $ref: '#/components/schemas/EvalRun' + '400': + description: Bad request (for example, missing eval object) + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + x-oaiMeta: + name: Create eval run + group: evals + returns: >- + The [EvalRun](https://platform.openai.com/docs/api-reference/evals/run-object) object matching the + specified ID. + examples: + response: | + { + "object": "eval.run", + "id": "evalrun_67e57965b480819094274e3a32235e4c", + "eval_id": "eval_67e579652b548190aaa83ada4b125f47", + "report_url": "https://platform.openai.com/evaluations/eval_67e579652b548190aaa83ada4b125f47&run_id=evalrun_67e57965b480819094274e3a32235e4c", + "status": "queued", + "model": "gpt-4o-mini", + "name": "gpt-4o-mini", + "created_at": 1743092069, + "result_counts": { + "total": 0, + "errored": 0, + "failed": 0, + "passed": 0 + }, + "per_model_usage": null, + "per_testing_criteria_results": null, + "data_source": { + "type": "completions", + "source": { + "type": "file_content", + "content": [ + { + "item": { + "input": "Tech Company Launches Advanced Artificial Intelligence Platform", + "ground_truth": "Technology" + } + } + ] + }, + "input_messages": { + "type": "template", + "template": [ + { + "type": "message", + "role": "developer", + "content": { + "type": "input_text", + "text": "Categorize a given news headline into one of the following topics: Technology, Markets, World, Business, or Sports.\n\n# Steps\n\n1. Analyze the content of the news headline to understand its primary focus.\n2. Extract the subject matter, identifying any key indicators or keywords.\n3. Use the identified indicators to determine the most suitable category out of the five options: Technology, Markets, World, Business, or Sports.\n4. Ensure only one category is selected per headline.\n\n# Output Format\n\nRespond with the chosen category as a single word. 
For instance: \"Technology\", \"Markets\", \"World\", \"Business\", or \"Sports\".\n\n# Examples\n\n**Input**: \"Apple Unveils New iPhone Model, Featuring Advanced AI Features\" \n**Output**: \"Technology\"\n\n**Input**: \"Global Stocks Mixed as Investors Await Central Bank Decisions\" \n**Output**: \"Markets\"\n\n**Input**: \"War in Ukraine: Latest Updates on Negotiation Status\" \n**Output**: \"World\"\n\n**Input**: \"Microsoft in Talks to Acquire Gaming Company for $2 Billion\" \n**Output**: \"Business\"\n\n**Input**: \"Manchester United Secures Win in Premier League Football Match\" \n**Output**: \"Sports\" \n\n# Notes\n\n- If the headline appears to fit into more than one category, choose the most dominant theme.\n- Keywords or phrases such as \"stocks\", \"company acquisition\", \"match\", or technological brands can be good indicators for classification.\n" + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "{{item.input}}" + } + } + ] + }, + "model": "gpt-4o-mini", + "sampling_params": { + "seed": 42, + "temperature": 1.0, + "top_p": 1.0, + "max_completions_tokens": 2048 + } + }, + "error": null, + "metadata": {} + } + request: + curl: | + curl https://api.openai.com/v1/evals/eval_67e579652b548190aaa83ada4b125f47/runs \ + -X POST \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"name":"gpt-4o-mini","data_source":{"type":"completions","input_messages":{"type":"template","template":[{"role":"developer","content":"Categorize a given news headline into one of the following topics: Technology, Markets, World, Business, or Sports.\n\n# Steps\n\n1. Analyze the content of the news headline to understand its primary focus.\n2. Extract the subject matter, identifying any key indicators or keywords.\n3. Use the identified indicators to determine the most suitable category out of the five options: Technology, Markets, World, Business, or Sports.\n4. Ensure only one category is selected per headline.\n\n# Output Format\n\nRespond with the chosen category as a single word. 
For instance: \"Technology\", \"Markets\", \"World\", \"Business\", or \"Sports\".\n\n# Examples\n\n**Input**: \"Apple Unveils New iPhone Model, Featuring Advanced AI Features\" \n**Output**: \"Technology\"\n\n**Input**: \"Global Stocks Mixed as Investors Await Central Bank Decisions\" \n**Output**: \"Markets\"\n\n**Input**: \"War in Ukraine: Latest Updates on Negotiation Status\" \n**Output**: \"World\"\n\n**Input**: \"Microsoft in Talks to Acquire Gaming Company for $2 Billion\" \n**Output**: \"Business\"\n\n**Input**: \"Manchester United Secures Win in Premier League Football Match\" \n**Output**: \"Sports\" \n\n# Notes\n\n- If the headline appears to fit into more than one category, choose the most dominant theme.\n- Keywords or phrases such as \"stocks\", \"company acquisition\", \"match\", or technological brands can be good indicators for classification.\n"} , {"role":"user","content":"{{item.input}}"}]} ,"sampling_params":{"temperature":1,"max_completions_tokens":2048,"top_p":1,"seed":42},"model":"gpt-4o-mini","source":{"type":"file_content","content":[{"item":{"input":"Tech Company Launches Advanced Artificial Intelligence Platform","ground_truth":"Technology"}}]}}' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.evals.runs.create( + eval_id="eval_id", + data_source={ + "source": { + "content": [{ + "item": { + "foo": "bar" + } + }], + "type": "file_content", + }, + "type": "jsonl", + }, + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.evals.runs.create('eval_id', { + data_source: { source: { content: [{ item: { foo: 'bar' } }], type: 'file_content' }, type: 'jsonl' }, + }); + + console.log(run.id); + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.JsonValue; + import com.openai.models.evals.runs.CreateEvalJsonlRunDataSource; + import com.openai.models.evals.runs.RunCreateParams; + import com.openai.models.evals.runs.RunCreateResponse; + import java.util.List; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunCreateParams params = RunCreateParams.builder() + .evalId("eval_id") + .dataSource(CreateEvalJsonlRunDataSource.builder() + .fileContentSource(List.of(CreateEvalJsonlRunDataSource.Source.FileContent.Content.builder() + .item(CreateEvalJsonlRunDataSource.Source.FileContent.Content.Item.builder() + .putAdditionalProperty("foo", JsonValue.from("bar")) + .build()) + .build())) + .build()) + .build(); + RunCreateResponse run = client.evals().runs().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.evals.runs.create( + "eval_id", + data_source: {source: {content: [{item: {foo: "bar"}}], type: :file_content}, type: :jsonl} + ) + + puts(run) + description: > + Kicks off a new run for a given evaluation, specifying the data source, and what model configuration + to use to test. The datasource will be validated against the schema specified in the config of the + evaluation. + /evals/{eval_id}/runs/{run_id}: + get: + operationId: getEvalRun + tags: + - Evals + summary: Get an eval run + parameters: + - name: eval_id + in: path + required: true + schema: + type: string + description: The ID of the evaluation to retrieve runs for. 
+ - name: run_id + in: path + required: true + schema: + type: string + description: The ID of the run to retrieve. + responses: + '200': + description: The evaluation run + content: + application/json: + schema: + $ref: '#/components/schemas/EvalRun' + x-oaiMeta: + name: Get an eval run + group: evals + returns: >- + The [EvalRun](https://platform.openai.com/docs/api-reference/evals/run-object) object matching the + specified ID. + path: get + examples: + response: | + { + "object": "eval.run", + "id": "evalrun_67abd54d60ec8190832b46859da808f7", + "eval_id": "eval_67abd54d9b0081909a86353f6fb9317a", + "report_url": "https://platform.openai.com/evaluations/eval_67abd54d9b0081909a86353f6fb9317a?run_id=evalrun_67abd54d60ec8190832b46859da808f7", + "status": "queued", + "model": "gpt-4o-mini", + "name": "gpt-4o-mini", + "created_at": 1743092069, + "result_counts": { + "total": 0, + "errored": 0, + "failed": 0, + "passed": 0 + }, + "per_model_usage": null, + "per_testing_criteria_results": null, + "data_source": { + "type": "completions", + "source": { + "type": "file_content", + "content": [ + { + "item": { + "input": "Tech Company Launches Advanced Artificial Intelligence Platform", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "Central Bank Increases Interest Rates Amid Inflation Concerns", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "International Summit Addresses Climate Change Strategies", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Major Retailer Reports Record-Breaking Holiday Sales", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "National Team Qualifies for World Championship Finals", + "ground_truth": "Sports" + } + }, + { + "item": { + "input": "Stock Markets Rally After Positive Economic Data Released", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "Global Manufacturer Announces Merger with Competitor", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "Breakthrough in Renewable Energy Technology Unveiled", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "World Leaders Sign Historic Climate Agreement", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Professional Athlete Sets New Record in Championship Event", + "ground_truth": "Sports" + } + }, + { + "item": { + "input": "Financial Institutions Adapt to New Regulatory Requirements", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "Tech Conference Showcases Advances in Artificial Intelligence", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "Global Markets Respond to Oil Price Fluctuations", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "International Cooperation Strengthened Through New Treaty", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Sports League Announces Revised Schedule for Upcoming Season", + "ground_truth": "Sports" + } + } + ] + }, + "input_messages": { + "type": "template", + "template": [ + { + "type": "message", + "role": "developer", + "content": { + "type": "input_text", + "text": "Categorize a given news headline into one of the following topics: Technology, Markets, World, Business, or Sports.\n\n# Steps\n\n1. Analyze the content of the news headline to understand its primary focus.\n2. Extract the subject matter, identifying any key indicators or keywords.\n3. 
Use the identified indicators to determine the most suitable category out of the five options: Technology, Markets, World, Business, or Sports.\n4. Ensure only one category is selected per headline.\n\n# Output Format\n\nRespond with the chosen category as a single word. For instance: \"Technology\", \"Markets\", \"World\", \"Business\", or \"Sports\".\n\n# Examples\n\n**Input**: \"Apple Unveils New iPhone Model, Featuring Advanced AI Features\" \n**Output**: \"Technology\"\n\n**Input**: \"Global Stocks Mixed as Investors Await Central Bank Decisions\" \n**Output**: \"Markets\"\n\n**Input**: \"War in Ukraine: Latest Updates on Negotiation Status\" \n**Output**: \"World\"\n\n**Input**: \"Microsoft in Talks to Acquire Gaming Company for $2 Billion\" \n**Output**: \"Business\"\n\n**Input**: \"Manchester United Secures Win in Premier League Football Match\" \n**Output**: \"Sports\" \n\n# Notes\n\n- If the headline appears to fit into more than one category, choose the most dominant theme.\n- Keywords or phrases such as \"stocks\", \"company acquisition\", \"match\", or technological brands can be good indicators for classification.\n" + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "{{item.input}}" + } + } + ] + }, + "model": "gpt-4o-mini", + "sampling_params": { + "seed": 42, + "temperature": 1.0, + "top_p": 1.0, + "max_completions_tokens": 2048 + } + }, + "error": null, + "metadata": {} + } + request: + curl: > + curl + https://api.openai.com/v1/evals/eval_67abd54d9b0081909a86353f6fb9317a/runs/evalrun_67abd54d60ec8190832b46859da808f7 + \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.evals.runs.retrieve( + run_id="run_id", + eval_id="eval_id", + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.evals.runs.retrieve('run_id', { eval_id: 'eval_id' }); + + console.log(run.id); + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.evals.runs.RunRetrieveParams; + import com.openai.models.evals.runs.RunRetrieveResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunRetrieveParams params = RunRetrieveParams.builder() + .evalId("eval_id") + .runId("run_id") + .build(); + RunRetrieveResponse run = client.evals().runs().retrieve(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.evals.runs.retrieve("run_id", eval_id: "eval_id") + + puts(run) + description: | + Get an evaluation run by ID. + post: + operationId: cancelEvalRun + tags: + - Evals + summary: Cancel eval run + parameters: + - name: eval_id + in: path + required: true + schema: + type: string + description: The ID of the evaluation whose run you want to cancel. + - name: run_id + in: path + required: true + schema: + type: string + description: The ID of the run to cancel. 
+ responses: + '200': + description: The canceled eval run object + content: + application/json: + schema: + $ref: '#/components/schemas/EvalRun' + x-oaiMeta: + name: Cancel eval run + group: evals + returns: >- + The updated [EvalRun](https://platform.openai.com/docs/api-reference/evals/run-object) object + reflecting that the run is canceled. + path: post + examples: + response: | + { + "object": "eval.run", + "id": "evalrun_67abd54d60ec8190832b46859da808f7", + "eval_id": "eval_67abd54d9b0081909a86353f6fb9317a", + "report_url": "https://platform.openai.com/evaluations/eval_67abd54d9b0081909a86353f6fb9317a?run_id=evalrun_67abd54d60ec8190832b46859da808f7", + "status": "canceled", + "model": "gpt-4o-mini", + "name": "gpt-4o-mini", + "created_at": 1743092069, + "result_counts": { + "total": 0, + "errored": 0, + "failed": 0, + "passed": 0 + }, + "per_model_usage": null, + "per_testing_criteria_results": null, + "data_source": { + "type": "completions", + "source": { + "type": "file_content", + "content": [ + { + "item": { + "input": "Tech Company Launches Advanced Artificial Intelligence Platform", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "Central Bank Increases Interest Rates Amid Inflation Concerns", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "International Summit Addresses Climate Change Strategies", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Major Retailer Reports Record-Breaking Holiday Sales", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "National Team Qualifies for World Championship Finals", + "ground_truth": "Sports" + } + }, + { + "item": { + "input": "Stock Markets Rally After Positive Economic Data Released", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "Global Manufacturer Announces Merger with Competitor", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "Breakthrough in Renewable Energy Technology Unveiled", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "World Leaders Sign Historic Climate Agreement", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Professional Athlete Sets New Record in Championship Event", + "ground_truth": "Sports" + } + }, + { + "item": { + "input": "Financial Institutions Adapt to New Regulatory Requirements", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "Tech Conference Showcases Advances in Artificial Intelligence", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "Global Markets Respond to Oil Price Fluctuations", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "International Cooperation Strengthened Through New Treaty", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Sports League Announces Revised Schedule for Upcoming Season", + "ground_truth": "Sports" + } + } + ] + }, + "input_messages": { + "type": "template", + "template": [ + { + "type": "message", + "role": "developer", + "content": { + "type": "input_text", + "text": "Categorize a given news headline into one of the following topics: Technology, Markets, World, Business, or Sports.\n\n# Steps\n\n1. Analyze the content of the news headline to understand its primary focus.\n2. Extract the subject matter, identifying any key indicators or keywords.\n3. Use the identified indicators to determine the most suitable category out of the five options: Technology, Markets, World, Business, or Sports.\n4. 
Ensure only one category is selected per headline.\n\n# Output Format\n\nRespond with the chosen category as a single word. For instance: \"Technology\", \"Markets\", \"World\", \"Business\", or \"Sports\".\n\n# Examples\n\n**Input**: \"Apple Unveils New iPhone Model, Featuring Advanced AI Features\" \n**Output**: \"Technology\"\n\n**Input**: \"Global Stocks Mixed as Investors Await Central Bank Decisions\" \n**Output**: \"Markets\"\n\n**Input**: \"War in Ukraine: Latest Updates on Negotiation Status\" \n**Output**: \"World\"\n\n**Input**: \"Microsoft in Talks to Acquire Gaming Company for $2 Billion\" \n**Output**: \"Business\"\n\n**Input**: \"Manchester United Secures Win in Premier League Football Match\" \n**Output**: \"Sports\" \n\n# Notes\n\n- If the headline appears to fit into more than one category, choose the most dominant theme.\n- Keywords or phrases such as \"stocks\", \"company acquisition\", \"match\", or technological brands can be good indicators for classification.\n" + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "{{item.input}}" + } + } + ] + }, + "model": "gpt-4o-mini", + "sampling_params": { + "seed": 42, + "temperature": 1.0, + "top_p": 1.0, + "max_completions_tokens": 2048 + } + }, + "error": null, + "metadata": {} + } + request: + curl: > + curl + https://api.openai.com/v1/evals/eval_67abd54d9b0081909a86353f6fb9317a/runs/evalrun_67abd54d60ec8190832b46859da808f7/cancel + \ + -X POST \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.evals.runs.cancel( + run_id="run_id", + eval_id="eval_id", + ) + print(response.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.evals.runs.cancel('run_id', { eval_id: 'eval_id' }); + + console.log(response.id); + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.evals.runs.RunCancelParams; + import com.openai.models.evals.runs.RunCancelResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunCancelParams params = RunCancelParams.builder() + .evalId("eval_id") + .runId("run_id") + .build(); + RunCancelResponse response = client.evals().runs().cancel(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.evals.runs.cancel("run_id", eval_id: "eval_id") + + puts(response) + description: | + Cancel an ongoing evaluation run. + delete: + operationId: deleteEvalRun + tags: + - Evals + summary: Delete eval run + parameters: + - name: eval_id + in: path + required: true + schema: + type: string + description: The ID of the evaluation to delete the run from. + - name: run_id + in: path + required: true + schema: + type: string + description: The ID of the run to delete. 
+      responses:
+        '200':
+          description: Successfully deleted the eval run
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  object:
+                    type: string
+                    example: eval.run.deleted
+                  deleted:
+                    type: boolean
+                    example: true
+                  run_id:
+                    type: string
+                    example: evalrun_677469f564d48190807532a852da3afb
+        '404':
+          description: Run not found
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Error'
+      x-oaiMeta:
+        name: Delete eval run
+        group: evals
+        returns: An object containing the status of the delete operation.
+        path: delete
+        examples:
+          response: |
+            {
+              "object": "eval.run.deleted",
+              "deleted": true,
+              "run_id": "evalrun_abc456"
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/evals/eval_123abc/runs/evalrun_abc456 \
+                -X DELETE \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              run = client.evals.runs.delete(
+                  run_id="run_id",
+                  eval_id="eval_id",
+              )
+              print(run.run_id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const run = await client.evals.runs.delete('run_id', { eval_id: 'eval_id' });
+
+              console.log(run.run_id);
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.evals.runs.RunDeleteParams;
+              import com.openai.models.evals.runs.RunDeleteResponse;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      RunDeleteParams params = RunDeleteParams.builder()
+                          .evalId("eval_id")
+                          .runId("run_id")
+                          .build();
+                      RunDeleteResponse run = client.evals().runs().delete(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              run = openai.evals.runs.delete("run_id", eval_id: "eval_id")
+
+              puts(run)
+      description: |
+        Delete an eval run.
+  /evals/{eval_id}/runs/{run_id}/output_items:
+    get:
+      operationId: getEvalRunOutputItems
+      tags:
+        - Evals
+      summary: Get eval run output items
+      parameters:
+        - name: eval_id
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the evaluation to retrieve output items for.
+        - name: run_id
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the run to retrieve output items for.
+        - name: after
+          in: query
+          description: Identifier for the last output item from the previous pagination request.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: Number of output items to retrieve.
+          required: false
+          schema:
+            type: integer
+            default: 20
+        - name: status
+          in: query
+          description: |
+            Filter output items by status. Use `fail` to filter by failed output
+            items or `pass` to filter by passed output items.
+          required: false
+          schema:
+            type: string
+            enum:
+              - fail
+              - pass
+        - name: order
+          in: query
+          description: >-
+            Sort order for output items by timestamp. Use `asc` for ascending order or `desc` for descending
+            order. Defaults to `asc`.
+ required: false + schema: + type: string + enum: + - asc + - desc + default: asc + responses: + '200': + description: A list of output items for the evaluation run + content: + application/json: + schema: + $ref: '#/components/schemas/EvalRunOutputItemList' + x-oaiMeta: + name: Get eval run output items + group: evals + returns: >- + A list of + [EvalRunOutputItem](https://platform.openai.com/docs/api-reference/evals/run-output-item-object) + objects matching the specified ID. + path: get + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "eval.run.output_item", + "id": "outputitem_67e5796c28e081909917bf79f6e6214d", + "created_at": 1743092076, + "run_id": "evalrun_67abd54d60ec8190832b46859da808f7", + "eval_id": "eval_67abd54d9b0081909a86353f6fb9317a", + "status": "pass", + "datasource_item_id": 5, + "datasource_item": { + "input": "Stock Markets Rally After Positive Economic Data Released", + "ground_truth": "Markets" + }, + "results": [ + { + "name": "String check-a2486074-d803-4445-b431-ad2262e85d47", + "sample": null, + "passed": true, + "score": 1.0 + } + ], + "sample": { + "input": [ + { + "role": "developer", + "content": "Categorize a given news headline into one of the following topics: Technology, Markets, World, Business, or Sports.\n\n# Steps\n\n1. Analyze the content of the news headline to understand its primary focus.\n2. Extract the subject matter, identifying any key indicators or keywords.\n3. Use the identified indicators to determine the most suitable category out of the five options: Technology, Markets, World, Business, or Sports.\n4. Ensure only one category is selected per headline.\n\n# Output Format\n\nRespond with the chosen category as a single word. For instance: \"Technology\", \"Markets\", \"World\", \"Business\", or \"Sports\".\n\n# Examples\n\n**Input**: \"Apple Unveils New iPhone Model, Featuring Advanced AI Features\" \n**Output**: \"Technology\"\n\n**Input**: \"Global Stocks Mixed as Investors Await Central Bank Decisions\" \n**Output**: \"Markets\"\n\n**Input**: \"War in Ukraine: Latest Updates on Negotiation Status\" \n**Output**: \"World\"\n\n**Input**: \"Microsoft in Talks to Acquire Gaming Company for $2 Billion\" \n**Output**: \"Business\"\n\n**Input**: \"Manchester United Secures Win in Premier League Football Match\" \n**Output**: \"Sports\" \n\n# Notes\n\n- If the headline appears to fit into more than one category, choose the most dominant theme.\n- Keywords or phrases such as \"stocks\", \"company acquisition\", \"match\", or technological brands can be good indicators for classification.\n", + "tool_call_id": null, + "tool_calls": null, + "function_call": null + }, + { + "role": "user", + "content": "Stock Markets Rally After Positive Economic Data Released", + "tool_call_id": null, + "tool_calls": null, + "function_call": null + } + ], + "output": [ + { + "role": "assistant", + "content": "Markets", + "tool_call_id": null, + "tool_calls": null, + "function_call": null + } + ], + "finish_reason": "stop", + "model": "gpt-4o-mini-2024-07-18", + "usage": { + "total_tokens": 325, + "completion_tokens": 2, + "prompt_tokens": 323, + "cached_tokens": 0 + }, + "error": null, + "temperature": 1.0, + "max_completion_tokens": 2048, + "top_p": 1.0, + "seed": 42 + } + } + ], + "first_id": "outputitem_67e5796c28e081909917bf79f6e6214d", + "last_id": "outputitem_67e5796c28e081909917bf79f6e6214d", + "has_more": true + } + request: + curl: > + curl + 
https://api.openai.com/v1/evals/eval_67abd54d9b0081909a86353f6fb9317a/runs/evalrun_67abd54d60ec8190832b46859da808f7/output_items
+              \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              page = client.evals.runs.output_items.list(
+                  run_id="run_id",
+                  eval_id="eval_id",
+              )
+              page = page.data[0]
+              print(page.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              // Automatically fetches more pages as needed.
+              for await (const outputItemListResponse of client.evals.runs.outputItems.list('run_id', {
+                eval_id: 'eval_id',
+              })) {
+                console.log(outputItemListResponse.id);
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.evals.runs.outputitems.OutputItemListPage;
+              import com.openai.models.evals.runs.outputitems.OutputItemListParams;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      OutputItemListParams params = OutputItemListParams.builder()
+                          .evalId("eval_id")
+                          .runId("run_id")
+                          .build();
+                      OutputItemListPage page = client.evals().runs().outputItems().list(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              page = openai.evals.runs.output_items.list("run_id", eval_id: "eval_id")
+
+              puts(page)
+      description: |
+        Get a list of output items for an evaluation run.
+  /evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}:
+    get:
+      operationId: getEvalRunOutputItem
+      tags:
+        - Evals
+      summary: Get an output item of an eval run
+      parameters:
+        - name: eval_id
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the evaluation to retrieve the output item for.
+        - name: run_id
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the run to retrieve.
+        - name: output_item_id
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the output item to retrieve.
+      responses:
+        '200':
+          description: The evaluation run output item
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/EvalRunOutputItem'
+      x-oaiMeta:
+        name: Get an output item of an eval run
+        group: evals
+        returns: >-
+          The [EvalRunOutputItem](https://platform.openai.com/docs/api-reference/evals/run-output-item-object)
+          object matching the specified ID.
+        path: get
+        examples:
+          response: |
+            {
+              "object": "eval.run.output_item",
+              "id": "outputitem_67e5796c28e081909917bf79f6e6214d",
+              "created_at": 1743092076,
+              "run_id": "evalrun_67abd54d60ec8190832b46859da808f7",
+              "eval_id": "eval_67abd54d9b0081909a86353f6fb9317a",
+              "status": "pass",
+              "datasource_item_id": 5,
+              "datasource_item": {
+                "input": "Stock Markets Rally After Positive Economic Data Released",
+                "ground_truth": "Markets"
+              },
+              "results": [
+                {
+                  "name": "String check-a2486074-d803-4445-b431-ad2262e85d47",
+                  "sample": null,
+                  "passed": true,
+                  "score": 1.0
+                }
+              ],
+              "sample": {
+                "input": [
+                  {
+                    "role": "developer",
+                    "content": "Categorize a given news headline into one of the following topics: Technology, Markets, World, Business, or Sports.\n\n# Steps\n\n1. Analyze the content of the news headline to understand its primary focus.\n2. Extract the subject matter, identifying any key indicators or keywords.\n3. 
Use the identified indicators to determine the most suitable category out of the five options: Technology, Markets, World, Business, or Sports.\n4. Ensure only one category is selected per headline.\n\n# Output Format\n\nRespond with the chosen category as a single word. For instance: \"Technology\", \"Markets\", \"World\", \"Business\", or \"Sports\".\n\n# Examples\n\n**Input**: \"Apple Unveils New iPhone Model, Featuring Advanced AI Features\" \n**Output**: \"Technology\"\n\n**Input**: \"Global Stocks Mixed as Investors Await Central Bank Decisions\" \n**Output**: \"Markets\"\n\n**Input**: \"War in Ukraine: Latest Updates on Negotiation Status\" \n**Output**: \"World\"\n\n**Input**: \"Microsoft in Talks to Acquire Gaming Company for $2 Billion\" \n**Output**: \"Business\"\n\n**Input**: \"Manchester United Secures Win in Premier League Football Match\" \n**Output**: \"Sports\" \n\n# Notes\n\n- If the headline appears to fit into more than one category, choose the most dominant theme.\n- Keywords or phrases such as \"stocks\", \"company acquisition\", \"match\", or technological brands can be good indicators for classification.\n", + "tool_call_id": null, + "tool_calls": null, + "function_call": null + }, + { + "role": "user", + "content": "Stock Markets Rally After Positive Economic Data Released", + "tool_call_id": null, + "tool_calls": null, + "function_call": null + } + ], + "output": [ + { + "role": "assistant", + "content": "Markets", + "tool_call_id": null, + "tool_calls": null, + "function_call": null + } + ], + "finish_reason": "stop", + "model": "gpt-4o-mini-2024-07-18", + "usage": { + "total_tokens": 325, + "completion_tokens": 2, + "prompt_tokens": 323, + "cached_tokens": 0 + }, + "error": null, + "temperature": 1.0, + "max_completion_tokens": 2048, + "top_p": 1.0, + "seed": 42 + } + } + request: + curl: > + curl + https://api.openai.com/v1/evals/eval_67abd54d9b0081909a86353f6fb9317a/runs/evalrun_67abd54d60ec8190832b46859da808f7/output_items/outputitem_67abd55eb6548190bb580745d5644a33 + \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + output_item = client.evals.runs.output_items.retrieve( + output_item_id="output_item_id", + eval_id="eval_id", + run_id="run_id", + ) + print(output_item.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const outputItem = await client.evals.runs.outputItems.retrieve('output_item_id', { + eval_id: 'eval_id', + run_id: 'run_id', + }); + + console.log(outputItem.id); + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.evals.runs.outputitems.OutputItemRetrieveParams; + import com.openai.models.evals.runs.outputitems.OutputItemRetrieveResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + OutputItemRetrieveParams params = OutputItemRetrieveParams.builder() + .evalId("eval_id") + .runId("run_id") + .outputItemId("output_item_id") + .build(); + OutputItemRetrieveResponse outputItem = client.evals().runs().outputItems().retrieve(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + output_item = openai.evals.runs.output_items.retrieve("output_item_id", eval_id: "eval_id", + run_id: "run_id") 
+
+
+            puts(output_item)
+      description: |
+        Get an evaluation run output item by ID.
+  /files:
+    get:
+      operationId: listFiles
+      tags:
+        - Files
+      summary: List files
+      parameters:
+        - in: query
+          name: purpose
+          required: false
+          schema:
+            type: string
+          description: Only return files with the given purpose.
+        - name: limit
+          in: query
+          description: >
+            A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the
+            default is 10,000.
+          required: false
+          schema:
+            type: integer
+            default: 10000
+        - name: order
+          in: query
+          description: >
+            Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for
+            descending order.
+          schema:
+            type: string
+            default: desc
+            enum:
+              - asc
+              - desc
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent
+            call can include after=obj_foo in order to fetch the next page of the list.
+          schema:
+            type: string
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListFilesResponse'
+      x-oaiMeta:
+        name: List files
+        group: files
+        returns: A list of [File](https://platform.openai.com/docs/api-reference/files/object) objects.
+        examples:
+          response: |
+            {
+              "object": "list",
+              "data": [
+                {
+                  "id": "file-abc123",
+                  "object": "file",
+                  "bytes": 175,
+                  "created_at": 1613677385,
+                  "expires_at": 1677614202,
+                  "filename": "salesOverview.pdf",
+                  "purpose": "assistants"
+                },
+                {
+                  "id": "file-abc456",
+                  "object": "file",
+                  "bytes": 140,
+                  "created_at": 1613779121,
+                  "expires_at": 1677614202,
+                  "filename": "puppy.jsonl",
+                  "purpose": "fine-tune"
+                }
+              ],
+              "first_id": "file-abc123",
+              "last_id": "file-abc456",
+              "has_more": false
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/files \
+                -H "Authorization: Bearer $OPENAI_API_KEY"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              page = client.files.list()
+              page = page.data[0]
+              print(page)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              // Automatically fetches more pages as needed.
+              for await (const fileObject of client.files.list()) {
+                console.log(fileObject);
+              }
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                page, err := client.Files.List(context.TODO(), openai.FileListParams{
+
+                })
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", page)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.files.FileListPage;
+              import com.openai.models.files.FileListParams;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      FileListPage page = client.files().list();
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              page = openai.files.list
+
+              puts(page)
+      description: Returns a list of files.
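+    # A short cursor-pagination sketch for the list operation above, using the
+    # Python client from the examples and the documented `purpose`, `limit`,
+    # `order`, and `after` query parameters (IDs are illustrative):
+    #   page = client.files.list(purpose="fine-tune", limit=2, order="desc")
+    #   next_page = client.files.list(limit=2, order="desc", after=page.data[-1].id)
+    # Iterating the returned page object in openai-python also auto-paginates.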
+    post:
+      operationId: createFile
+      tags:
+        - Files
+      summary: Upload file
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/CreateFileRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIFile'
+      x-oaiMeta:
+        name: Upload file
+        group: files
+        returns: The uploaded [File](https://platform.openai.com/docs/api-reference/files/object) object.
+        examples:
+          response: |
+            {
+              "id": "file-abc123",
+              "object": "file",
+              "bytes": 120000,
+              "created_at": 1677610602,
+              "expires_at": 1677614202,
+              "filename": "mydata.jsonl",
+              "purpose": "fine-tune"
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/files \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -F purpose="fine-tune" \
+                -F file="@mydata.jsonl" \
+                -F expires_after[anchor]="created_at" \
+                -F expires_after[seconds]=2592000
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              file_object = client.files.create(
+                  file=b"raw file contents",
+                  purpose="assistants",
+              )
+              print(file_object.id)
+            node.js: |-
+              import fs from 'fs';
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const fileObject = await client.files.create({
+                file: fs.createReadStream('fine-tune.jsonl'),
+                purpose: 'assistants',
+              });
+
+              console.log(fileObject.id);
+            go: |
+              package main
+
+              import (
+                "bytes"
+                "context"
+                "fmt"
+                "io"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                fileObject, err := client.Files.New(context.TODO(), openai.FileNewParams{
+                  File: io.Reader(bytes.NewBuffer([]byte("some file contents"))),
+                  Purpose: openai.FilePurposeAssistants,
+                })
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", fileObject.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.files.FileCreateParams;
+              import com.openai.models.files.FileObject;
+              import com.openai.models.files.FilePurpose;
+              import java.io.ByteArrayInputStream;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      FileCreateParams params = FileCreateParams.builder()
+                          .file(new ByteArrayInputStream("some content".getBytes()))
+                          .purpose(FilePurpose.ASSISTANTS)
+                          .build();
+                      FileObject fileObject = client.files().create(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              file_object = openai.files.create(file: Pathname(__FILE__), purpose: :assistants)
+
+              puts(file_object)
+      description: |
+        Upload a file that can be used across various endpoints. Individual files
+        can be up to 512 MB, and the size of all files uploaded by one organization
+        can be up to 1 TB.
+
+        - The Assistants API supports files up to 2 million tokens and of specific
+        file types. See the [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for
+        details.
+        - The Fine-tuning API only supports `.jsonl` files. The input also has
+        certain required formats for fine-tuning
+        [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or
+        [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) models.
+        - The Batch API only supports `.jsonl` files up to 200 MB in size.
The input + also has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). + + Please [contact us](https://help.openai.com/) if you need to increase these + storage limits. + /files/{file_id}: + delete: + operationId: deleteFile + tags: + - Files + summary: Delete file + parameters: + - in: path + name: file_id + required: true + schema: + type: string + description: The ID of the file to use for this request. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteFileResponse' + x-oaiMeta: + name: Delete file + group: files + returns: Deletion status. + examples: + response: | + { + "id": "file-abc123", + "object": "file", + "deleted": true + } + request: + curl: | + curl https://api.openai.com/v1/files/file-abc123 \ + -X DELETE \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + file_deleted = client.files.delete( + "file_id", + ) + print(file_deleted.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fileDeleted = await client.files.delete('file_id'); + + console.log(fileDeleted.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fileDeleted, err := client.Files.Delete(context.TODO(), "file_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fileDeleted.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.files.FileDeleteParams; + import com.openai.models.files.FileDeleted; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileDeleted fileDeleted = client.files().delete("file_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + file_deleted = openai.files.delete("file_id") + + puts(file_deleted) + description: Delete a file and remove it from all vector stores. + get: + operationId: retrieveFile + tags: + - Files + summary: Retrieve file + parameters: + - in: path + name: file_id + required: true + schema: + type: string + description: The ID of the file to use for this request. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIFile' + x-oaiMeta: + name: Retrieve file + group: files + returns: >- + The [File](https://platform.openai.com/docs/api-reference/files/object) object matching the + specified ID. 
+        examples:
+          response: |
+            {
+              "id": "file-abc123",
+              "object": "file",
+              "bytes": 120000,
+              "created_at": 1677610602,
+              "expires_at": 1677614202,
+              "filename": "mydata.jsonl",
+              "purpose": "fine-tune"
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/files/file-abc123 \
+                -H "Authorization: Bearer $OPENAI_API_KEY"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              file_object = client.files.retrieve(
+                  "file_id",
+              )
+              print(file_object.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const fileObject = await client.files.retrieve('file_id');
+
+              console.log(fileObject.id);
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                fileObject, err := client.Files.Get(context.TODO(), "file_id")
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", fileObject.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.files.FileObject;
+              import com.openai.models.files.FileRetrieveParams;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      FileObject fileObject = client.files().retrieve("file_id");
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              file_object = openai.files.retrieve("file_id")
+
+              puts(file_object)
+      description: Returns information about a specific file.
+  /files/{file_id}/content:
+    get:
+      operationId: downloadFile
+      tags:
+        - Files
+      summary: Retrieve file content
+      parameters:
+        - in: path
+          name: file_id
+          required: true
+          schema:
+            type: string
+          description: The ID of the file to use for this request.
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: string
+      x-oaiMeta:
+        name: Retrieve file content
+        group: files
+        returns: The file content.
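+        # A minimal sketch of saving the downloaded content to disk with the
+        # Python client (assumes the binary response helper `write_to_file`
+        # available in recent openai-python releases; the file ID is illustrative):
+        #   content = client.files.content("file-abc123")
+        #   content.write_to_file("mydata.jsonl")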
+ examples: + response: '' + request: + curl: | + curl https://api.openai.com/v1/files/file-abc123/content \ + -H "Authorization: Bearer $OPENAI_API_KEY" > file.jsonl + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.files.content( + "file_id", + ) + print(response) + content = response.read() + print(content) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.files.content('file_id'); + + console.log(response); + + const content = await response.blob(); + console.log(content); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Files.Content(context.TODO(), "file_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.http.HttpResponse; + import com.openai.models.files.FileContentParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + HttpResponse response = client.files().content("file_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.files.content("file_id") + + puts(response) + description: Returns the contents of the specified file. + /fine_tuning/alpha/graders/run: + post: + operationId: runGrader + tags: + - Fine-tuning + summary: Run grader + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RunGraderRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RunGraderResponse' + x-oaiMeta: + name: Run grader + beta: true + group: graders + returns: The results from the grader run. 
+ examples: + response: | + { + "reward": 1.0, + "metadata": { + "name": "Example score model grader", + "type": "score_model", + "errors": { + "formula_parse_error": false, + "sample_parse_error": false, + "truncated_observation_error": false, + "unresponsive_reward_error": false, + "invalid_variable_error": false, + "other_error": false, + "python_grader_server_error": false, + "python_grader_server_error_type": null, + "python_grader_runtime_error": false, + "python_grader_runtime_error_details": null, + "model_grader_server_error": false, + "model_grader_refusal_error": false, + "model_grader_parse_error": false, + "model_grader_server_error_details": null + }, + "execution_time": 4.365238428115845, + "scores": {}, + "token_usage": { + "prompt_tokens": 190, + "total_tokens": 324, + "completion_tokens": 134, + "cached_tokens": 0 + }, + "sampled_model_name": "gpt-4o-2024-08-06" + }, + "sub_rewards": {}, + "model_grader_token_usage_per_model": { + "gpt-4o-2024-08-06": { + "prompt_tokens": 190, + "total_tokens": 324, + "completion_tokens": 134, + "cached_tokens": 0 + } + } + } + request: + curl: > + curl -X POST https://api.openai.com/v1/fine_tuning/alpha/graders/run \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "grader": { + "type": "score_model", + "name": "Example score model grader", + "input": [ + { + "role": "user", + "content": "Score how close the reference answer is to the model + answer. Score 1.0 if they are the same and 0.0 if they are different. Return just a floating + point score\n\nReference answer: {{item.reference_answer}}\n\nModel answer: + {{sample.output_text}}" + } + ], + "model": "gpt-4o-2024-08-06", + "sampling_params": { + "temperature": 1, + "top_p": 1, + "seed": 42 + } + }, + "item": { + "reference_answer": "fuzzy wuzzy was a bear" + }, + "model_sample": "fuzzy wuzzy was a bear" + }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.fineTuning.alpha.graders.run({ + grader: { input: 'input', name: 'name', operation: 'eq', reference: 'reference', type: 'string_check' }, + model_sample: 'model_sample', + }); + + console.log(response.metadata); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.fine_tuning.alpha.graders.run( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + model_sample="model_sample", + ) + print(response.metadata) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.FineTuning.Alpha.Graders.Run(context.TODO(), openai.FineTuningAlphaGraderRunParams{ + Grader: openai.FineTuningAlphaGraderRunParamsGraderUnion{ + OfStringCheck: &openai.StringCheckGraderParam{ + Input: "input", + Name: "name", + Operation: openai.StringCheckGraderOperationEq, + Reference: "reference", + }, + }, + ModelSample: "model_sample", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.Metadata) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.alpha.graders.GraderRunParams; + import com.openai.models.finetuning.alpha.graders.GraderRunResponse; + import 
com.openai.models.graders.gradermodels.StringCheckGrader; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + GraderRunParams params = GraderRunParams.builder() + .grader(StringCheckGrader.builder() + .input("input") + .name("name") + .operation(StringCheckGrader.Operation.EQ) + .reference("reference") + .build()) + .modelSample("model_sample") + .build(); + GraderRunResponse response = client.fineTuning().alpha().graders().run(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.fine_tuning.alpha.graders.run( + grader: {input: "input", name: "name", operation: :eq, reference: "reference", type: :string_check}, + model_sample: "model_sample" + ) + + puts(response) + description: | + Run a grader. + /fine_tuning/alpha/graders/validate: + post: + operationId: validateGrader + tags: + - Fine-tuning + summary: Validate grader + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ValidateGraderRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ValidateGraderResponse' + x-oaiMeta: + name: Validate grader + beta: true + group: graders + returns: The validated grader object. + examples: + response: | + { + "grader": { + "type": "string_check", + "name": "Example string check grader", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "operation": "eq" + } + } + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/alpha/graders/validate \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "grader": { + "type": "string_check", + "name": "Example string check grader", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "operation": "eq" + } + }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.fineTuning.alpha.graders.validate({ + grader: { input: 'input', name: 'name', operation: 'eq', reference: 'reference', type: 'string_check' }, + }); + + console.log(response.grader); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.fine_tuning.alpha.graders.validate( + grader={ + "input": "input", + "name": "name", + "operation": "eq", + "reference": "reference", + "type": "string_check", + }, + ) + print(response.grader) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.FineTuning.Alpha.Graders.Validate(context.TODO(), openai.FineTuningAlphaGraderValidateParams{ + Grader: openai.FineTuningAlphaGraderValidateParamsGraderUnion{ + OfStringCheckGrader: &openai.StringCheckGraderParam{ + Input: "input", + Name: "name", + Operation: openai.StringCheckGraderOperationEq, + Reference: "reference", + }, + }, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.Grader) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.alpha.graders.GraderValidateParams; + import com.openai.models.finetuning.alpha.graders.GraderValidateResponse; + import 
com.openai.models.graders.gradermodels.StringCheckGrader; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + GraderValidateParams params = GraderValidateParams.builder() + .grader(StringCheckGrader.builder() + .input("input") + .name("name") + .operation(StringCheckGrader.Operation.EQ) + .reference("reference") + .build()) + .build(); + GraderValidateResponse response = client.fineTuning().alpha().graders().validate(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.fine_tuning.alpha.graders.validate( + grader: {input: "input", name: "name", operation: :eq, reference: "reference", type: :string_check} + ) + + puts(response) + description: | + Validate a grader. + /fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions: + get: + operationId: listFineTuningCheckpointPermissions + tags: + - Fine-tuning + summary: List checkpoint permissions + parameters: + - in: path + name: fine_tuned_model_checkpoint + required: true + schema: + type: string + example: ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tuned model checkpoint to get permissions for. + - name: project_id + in: query + description: The ID of the project to get permissions for. + required: false + schema: + type: string + - name: after + in: query + description: Identifier for the last permission ID from the previous pagination request. + required: false + schema: + type: string + - name: limit + in: query + description: Number of permissions to retrieve. + required: false + schema: + type: integer + default: 10 + - name: order + in: query + description: The order in which to retrieve permissions. + required: false + schema: + type: string + enum: + - ascending + - descending + default: descending + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListFineTuningCheckpointPermissionResponse' + x-oaiMeta: + name: List checkpoint permissions + group: fine-tuning + returns: >- + A list of fine-tuned model checkpoint [permission + objects](https://platform.openai.com/docs/api-reference/fine-tuning/permission-object) for a + fine-tuned model checkpoint. 
+        examples:
+          response: |
+            {
+              "object": "list",
+              "data": [
+                {
+                  "object": "checkpoint.permission",
+                  "id": "cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+                  "created_at": 1721764867,
+                  "project_id": "proj_abGMw1llN8IrBb6SvvY5A1iH"
+                },
+                {
+                  "object": "checkpoint.permission",
+                  "id": "cp_enQCFmOTGj3syEpYVhBRLTSy",
+                  "created_at": 1721764800,
+                  "project_id": "proj_iqGMw1llN8IrBb6SvvY5A1oF"
+                }
+              ],
+              "first_id": "cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+              "last_id": "cp_enQCFmOTGj3syEpYVhBRLTSy",
+              "has_more": false
+            }
+          request:
+            curl: >
+              curl
+              https://api.openai.com/v1/fine_tuning/checkpoints/ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd/permissions
+              \
+                -H "Authorization: Bearer $OPENAI_API_KEY"
+            node.js: >-
+              import OpenAI from 'openai';
+
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+
+              const permission = await
+              client.fineTuning.checkpoints.permissions.retrieve('ft-AF1WoRqd3aJAHsqc9NY7iL8F');
+
+
+              console.log(permission.first_id);
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              permission = client.fine_tuning.checkpoints.permissions.retrieve(
+                  fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+              )
+              print(permission.first_id)
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                permission, err := client.FineTuning.Checkpoints.Permissions.Get(
+                  context.TODO(),
+                  "ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+                  openai.FineTuningCheckpointPermissionGetParams{
+
+                  },
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", permission.FirstID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.finetuning.checkpoints.permissions.PermissionRetrieveParams;
+              import com.openai.models.finetuning.checkpoints.permissions.PermissionRetrieveResponse;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      PermissionRetrieveResponse permission = client.fineTuning().checkpoints().permissions().retrieve("ft-AF1WoRqd3aJAHsqc9NY7iL8F");
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              permission = openai.fine_tuning.checkpoints.permissions.retrieve("ft-AF1WoRqd3aJAHsqc9NY7iL8F")
+
+              puts(permission)
+      description: |
+        **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+        Organization owners can use this endpoint to view all permissions for a fine-tuned model checkpoint.
+    post:
+      operationId: createFineTuningCheckpointPermission
+      tags:
+        - Fine-tuning
+      summary: Create checkpoint permissions
+      parameters:
+        - in: path
+          name: fine_tuned_model_checkpoint
+          required: true
+          schema:
+            type: string
+            example: ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd
+          description: |
+            The ID of the fine-tuned model checkpoint to create a permission for.
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateFineTuningCheckpointPermissionRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListFineTuningCheckpointPermissionResponse'
+      x-oaiMeta:
+        name: Create checkpoint permissions
+        group: fine-tuning
+        returns: >-
+          A list of fine-tuned model checkpoint [permission
+          objects](https://platform.openai.com/docs/api-reference/fine-tuning/permission-object) for a
+          fine-tuned model checkpoint.
+        examples:
+          response: |
+            {
+              "object": "list",
+              "data": [
+                {
+                  "object": "checkpoint.permission",
+                  "id": "cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+                  "created_at": 1721764867,
+                  "project_id": "proj_abGMw1llN8IrBb6SvvY5A1iH"
+                }
+              ],
+              "first_id": "cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+              "last_id": "cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+              "has_more": false
+            }
+          request:
+            curl: >
+              curl
+              https://api.openai.com/v1/fine_tuning/checkpoints/ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd/permissions
+              \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -d '{"project_ids": ["proj_abGMw1llN8IrBb6SvvY5A1iH"]}'
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              // Automatically fetches more pages as needed.
+              for await (const permissionCreateResponse of client.fineTuning.checkpoints.permissions.create(
+                'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd',
+                { project_ids: ['string'] },
+              )) {
+                console.log(permissionCreateResponse.id);
+              }
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              page = client.fine_tuning.checkpoints.permissions.create(
+                  fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+                  project_ids=["string"],
+              )
+              page = page.data[0]
+              print(page.id)
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                page, err := client.FineTuning.Checkpoints.Permissions.New(
+                  context.TODO(),
+                  "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+                  openai.FineTuningCheckpointPermissionNewParams{
+                    ProjectIDs: []string{"string"},
+                  },
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", page)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.finetuning.checkpoints.permissions.PermissionCreatePage;
+              import com.openai.models.finetuning.checkpoints.permissions.PermissionCreateParams;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      PermissionCreateParams params = PermissionCreateParams.builder()
+                          .fineTunedModelCheckpoint("ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd")
+                          .addProjectId("string")
+                          .build();
+                      PermissionCreatePage page = client.fineTuning().checkpoints().permissions().create(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              page = openai.fine_tuning.checkpoints.permissions.create(
+                "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+                project_ids: ["string"]
+              )
+
+              puts(page)
+      description: |
+        **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys).
+
+        This enables organization owners to share fine-tuned models with other projects in their organization.
+  /fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}:
+    delete:
+      operationId: deleteFineTuningCheckpointPermission
+      tags:
+        - Fine-tuning
+      summary: Delete checkpoint permission
+      parameters:
+        - in: path
+          name: fine_tuned_model_checkpoint
+          required: true
+          schema:
+            type: string
+            example: ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd
+          description: |
+            The ID of the fine-tuned model checkpoint to delete a permission for.
+        - in: path
+          name: permission_id
+          required: true
+          schema:
+            type: string
+            example: cp_zc4Q7MP6XxulcVzj4MZdwsAB
+          description: |
+            The ID of the fine-tuned model checkpoint permission to delete.
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/DeleteFineTuningCheckpointPermissionResponse'
+      x-oaiMeta:
+        name: Delete checkpoint permission
+        group: fine-tuning
+        returns: >-
+          The deletion status of the fine-tuned model checkpoint [permission
+          object](https://platform.openai.com/docs/api-reference/fine-tuning/permission-object).
+        examples:
+          response: |
+            {
+              "object": "checkpoint.permission",
+              "id": "cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+              "deleted": true
+            }
+          request:
+            curl: >
+              curl
+              https://api.openai.com/v1/fine_tuning/checkpoints/ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd/permissions/cp_zc4Q7MP6XxulcVzj4MZdwsAB
+              \
+                -X DELETE \
+                -H "Authorization: Bearer $OPENAI_API_KEY"
+            node.js: >-
+              import OpenAI from 'openai';
+
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+
+              const permission = await
+              client.fineTuning.checkpoints.permissions.delete('cp_zc4Q7MP6XxulcVzj4MZdwsAB', {
+                fine_tuned_model_checkpoint: 'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd',
+              });
+
+
+              console.log(permission.id);
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              permission = client.fine_tuning.checkpoints.permissions.delete(
+                  permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+                  fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+              )
+              print(permission.id)
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                permission, err := client.FineTuning.Checkpoints.Permissions.Delete(
+                  context.TODO(),
+                  "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+                  "cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", permission.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.finetuning.checkpoints.permissions.PermissionDeleteParams;
+              import com.openai.models.finetuning.checkpoints.permissions.PermissionDeleteResponse;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      PermissionDeleteParams params = PermissionDeleteParams.builder()
+                          .fineTunedModelCheckpoint("ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd")
+                          .permissionId("cp_zc4Q7MP6XxulcVzj4MZdwsAB")
+                          .build();
+                      PermissionDeleteResponse permission = client.fineTuning().checkpoints().permissions().delete(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              permission =
openai.fine_tuning.checkpoints.permissions.delete(
+                "cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+                fine_tuned_model_checkpoint: "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd"
+              )
+
+              puts(permission)
+      description: |
+        **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+        Organization owners can use this endpoint to delete a permission for a fine-tuned model checkpoint.
+  /fine_tuning/jobs:
+    post:
+      operationId: createFineTuningJob
+      tags:
+        - Fine-tuning
+      summary: Create fine-tuning job
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateFineTuningJobRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/FineTuningJob'
+      x-oaiMeta:
+        name: Create fine-tuning job
+        group: fine-tuning
+        returns: A [fine-tuning.job](https://platform.openai.com/docs/api-reference/fine-tuning/object) object.
+        examples:
+          - title: Default
+            request:
+              curl: |
+                curl https://api.openai.com/v1/fine_tuning/jobs \
+                  -H "Content-Type: application/json" \
+                  -H "Authorization: Bearer $OPENAI_API_KEY" \
+                  -d '{
+                    "training_file": "file-BK7bzQj3FfZFXr7DbL6xJwfo",
+                    "model": "gpt-4o-mini"
+                  }'
+              python: |-
+                from openai import OpenAI
+
+                client = OpenAI(
+                    api_key="My API Key",
+                )
+                fine_tuning_job = client.fine_tuning.jobs.create(
+                    model="gpt-4o-mini",
+                    training_file="file-abc123",
+                )
+                print(fine_tuning_job.id)
+              node.js: |-
+                import OpenAI from 'openai';
+
+                const client = new OpenAI({
+                  apiKey: 'My API Key',
+                });
+
+                const fineTuningJob = await client.fineTuning.jobs.create({
+                  model: 'gpt-4o-mini',
+                  training_file: 'file-abc123',
+                });
+
+                console.log(fineTuningJob.id);
+              go: |
+                package main
+
+                import (
+                  "context"
+                  "fmt"
+
+                  "github.com/openai/openai-go"
+                  "github.com/openai/openai-go/option"
+                )
+
+                func main() {
+                  client := openai.NewClient(
+                    option.WithAPIKey("My API Key"),
+                  )
+                  fineTuningJob, err := client.FineTuning.Jobs.New(context.TODO(), openai.FineTuningJobNewParams{
+                    Model: openai.FineTuningJobNewParamsModelBabbage002,
+                    TrainingFile: "file-abc123",
+                  })
+                  if err != nil {
+                    panic(err.Error())
+                  }
+                  fmt.Printf("%+v\n", fineTuningJob.ID)
+                }
+              java: |-
+                package com.openai.example;
+
+                import com.openai.client.OpenAIClient;
+                import com.openai.client.okhttp.OpenAIOkHttpClient;
+                import com.openai.models.finetuning.jobs.FineTuningJob;
+                import com.openai.models.finetuning.jobs.JobCreateParams;
+
+                public final class Main {
+                    private Main() {}
+
+                    public static void main(String[] args) {
+                        OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                        JobCreateParams params = JobCreateParams.builder()
+                            .model(JobCreateParams.Model.BABBAGE_002)
+                            .trainingFile("file-abc123")
+                            .build();
+                        FineTuningJob fineTuningJob = client.fineTuning().jobs().create(params);
+                    }
+                }
+              ruby: >-
+                require "openai"
+
+
+                openai = OpenAI::Client.new(api_key: "My API Key")
+
+
+                fine_tuning_job = openai.fine_tuning.jobs.create(model: :"babbage-002", training_file:
+                "file-abc123")
+
+
+                puts(fine_tuning_job)
+            response: |
+              {
+                "object": "fine_tuning.job",
+                "id": "ftjob-abc123",
+                "model": "gpt-4o-mini-2024-07-18",
+                "created_at": 1721764800,
+                "fine_tuned_model": null,
+                "organization_id": "org-123",
+                "result_files": [],
+                "status": "queued",
+                "validation_file": null,
+                "training_file": "file-abc123",
+                "method": {
+                  "type": "supervised",
+                  "supervised": {
+                    "hyperparameters": {
+                      "batch_size": "auto",
+                      "learning_rate_multiplier": "auto",
+                      "n_epochs": "auto"
+                    }
+                  }
+                },
+                "metadata":
null + } + - title: Epochs + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "training_file": "file-abc123", + "model": "gpt-4o-mini", + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "n_epochs": 2 + } + } + } + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + fine_tuning_job = client.fine_tuning.jobs.create( + model="gpt-4o-mini", + training_file="file-abc123", + ) + print(fine_tuning_job.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.create({ + model: 'gpt-4o-mini', + training_file: 'file-abc123', + }); + + console.log(fineTuningJob.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.New(context.TODO(), openai.FineTuningJobNewParams{ + Model: openai.FineTuningJobNewParamsModelBabbage002, + TrainingFile: "file-abc123", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + JobCreateParams params = JobCreateParams.builder() + .model(JobCreateParams.Model.BABBAGE_002) + .trainingFile("file-abc123") + .build(); + FineTuningJob fineTuningJob = client.fineTuning().jobs().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + fine_tuning_job = openai.fine_tuning.jobs.create(model: :"babbage-002", training_file: + "file-abc123") + + + puts(fine_tuning_job) + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "gpt-4o-mini", + "created_at": 1721764800, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "queued", + "validation_file": null, + "training_file": "file-abc123", + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": 2 + }, + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": 2 + } + } + }, + "metadata": null, + "error": { + "code": null, + "message": null, + "param": null + }, + "finished_at": null, + "seed": 683058546, + "trained_tokens": null, + "estimated_finish": null, + "integrations": [], + "user_provided_suffix": null, + "usage_metrics": null, + "shared_with_openai": false + } + - title: DPO + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "training_file": "file-abc123", + "validation_file": "file-abc123", + "model": "gpt-4o-mini", + "method": { + "type": "dpo", + "dpo": { + "hyperparameters": { + "beta": 0.1 + } + } + } + }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API 
Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.create({ + model: 'gpt-4o-mini', + training_file: 'file-abc123', + }); + + console.log(fineTuningJob.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.New(context.TODO(), openai.FineTuningJobNewParams{ + Model: openai.FineTuningJobNewParamsModelBabbage002, + TrainingFile: "file-abc123", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + JobCreateParams params = JobCreateParams.builder() + .model(JobCreateParams.Model.BABBAGE_002) + .trainingFile("file-abc123") + .build(); + FineTuningJob fineTuningJob = client.fineTuning().jobs().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + fine_tuning_job = openai.fine_tuning.jobs.create(model: :"babbage-002", training_file: + "file-abc123") + + + puts(fine_tuning_job) + python: | + from openai import OpenAI + from openai.types.fine_tuning import DpoMethod, DpoHyperparameters + + client = OpenAI() + + client.fine_tuning.jobs.create( + training_file="file-abc", + validation_file="file-123", + model="gpt-4o-mini", + method={ + "type": "dpo", + "dpo": DpoMethod( + hyperparameters=DpoHyperparameters(beta=0.1) + ) + } + ) + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc", + "model": "gpt-4o-mini", + "created_at": 1746130590, + "fine_tuned_model": null, + "organization_id": "org-abc", + "result_files": [], + "status": "queued", + "validation_file": "file-123", + "training_file": "file-abc", + "method": { + "type": "dpo", + "dpo": { + "hyperparameters": { + "beta": 0.1, + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto" + } + } + }, + "metadata": null, + "error": { + "code": null, + "message": null, + "param": null + }, + "finished_at": null, + "hyperparameters": null, + "seed": 1036326793, + "estimated_finish": null, + "integrations": [], + "user_provided_suffix": null, + "usage_metrics": null, + "shared_with_openai": false + } + - title: Reinforcement + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "training_file": "file-abc", + "validation_file": "file-123", + "model": "o4-mini", + "method": { + "type": "reinforcement", + "reinforcement": { + "grader": { + "type": "string_check", + "name": "Example string check grader", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "operation": "eq" + }, + "hyperparameters": { + "reasoning_effort": "medium" + } + } + } + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) 
+ fine_tuning_job = client.fine_tuning.jobs.create( + model="gpt-4o-mini", + training_file="file-abc123", + ) + print(fine_tuning_job.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.create({ + model: 'gpt-4o-mini', + training_file: 'file-abc123', + }); + + console.log(fineTuningJob.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.New(context.TODO(), openai.FineTuningJobNewParams{ + Model: openai.FineTuningJobNewParamsModelBabbage002, + TrainingFile: "file-abc123", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + JobCreateParams params = JobCreateParams.builder() + .model(JobCreateParams.Model.BABBAGE_002) + .trainingFile("file-abc123") + .build(); + FineTuningJob fineTuningJob = client.fineTuning().jobs().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + fine_tuning_job = openai.fine_tuning.jobs.create(model: :"babbage-002", training_file: + "file-abc123") + + + puts(fine_tuning_job) + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "o4-mini", + "created_at": 1721764800, + "finished_at": null, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "validating_files", + "validation_file": "file-123", + "training_file": "file-abc", + "trained_tokens": null, + "error": {}, + "user_provided_suffix": null, + "seed": 950189191, + "estimated_finish": null, + "integrations": [], + "method": { + "type": "reinforcement", + "reinforcement": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + "eval_interval": "auto", + "eval_samples": "auto", + "compute_multiplier": "auto", + "reasoning_effort": "medium" + }, + "grader": { + "type": "string_check", + "name": "Example string check grader", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "operation": "eq" + }, + "response_format": null + } + }, + "metadata": null, + "usage_metrics": null, + "shared_with_openai": false + } + + - title: Validation file + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "training_file": "file-abc123", + "validation_file": "file-abc123", + "model": "gpt-4o-mini" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + fine_tuning_job = client.fine_tuning.jobs.create( + model="gpt-4o-mini", + training_file="file-abc123", + ) + print(fine_tuning_job.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.create({ + model: 'gpt-4o-mini', + training_file: 
'file-abc123', + }); + + console.log(fineTuningJob.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.New(context.TODO(), openai.FineTuningJobNewParams{ + Model: openai.FineTuningJobNewParamsModelBabbage002, + TrainingFile: "file-abc123", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + JobCreateParams params = JobCreateParams.builder() + .model(JobCreateParams.Model.BABBAGE_002) + .trainingFile("file-abc123") + .build(); + FineTuningJob fineTuningJob = client.fineTuning().jobs().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + fine_tuning_job = openai.fine_tuning.jobs.create(model: :"babbage-002", training_file: + "file-abc123") + + + puts(fine_tuning_job) + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "gpt-4o-mini-2024-07-18", + "created_at": 1721764800, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "queued", + "validation_file": "file-abc123", + "training_file": "file-abc123", + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto" + } + } + }, + "metadata": null + } + - title: W&B Integration + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "training_file": "file-abc123", + "validation_file": "file-abc123", + "model": "gpt-4o-mini", + "integrations": [ + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "name": "ft-run-display-name", + "tags": [ + "first-experiment", "v2" + ] + } + } + ] + }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.create({ + model: 'gpt-4o-mini', + training_file: 'file-abc123', + }); + + console.log(fineTuningJob.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + fine_tuning_job = client.fine_tuning.jobs.create( + model="gpt-4o-mini", + training_file="file-abc123", + ) + print(fine_tuning_job.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.New(context.TODO(), openai.FineTuningJobNewParams{ + Model: openai.FineTuningJobNewParamsModelBabbage002, + TrainingFile: "file-abc123", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import 
com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + JobCreateParams params = JobCreateParams.builder() + .model(JobCreateParams.Model.BABBAGE_002) + .trainingFile("file-abc123") + .build(); + FineTuningJob fineTuningJob = client.fineTuning().jobs().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + fine_tuning_job = openai.fine_tuning.jobs.create(model: :"babbage-002", training_file: + "file-abc123") + + + puts(fine_tuning_job) + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "gpt-4o-mini-2024-07-18", + "created_at": 1721764800, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "queued", + "validation_file": "file-abc123", + "training_file": "file-abc123", + "integrations": [ + { + "type": "wandb", + "wandb": { + "project": "my-wandb-project", + "entity": null, + "run_id": "ftjob-abc123" + } + } + ], + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto" + } + } + }, + "metadata": null + } + description: > + Creates a fine-tuning job which begins the process of creating a new model from a given dataset. + + + Response includes details of the enqueued job including job status and the name of the fine-tuned + model once complete. + + + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization) + get: + operationId: listPaginatedFineTuningJobs + tags: + - Fine-tuning + summary: List fine-tuning jobs + parameters: + - name: after + in: query + description: Identifier for the last job from the previous pagination request. + required: false + schema: + type: string + - name: limit + in: query + description: Number of fine-tuning jobs to retrieve. + required: false + schema: + type: integer + default: 20 + - in: query + name: metadata + required: false + schema: + type: object + nullable: true + additionalProperties: + type: string + style: deepObject + explode: true + description: > + Optional metadata filter. To filter, use the syntax `metadata[k]=v`. Alternatively, set + `metadata=null` to indicate no metadata. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListPaginatedFineTuningJobsResponse' + x-oaiMeta: + name: List fine-tuning jobs + group: fine-tuning + returns: >- + A list of paginated [fine-tuning + job](https://platform.openai.com/docs/api-reference/fine-tuning/object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "gpt-4o-mini-2024-07-18", + "created_at": 1721764800, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "queued", + "validation_file": null, + "training_file": "file-abc123", + "metadata": { + "key": "value" + } + }, + { ... }, + { ... 
} + ], "has_more": true + } + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs?limit=2&metadata[key]=value \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.fine_tuning.jobs.list() + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const fineTuningJob of client.fineTuning.jobs.list()) { + console.log(fineTuningJob.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.FineTuning.Jobs.List(context.TODO(), openai.FineTuningJobListParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.JobListPage; + import com.openai.models.finetuning.jobs.JobListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + JobListPage page = client.fineTuning().jobs().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.fine_tuning.jobs.list + + puts(page) + description: | + List your organization's fine-tuning jobs + /fine_tuning/jobs/{fine_tuning_job_id}: + get: + operationId: retrieveFineTuningJob + tags: + - Fine-tuning + summary: Retrieve fine-tuning job + parameters: + - in: path + name: fine_tuning_job_id + required: true + schema: + type: string + example: ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tuning job. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FineTuningJob' + x-oaiMeta: + name: Retrieve fine-tuning job + group: fine-tuning + returns: >- + The [fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning/object) object with the + given ID. 
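+ # NOTE (editorial): clients typically poll this endpoint until the job reaches a
+ # terminal status ("succeeded", "failed", or "cancelled"). A minimal sketch using the
+ # Python client from the examples below (job ID and poll interval are illustrative):
+ #   import time
+ #   from openai import OpenAI
+ #   client = OpenAI()
+ #   job = client.fine_tuning.jobs.retrieve("ftjob-abc123")
+ #   while job.status not in ("succeeded", "failed", "cancelled"):
+ #       time.sleep(30)
+ #       job = client.fine_tuning.jobs.retrieve("ftjob-abc123")
+ #   print(job.fine_tuned_model)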
+ examples: + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "davinci-002", + "created_at": 1692661014, + "finished_at": 1692661190, + "fine_tuned_model": "ft:davinci-002:my-org:custom_suffix:7q8mpxmy", + "organization_id": "org-123", + "result_files": [ + "file-abc123" + ], + "status": "succeeded", + "validation_file": null, + "training_file": "file-abc123", + "hyperparameters": { + "n_epochs": 4, + "batch_size": 1, + "learning_rate_multiplier": 1.0 + }, + "trained_tokens": 5768, + "integrations": [], + "seed": 0, + "estimated_finish": 0, + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "n_epochs": 4, + "batch_size": 1, + "learning_rate_multiplier": 1.0 + } + } + } + } + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs/ft-AF1WoRqd3aJAHsqc9NY7iL8F \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + fine_tuning_job = client.fine_tuning.jobs.retrieve( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + print(fine_tuning_job.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.retrieve('ft-AF1WoRqd3aJAHsqc9NY7iL8F'); + + console.log(fineTuningJob.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.Get(context.TODO(), "ft-AF1WoRqd3aJAHsqc9NY7iL8F") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FineTuningJob fineTuningJob = client.fineTuning().jobs().retrieve("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + fine_tuning_job = openai.fine_tuning.jobs.retrieve("ft-AF1WoRqd3aJAHsqc9NY7iL8F") + + puts(fine_tuning_job) + description: | + Get info about a fine-tuning job. + + [Learn more about fine-tuning](https://platform.openai.com/docs/guides/model-optimization) + /fine_tuning/jobs/{fine_tuning_job_id}/cancel: + post: + operationId: cancelFineTuningJob + tags: + - Fine-tuning + summary: Cancel fine-tuning + parameters: + - in: path + name: fine_tuning_job_id + required: true + schema: + type: string + example: ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tuning job to cancel. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FineTuningJob' + x-oaiMeta: + name: Cancel fine-tuning + group: fine-tuning + returns: >- + The cancelled [fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning/object) + object. 
+ examples: + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "gpt-4o-mini-2024-07-18", + "created_at": 1721764800, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "cancelled", + "validation_file": "file-abc123", + "training_file": "file-abc123" + } + request: + curl: | + curl -X POST https://api.openai.com/v1/fine_tuning/jobs/ftjob-abc123/cancel \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + fine_tuning_job = client.fine_tuning.jobs.cancel( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + print(fine_tuning_job.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.cancel('ft-AF1WoRqd3aJAHsqc9NY7iL8F'); + + console.log(fineTuningJob.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.Cancel(context.TODO(), "ft-AF1WoRqd3aJAHsqc9NY7iL8F") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobCancelParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FineTuningJob fineTuningJob = client.fineTuning().jobs().cancel("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + fine_tuning_job = openai.fine_tuning.jobs.cancel("ft-AF1WoRqd3aJAHsqc9NY7iL8F") + + puts(fine_tuning_job) + description: | + Immediately cancel a fine-tune job. + /fine_tuning/jobs/{fine_tuning_job_id}/checkpoints: + get: + operationId: listFineTuningJobCheckpoints + tags: + - Fine-tuning + summary: List fine-tuning checkpoints + parameters: + - in: path + name: fine_tuning_job_id + required: true + schema: + type: string + example: ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tuning job to get checkpoints for. + - name: after + in: query + description: Identifier for the last checkpoint ID from the previous pagination request. + required: false + schema: + type: string + - name: limit + in: query + description: Number of checkpoints to retrieve. + required: false + schema: + type: integer + default: 10 + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListFineTuningJobCheckpointsResponse' + x-oaiMeta: + name: List fine-tuning checkpoints + group: fine-tuning + returns: >- + A list of fine-tuning [checkpoint + objects](https://platform.openai.com/docs/api-reference/fine-tuning/checkpoint-object) for a + fine-tuning job. 
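+ # NOTE (editorial): each checkpoint's `fine_tuned_model_checkpoint` name can itself be
+ # used as the `model` of an inference request. A minimal sketch with the Python client
+ # (the checkpoint name is taken from the example response below):
+ #   completion = client.chat.completions.create(
+ #       model="ft:gpt-4o-mini-2024-07-18:my-org:custom-suffix:96olL566:ckpt-step-2000",
+ #       messages=[{"role": "user", "content": "Hello!"}],
+ #   )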
+ examples: + response: | + { + "object": "list", + "data": [ + { + "object": "fine_tuning.job.checkpoint", + "id": "ftckpt_zc4Q7MP6XxulcVzj4MZdwsAB", + "created_at": 1721764867, + "fine_tuned_model_checkpoint": "ft:gpt-4o-mini-2024-07-18:my-org:custom-suffix:96olL566:ckpt-step-2000", + "metrics": { + "full_valid_loss": 0.134, + "full_valid_mean_token_accuracy": 0.874 + }, + "fine_tuning_job_id": "ftjob-abc123", + "step_number": 2000 + }, + { + "object": "fine_tuning.job.checkpoint", + "id": "ftckpt_enQCFmOTGj3syEpYVhBRLTSy", + "created_at": 1721764800, + "fine_tuned_model_checkpoint": "ft:gpt-4o-mini-2024-07-18:my-org:custom-suffix:7q8mpxmy:ckpt-step-1000", + "metrics": { + "full_valid_loss": 0.167, + "full_valid_mean_token_accuracy": 0.781 + }, + "fine_tuning_job_id": "ftjob-abc123", + "step_number": 1000 + } + ], + "first_id": "ftckpt_zc4Q7MP6XxulcVzj4MZdwsAB", + "last_id": "ftckpt_enQCFmOTGj3syEpYVhBRLTSy", + "has_more": true + } + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs/ftjob-abc123/checkpoints \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const fineTuningJobCheckpoint of client.fineTuning.jobs.checkpoints.list( + 'ft-AF1WoRqd3aJAHsqc9NY7iL8F', + )) { + console.log(fineTuningJobCheckpoint.id); + } + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.fine_tuning.jobs.checkpoints.list( + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + page = page.data[0] + print(page.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.FineTuning.Jobs.Checkpoints.List( + context.TODO(), + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + openai.FineTuningJobCheckpointListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.checkpoints.CheckpointListPage; + import com.openai.models.finetuning.jobs.checkpoints.CheckpointListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + CheckpointListPage page = client.fineTuning().jobs().checkpoints().list("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.fine_tuning.jobs.checkpoints.list("ft-AF1WoRqd3aJAHsqc9NY7iL8F") + + puts(page) + description: | + List checkpoints for a fine-tuning job. + /fine_tuning/jobs/{fine_tuning_job_id}/events: + get: + operationId: listFineTuningEvents + tags: + - Fine-tuning + summary: List fine-tuning events + parameters: + - in: path + name: fine_tuning_job_id + required: true + schema: + type: string + example: ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tuning job to get events for. + - name: after + in: query + description: Identifier for the last event from the previous pagination request. + required: false + schema: + type: string + - name: limit + in: query + description: Number of events to retrieve. 
+ required: false + schema: + type: integer + default: 20 + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListFineTuningJobEventsResponse' + x-oaiMeta: + name: List fine-tuning events + group: fine-tuning + returns: A list of fine-tuning event objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "fine_tuning.job.event", + "id": "ft-event-ddTJfwuMVpfLXseO0Am0Gqjm", + "created_at": 1721764800, + "level": "info", + "message": "Fine tuning job successfully completed", + "data": null, + "type": "message" + }, + { + "object": "fine_tuning.job.event", + "id": "ft-event-tyiGuB72evQncpH87xe505Sv", + "created_at": 1721764800, + "level": "info", + "message": "New fine-tuned model created: ft:gpt-4o-mini:openai::7p4lURel", + "data": null, + "type": "message" + } + ], + "has_more": true + } + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs/ftjob-abc123/events \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.fine_tuning.jobs.list_events( + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + page = page.data[0] + print(page.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + // Automatically fetches more pages as needed. + + for await (const fineTuningJobEvent of + client.fineTuning.jobs.listEvents('ft-AF1WoRqd3aJAHsqc9NY7iL8F')) { + console.log(fineTuningJobEvent.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.FineTuning.Jobs.ListEvents( + context.TODO(), + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + openai.FineTuningJobListEventsParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.JobListEventsPage; + import com.openai.models.finetuning.jobs.JobListEventsParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + JobListEventsPage page = client.fineTuning().jobs().listEvents("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.fine_tuning.jobs.list_events("ft-AF1WoRqd3aJAHsqc9NY7iL8F") + + puts(page) + description: | + Get status updates for a fine-tuning job. + /fine_tuning/jobs/{fine_tuning_job_id}/pause: + post: + operationId: pauseFineTuningJob + tags: + - Fine-tuning + summary: Pause fine-tuning + parameters: + - in: path + name: fine_tuning_job_id + required: true + schema: + type: string + example: ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tuning job to pause. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FineTuningJob' + x-oaiMeta: + name: Pause fine-tuning + group: fine-tuning + returns: The paused [fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning/object) object. 
+ examples: + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "gpt-4o-mini-2024-07-18", + "created_at": 1721764800, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "paused", + "validation_file": "file-abc123", + "training_file": "file-abc123" + } + request: + curl: | + curl -X POST https://api.openai.com/v1/fine_tuning/jobs/ftjob-abc123/pause \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + fine_tuning_job = client.fine_tuning.jobs.pause( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + print(fine_tuning_job.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.pause('ft-AF1WoRqd3aJAHsqc9NY7iL8F'); + + console.log(fineTuningJob.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.Pause(context.TODO(), "ft-AF1WoRqd3aJAHsqc9NY7iL8F") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobPauseParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FineTuningJob fineTuningJob = client.fineTuning().jobs().pause("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + fine_tuning_job = openai.fine_tuning.jobs.pause("ft-AF1WoRqd3aJAHsqc9NY7iL8F") + + puts(fine_tuning_job) + description: | + Pause a fine-tune job. + /fine_tuning/jobs/{fine_tuning_job_id}/resume: + post: + operationId: resumeFineTuningJob + tags: + - Fine-tuning + summary: Resume fine-tuning + parameters: + - in: path + name: fine_tuning_job_id + required: true + schema: + type: string + example: ft-AF1WoRqd3aJAHsqc9NY7iL8F + description: | + The ID of the fine-tuning job to resume. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FineTuningJob' + x-oaiMeta: + name: Resume fine-tuning + group: fine-tuning + returns: The resumed [fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning/object) object. 
+ examples: + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "gpt-4o-mini-2024-07-18", + "created_at": 1721764800, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "queued", + "validation_file": "file-abc123", + "training_file": "file-abc123" + } + request: + curl: | + curl -X POST https://api.openai.com/v1/fine_tuning/jobs/ftjob-abc123/resume \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + fine_tuning_job = client.fine_tuning.jobs.resume( + "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + ) + print(fine_tuning_job.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const fineTuningJob = await client.fineTuning.jobs.resume('ft-AF1WoRqd3aJAHsqc9NY7iL8F'); + + console.log(fineTuningJob.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + fineTuningJob, err := client.FineTuning.Jobs.Resume(context.TODO(), "ft-AF1WoRqd3aJAHsqc9NY7iL8F") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", fineTuningJob.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.finetuning.jobs.FineTuningJob; + import com.openai.models.finetuning.jobs.JobResumeParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FineTuningJob fineTuningJob = client.fineTuning().jobs().resume("ft-AF1WoRqd3aJAHsqc9NY7iL8F"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + fine_tuning_job = openai.fine_tuning.jobs.resume("ft-AF1WoRqd3aJAHsqc9NY7iL8F") + + puts(fine_tuning_job) + description: | + Resume a fine-tune job. + /images/edits: + post: + operationId: createImageEdit + tags: + - Images + summary: Create image edit + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateImageEditRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ImagesResponse' + text/event-stream: + schema: + $ref: '#/components/schemas/ImageEditStreamEvent' + x-oaiMeta: + name: Create image edit + group: images + returns: Returns an [image](https://platform.openai.com/docs/api-reference/images/object) object. 
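+ # NOTE (editorial): with `stream=true` this endpoint emits `image_edit.partial_image`
+ # events followed by `image_edit.completed` (see the Streaming example below). A sketch
+ # of consuming the stream, assuming a Python SDK version that accepts `stream=True` here:
+ #   stream = client.images.edit(
+ #       model="gpt-image-1",
+ #       image=open("body-lotion.png", "rb"),
+ #       prompt="Create a lovely gift basket with these four items in it",
+ #       stream=True,
+ #   )
+ #   for event in stream:
+ #       print(event.type)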
+ examples: + - title: Edit image + request: + curl: | + curl -s -D >(grep -i x-request-id >&2) \ + -o >(jq -r '.data[0].b64_json' | base64 --decode > gift-basket.png) \ + -X POST "https://api.openai.com/v1/images/edits" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -F "model=gpt-image-1" \ + -F "image[]=@body-lotion.png" \ + -F "image[]=@bath-bomb.png" \ + -F "image[]=@incense-kit.png" \ + -F "image[]=@soap.png" \ + -F 'prompt=Create a lovely gift basket with these four items in it' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + images_response = client.images.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + ) + print(images_response) + node.js: |- + import OpenAI from 'openai'; + import fs from 'fs'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const imagesResponse = await client.images.edit({ + image: fs.createReadStream('path/to/file'), + prompt: 'A cute baby sea otter wearing a beret', + }); + + console.log(imagesResponse); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + imagesResponse, err := client.Images.Edit(context.TODO(), openai.ImageEditParams{ + Image: openai.ImageEditParamsImageUnion{ + OfFile: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + }, + Prompt: "A cute baby sea otter wearing a beret", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", imagesResponse) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.images.ImageEditParams; + import com.openai.models.images.ImagesResponse; + import java.io.ByteArrayInputStream; + import java.io.InputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ImageEditParams params = ImageEditParams.builder() + .image(new ByteArrayInputStream("some content".getBytes())) + .prompt("A cute baby sea otter wearing a beret") + .build(); + ImagesResponse imagesResponse = client.images().edit(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + images_response = openai.images.edit(image: Pathname(__FILE__), prompt: "A cute baby sea otter + wearing a beret") + + + puts(images_response) + - title: Streaming + request: + curl: | + curl -s -N -X POST "https://api.openai.com/v1/images/edits" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -F "model=gpt-image-1" \ + -F "image[]=@body-lotion.png" \ + -F "image[]=@bath-bomb.png" \ + -F "image[]=@incense-kit.png" \ + -F "image[]=@soap.png" \ + -F 'prompt=Create a lovely gift basket with these four items in it' \ + -F "stream=true" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + images_response = client.images.edit( + image=b"raw file contents", + prompt="A cute baby sea otter wearing a beret", + ) + print(images_response) + node.js: |- + import OpenAI from 'openai'; + import fs from 'fs'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const imagesResponse = await client.images.edit({ + image: fs.createReadStream('path/to/file'), + prompt: 'A cute baby sea otter wearing a beret', + }); + + console.log(imagesResponse); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + 
"io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + imagesResponse, err := client.Images.Edit(context.TODO(), openai.ImageEditParams{ + Image: openai.ImageEditParamsImageUnion{ + OfFile: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + }, + Prompt: "A cute baby sea otter wearing a beret", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", imagesResponse) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.images.ImageEditParams; + import com.openai.models.images.ImagesResponse; + import java.io.ByteArrayInputStream; + import java.io.InputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ImageEditParams params = ImageEditParams.builder() + .image(ByteArrayInputStream("some content".getBytes())) + .prompt("A cute baby sea otter wearing a beret") + .build(); + ImagesResponse imagesResponse = client.images().edit(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + images_response = openai.images.edit(image: Pathname(__FILE__), prompt: "A cute baby sea otter + wearing a beret") + + + puts(images_response) + response: > + event: image_edit.partial_image + + data: {"type":"image_edit.partial_image","b64_json":"...","partial_image_index":0} + + + event: image_edit.completed + + data: + {"type":"image_edit.completed","b64_json":"...","usage":{"total_tokens":100,"input_tokens":50,"output_tokens":50,"input_tokens_details":{"text_tokens":10,"image_tokens":40}}} + description: >- + Creates an edited or extended image given one or more source images and a prompt. This endpoint only + supports `gpt-image-1` and `dall-e-2`. + /images/generations: + post: + operationId: createImage + tags: + - Images + summary: Create image + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateImageRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ImagesResponse' + text/event-stream: + schema: + $ref: '#/components/schemas/ImageGenStreamEvent' + x-oaiMeta: + name: Create image + group: images + returns: Returns an [image](https://platform.openai.com/docs/api-reference/images/object) object. 
+ examples: + - title: Generate image + request: + curl: | + curl https://api.openai.com/v1/images/generations \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-image-1", + "prompt": "A cute baby sea otter", + "n": 1, + "size": "1024x1024" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + images_response = client.images.generate( + prompt="A cute baby sea otter", + ) + print(images_response) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const imagesResponse = await client.images.generate({ prompt: 'A cute baby sea otter' }); + + console.log(imagesResponse); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + imagesResponse, err := client.Images.Generate(context.TODO(), openai.ImageGenerateParams{ + Prompt: "A cute baby sea otter", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", imagesResponse) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.images.ImageGenerateParams; + import com.openai.models.images.ImagesResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ImageGenerateParams params = ImageGenerateParams.builder() + .prompt("A cute baby sea otter") + .build(); + ImagesResponse imagesResponse = client.images().generate(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + images_response = openai.images.generate(prompt: "A cute baby sea otter") + + puts(images_response) + response: | + { + "created": 1713833628, + "data": [ + { + "b64_json": "..." 
+ } + ], + "usage": { + "total_tokens": 100, + "input_tokens": 50, + "output_tokens": 50, + "input_tokens_details": { + "text_tokens": 10, + "image_tokens": 40 + } + } + } + - title: Streaming + request: + curl: | + curl https://api.openai.com/v1/images/generations \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-image-1", + "prompt": "A cute baby sea otter", + "n": 1, + "size": "1024x1024", + "stream": true + }' \ + --no-buffer + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + images_response = client.images.generate( + prompt="A cute baby sea otter", + ) + print(images_response) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const imagesResponse = await client.images.generate({ prompt: 'A cute baby sea otter' }); + + console.log(imagesResponse); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + imagesResponse, err := client.Images.Generate(context.TODO(), openai.ImageGenerateParams{ + Prompt: "A cute baby sea otter", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", imagesResponse) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.images.ImageGenerateParams; + import com.openai.models.images.ImagesResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ImageGenerateParams params = ImageGenerateParams.builder() + .prompt("A cute baby sea otter") + .build(); + ImagesResponse imagesResponse = client.images().generate(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + images_response = openai.images.generate(prompt: "A cute baby sea otter") + + puts(images_response) + response: > + event: image_generation.partial_image + + data: {"type":"image_generation.partial_image","b64_json":"...","partial_image_index":0} + + + event: image_generation.completed + + data: + {"type":"image_generation.completed","b64_json":"...","usage":{"total_tokens":100,"input_tokens":50,"output_tokens":50,"input_tokens_details":{"text_tokens":10,"image_tokens":40}}} + description: | + Creates an image given a prompt. [Learn more](https://platform.openai.com/docs/guides/images). + /images/variations: + post: + operationId: createImageVariation + tags: + - Images + summary: Create image variation + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateImageVariationRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ImagesResponse' + x-oaiMeta: + name: Create image variation + group: images + returns: Returns a list of [image](https://platform.openai.com/docs/api-reference/images/object) objects. + examples: + response: | + { + "created": 1589478378, + "data": [ + { + "url": "https://..." + }, + { + "url": "https://..." 
+ } + ] + } + request: + curl: | + curl https://api.openai.com/v1/images/variations \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -F image="@otter.png" \ + -F n=2 \ + -F size="1024x1024" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + images_response = client.images.create_variation( + image=b"raw file contents", + ) + print(images_response.created) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const imagesResponse = await client.images.createVariation({ image: + fs.createReadStream('otter.png') }); + + + console.log(imagesResponse.created); + csharp: | + using System; + + using OpenAI.Images; + + ImageClient client = new( + model: "dall-e-2", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + GeneratedImage image = client.GenerateImageVariation(imageFilePath: "otter.png"); + + Console.WriteLine(image.ImageUri); + go: | + package main + + import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + imagesResponse, err := client.Images.NewVariation(context.TODO(), openai.ImageNewVariationParams{ + Image: io.Reader(bytes.NewBuffer([]byte("some file contents"))), + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", imagesResponse.Created) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.images.ImageCreateVariationParams; + import com.openai.models.images.ImagesResponse; + import java.io.ByteArrayInputStream; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ImageCreateVariationParams params = ImageCreateVariationParams.builder() + .image(ByteArrayInputStream("some content".getBytes())) + .build(); + ImagesResponse imagesResponse = client.images().createVariation(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + images_response = openai.images.create_variation(image: Pathname(__FILE__)) + + puts(images_response) + description: Creates a variation of a given image. This endpoint only supports `dall-e-2`. + /models: + get: + operationId: listModels + tags: + - Models + summary: List models + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListModelsResponse' + x-oaiMeta: + name: List models + group: models + returns: A list of [model](https://platform.openai.com/docs/api-reference/models/object) objects. 
+ examples: + response: | + { + "object": "list", + "data": [ + { + "id": "model-id-0", + "object": "model", + "created": 1686935002, + "owned_by": "organization-owner" + }, + { + "id": "model-id-1", + "object": "model", + "created": 1686935002, + "owned_by": "organization-owner" + }, + { + "id": "model-id-2", + "object": "model", + "created": 1686935002, + "owned_by": "openai" + } + ] + } + request: + curl: | + curl https://api.openai.com/v1/models \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.models.list() + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const model of client.models.list()) { + console.log(model.id); + } + csharp: | + using System; + + using OpenAI.Models; + + OpenAIModelClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + foreach (var model in client.GetModels().Value) + { + Console.WriteLine(model.Id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Models.List(context.TODO()) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.models.ModelListPage; + import com.openai.models.models.ModelListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ModelListPage page = client.models().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.models.list + + puts(page) + description: >- + Lists the currently available models, and provides basic information about each one such as the owner + and availability. + /models/{model}: + get: + operationId: retrieveModel + tags: + - Models + summary: Retrieve model + parameters: + - in: path + name: model + required: true + schema: + type: string + example: gpt-4o-mini + description: The ID of the model to use for this request + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Model' + x-oaiMeta: + name: Retrieve model + group: models + returns: >- + The [model](https://platform.openai.com/docs/api-reference/models/object) object matching the + specified ID. 
+ examples: + response: | + { + "id": "VAR_chat_model_id", + "object": "model", + "created": 1686935002, + "owned_by": "openai" + } + request: + curl: | + curl https://api.openai.com/v1/models/VAR_chat_model_id \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + model = client.models.retrieve( + "gpt-4o-mini", + ) + print(model.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const model = await client.models.retrieve('gpt-4o-mini'); + + console.log(model.id); + csharp: | + using System; + using System.ClientModel; + + using OpenAI.Models; + + OpenAIModelClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + var model = client.GetModel("babbage-002"); + Console.WriteLine(model.Value.Id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + model, err := client.Models.Get(context.TODO(), "gpt-4o-mini") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", model.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.models.Model; + import com.openai.models.models.ModelRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Model model = client.models().retrieve("gpt-4o-mini"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + model = openai.models.retrieve("gpt-4o-mini") + + puts(model) + description: >- + Retrieves a model instance, providing basic information about the model such as the owner and + permissioning. + delete: + operationId: deleteModel + tags: + - Models + summary: Delete a fine-tuned model + parameters: + - in: path + name: model + required: true + schema: + type: string + example: ft:gpt-4o-mini:acemeco:suffix:abc123 + description: The model to delete + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteModelResponse' + x-oaiMeta: + name: Delete a fine-tuned model + group: models + returns: Deletion status. 
+ examples: + response: | + { + "id": "ft:gpt-4o-mini:acemeco:suffix:abc123", + "object": "model", + "deleted": true + } + request: + curl: | + curl https://api.openai.com/v1/models/ft:gpt-4o-mini:acemeco:suffix:abc123 \ + -X DELETE \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + model_deleted = client.models.delete( + "ft:gpt-4o-mini:acemeco:suffix:abc123", + ) + print(model_deleted.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const modelDeleted = await client.models.delete('ft:gpt-4o-mini:acemeco:suffix:abc123'); + + console.log(modelDeleted.id); + csharp: | + using System; + using System.ClientModel; + + using OpenAI.Models; + + OpenAIModelClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + ClientResult success = client.DeleteModel("ft:gpt-4o-mini:acemeco:suffix:abc123"); + Console.WriteLine(success); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + modelDeleted, err := client.Models.Delete(context.TODO(), "ft:gpt-4o-mini:acemeco:suffix:abc123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", modelDeleted.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.models.ModelDeleteParams; + import com.openai.models.models.ModelDeleted; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ModelDeleted modelDeleted = client.models().delete("ft:gpt-4o-mini:acemeco:suffix:abc123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + model_deleted = openai.models.delete("ft:gpt-4o-mini:acemeco:suffix:abc123") + + puts(model_deleted) + description: Delete a fine-tuned model. You must have the Owner role in your organization to delete a model. + /moderations: + post: + operationId: createModeration + tags: + - Moderations + summary: Create moderation + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateModerationRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/CreateModerationResponse' + x-oaiMeta: + name: Create moderation + group: moderations + returns: A [moderation](https://platform.openai.com/docs/api-reference/moderations/object) object. + examples: + - title: Single string + request: + curl: | + curl https://api.openai.com/v1/moderations \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "input": "I want to kill them." + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + moderation = client.moderations.create( + input="I want to kill them.", + ) + print(moderation.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const moderation = await client.moderations.create({ input: 'I want to kill them.' 
});
+
+                console.log(moderation.id);
+              csharp: |
+                using System;
+                using System.ClientModel;
+
+                using OpenAI.Moderations;
+
+                ModerationClient client = new(
+                    model: "omni-moderation-latest",
+                    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+                );
+
+                ClientResult moderation = client.ClassifyText("I want to kill them.");
+              go: |
+                package main
+
+                import (
+                  "context"
+                  "fmt"
+
+                  "github.com/openai/openai-go"
+                  "github.com/openai/openai-go/option"
+                )
+
+                func main() {
+                  client := openai.NewClient(
+                    option.WithAPIKey("My API Key"),
+                  )
+                  moderation, err := client.Moderations.New(context.TODO(), openai.ModerationNewParams{
+                    Input: openai.ModerationNewParamsInputUnion{
+                      OfString: openai.String("I want to kill them."),
+                    },
+                  })
+                  if err != nil {
+                    panic(err.Error())
+                  }
+                  fmt.Printf("%+v\n", moderation.ID)
+                }
+              java: |-
+                package com.openai.example;
+
+                import com.openai.client.OpenAIClient;
+                import com.openai.client.okhttp.OpenAIOkHttpClient;
+                import com.openai.models.moderations.ModerationCreateParams;
+                import com.openai.models.moderations.ModerationCreateResponse;
+
+                public final class Main {
+                    private Main() {}
+
+                    public static void main(String[] args) {
+                        OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                        ModerationCreateParams params = ModerationCreateParams.builder()
+                            .input("I want to kill them.")
+                            .build();
+                        ModerationCreateResponse moderation = client.moderations().create(params);
+                    }
+                }
+              ruby: |-
+                require "openai"
+
+                openai = OpenAI::Client.new(api_key: "My API Key")
+
+                moderation = openai.moderations.create(input: "I want to kill them.")
+
+                puts(moderation)
+            response: |
+              {
+                "id": "modr-AB8CjOTu2jiq12hp1AQPfeqFWaORR",
+                "model": "text-moderation-007",
+                "results": [
+                  {
+                    "flagged": true,
+                    "categories": {
+                      "sexual": false,
+                      "hate": false,
+                      "harassment": true,
+                      "self-harm": false,
+                      "sexual/minors": false,
+                      "hate/threatening": false,
+                      "violence/graphic": false,
+                      "self-harm/intent": false,
+                      "self-harm/instructions": false,
+                      "harassment/threatening": true,
+                      "violence": true
+                    },
+                    "category_scores": {
+                      "sexual": 0.000011726012417057063,
+                      "hate": 0.22706663608551025,
+                      "harassment": 0.5215635299682617,
+                      "self-harm": 2.227119921371923e-6,
+                      "sexual/minors": 7.107352217872176e-8,
+                      "hate/threatening": 0.023547329008579254,
+                      "violence/graphic": 0.00003391829886822961,
+                      "self-harm/intent": 1.646940972932498e-6,
+                      "self-harm/instructions": 1.1198755256458526e-9,
+                      "harassment/threatening": 0.5694745779037476,
+                      "violence": 0.9971134662628174
+                    }
+                  }
+                ]
+              }
+          - title: Image and text
+            request:
+              curl: |
+                curl https://api.openai.com/v1/moderations \
+                  -X POST \
+                  -H "Content-Type: application/json" \
+                  -H "Authorization: Bearer $OPENAI_API_KEY" \
+                  -d '{
+                    "model": "omni-moderation-latest",
+                    "input": [
+                      { "type": "text", "text": "...text to classify goes here..." },
+                      {
+                        "type": "image_url",
+                        "image_url": {
+                          "url": "https://example.com/image.png"
+                        }
+                      }
+                    ]
+                  }'
+              python: |-
+                from openai import OpenAI
+
+                client = OpenAI(
+                    api_key="My API Key",
+                )
+                moderation = client.moderations.create(
+                    model="omni-moderation-latest",
+                    input=[
+                        {"type": "text", "text": "...text to classify goes here..."},
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": "https://example.com/image.png"},
+                        },
+                    ],
+                )
+                print(moderation.id)
+              node.js: |-
+                import OpenAI from 'openai';
+
+                const client = new OpenAI({
+                  apiKey: 'My API Key',
+                });
+
+                const moderation = await client.moderations.create({
+                  model: 'omni-moderation-latest',
+                  input: [
+                    { type: 'text', text: '...text to classify goes here...' },
+                    {
+                      type: 'image_url',
+                      image_url: { url: 'https://example.com/image.png' },
+                    },
+                  ],
}); + + console.log(moderation.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + moderation, err := client.Moderations.New(context.TODO(), openai.ModerationNewParams{ + Input: openai.ModerationNewParamsInputUnion{ + OfString: openai.String("I want to kill them."), + }, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", moderation.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.moderations.ModerationCreateParams; + import com.openai.models.moderations.ModerationCreateResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ModerationCreateParams params = ModerationCreateParams.builder() + .input("I want to kill them.") + .build(); + ModerationCreateResponse moderation = client.moderations().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + moderation = openai.moderations.create(input: "I want to kill them.") + + puts(moderation) + response: | + { + "id": "modr-0d9740456c391e43c445bf0f010940c7", + "model": "omni-moderation-latest", + "results": [ + { + "flagged": true, + "categories": { + "harassment": true, + "harassment/threatening": true, + "sexual": false, + "hate": false, + "hate/threatening": false, + "illicit": false, + "illicit/violent": false, + "self-harm/intent": false, + "self-harm/instructions": false, + "self-harm": false, + "sexual/minors": false, + "violence": true, + "violence/graphic": true + }, + "category_scores": { + "harassment": 0.8189693396524255, + "harassment/threatening": 0.804985420696006, + "sexual": 1.573112165348997e-6, + "hate": 0.007562942636942845, + "hate/threatening": 0.004208854591835476, + "illicit": 0.030535955153511665, + "illicit/violent": 0.008925306722380033, + "self-harm/intent": 0.00023023930975076432, + "self-harm/instructions": 0.0002293869201073356, + "self-harm": 0.012598046106750154, + "sexual/minors": 2.212566909570261e-8, + "violence": 0.9999992735124786, + "violence/graphic": 0.843064871157054 + }, + "category_applied_input_types": { + "harassment": [ + "text" + ], + "harassment/threatening": [ + "text" + ], + "sexual": [ + "text", + "image" + ], + "hate": [ + "text" + ], + "hate/threatening": [ + "text" + ], + "illicit": [ + "text" + ], + "illicit/violent": [ + "text" + ], + "self-harm/intent": [ + "text", + "image" + ], + "self-harm/instructions": [ + "text", + "image" + ], + "self-harm": [ + "text", + "image" + ], + "sexual/minors": [ + "text" + ], + "violence": [ + "text", + "image" + ], + "violence/graphic": [ + "text", + "image" + ] + } + } + ] + } + description: | + Classifies if text and/or image inputs are potentially harmful. Learn + more in the [moderation guide](https://platform.openai.com/docs/guides/moderation). + /organization/admin_api_keys: + get: + summary: List all organization and project API keys. + operationId: admin-api-keys-list + description: List organization API keys + parameters: + - in: query + name: after + required: false + schema: + type: string + nullable: true + description: Return keys with IDs that come after this ID in the pagination order. 
+ - in: query + name: order + required: false + schema: + type: string + enum: + - asc + - desc + default: asc + description: Order results by creation time, ascending or descending. + - in: query + name: limit + required: false + schema: + type: integer + default: 20 + description: Maximum number of keys to return. + responses: + '200': + description: A list of organization API keys. + content: + application/json: + schema: + $ref: '#/components/schemas/ApiKeyList' + x-oaiMeta: + name: List all organization and project API keys. + group: administration + returns: A list of admin and project API key objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "organization.admin_api_key", + "id": "key_abc", + "name": "Main Admin Key", + "redacted_value": "sk-admin...def", + "created_at": 1711471533, + "last_used_at": 1711471534, + "owner": { + "type": "service_account", + "object": "organization.service_account", + "id": "sa_456", + "name": "My Service Account", + "created_at": 1711471533, + "role": "member" + } + } + ], + "first_id": "key_abc", + "last_id": "key_abc", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/organization/admin_api_keys?after=key_abc&limit=20 \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + post: + summary: Create admin API key + operationId: admin-api-keys-create + description: Create an organization admin API key + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - name + properties: + name: + type: string + example: New Admin Key + responses: + '200': + description: The newly created admin API key. + content: + application/json: + schema: + $ref: '#/components/schemas/AdminApiKey' + x-oaiMeta: + name: Create admin API key + group: administration + returns: >- + The created [AdminApiKey](https://platform.openai.com/docs/api-reference/admin-api-keys/object) + object. + examples: + response: | + { + "object": "organization.admin_api_key", + "id": "key_xyz", + "name": "New Admin Key", + "redacted_value": "sk-admin...xyz", + "created_at": 1711471533, + "last_used_at": 1711471534, + "owner": { + "type": "user", + "object": "organization.user", + "id": "user_123", + "name": "John Doe", + "created_at": 1711471533, + "role": "owner" + }, + "value": "sk-admin-1234abcd" + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/admin_api_keys \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "New Admin Key" + }' + /organization/admin_api_keys/{key_id}: + get: + summary: Retrieve admin API key + operationId: admin-api-keys-get + description: Retrieve a single organization API key + parameters: + - in: path + name: key_id + required: true + schema: + type: string + description: The ID of the API key. + responses: + '200': + description: Details of the requested API key. + content: + application/json: + schema: + $ref: '#/components/schemas/AdminApiKey' + x-oaiMeta: + name: Retrieve admin API key + group: administration + returns: >- + The requested [AdminApiKey](https://platform.openai.com/docs/api-reference/admin-api-keys/object) + object. 
+ examples: + response: | + { + "object": "organization.admin_api_key", + "id": "key_abc", + "name": "Main Admin Key", + "redacted_value": "sk-admin...xyz", + "created_at": 1711471533, + "last_used_at": 1711471534, + "owner": { + "type": "user", + "object": "organization.user", + "id": "user_123", + "name": "John Doe", + "created_at": 1711471533, + "role": "owner" + } + } + request: + curl: | + curl https://api.openai.com/v1/organization/admin_api_keys/key_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + delete: + summary: Delete admin API key + operationId: admin-api-keys-delete + description: Delete an organization admin API key + parameters: + - in: path + name: key_id + required: true + schema: + type: string + description: The ID of the API key to be deleted. + responses: + '200': + description: Confirmation that the API key was deleted. + content: + application/json: + schema: + type: object + properties: + id: + type: string + example: key_abc + object: + type: string + example: organization.admin_api_key.deleted + deleted: + type: boolean + example: true + x-oaiMeta: + name: Delete admin API key + group: administration + returns: A confirmation object indicating the key was deleted. + examples: + response: | + { + "id": "key_abc", + "object": "organization.admin_api_key.deleted", + "deleted": true + } + request: + curl: | + curl -X DELETE https://api.openai.com/v1/organization/admin_api_keys/key_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + /organization/audit_logs: + get: + summary: List audit logs + operationId: list-audit-logs + tags: + - Audit Logs + parameters: + - name: effective_at + in: query + description: Return only events whose `effective_at` (Unix seconds) is in this range. + required: false + schema: + type: object + properties: + gt: + type: integer + description: Return only events whose `effective_at` (Unix seconds) is greater than this value. + gte: + type: integer + description: >- + Return only events whose `effective_at` (Unix seconds) is greater than or equal to this + value. + lt: + type: integer + description: Return only events whose `effective_at` (Unix seconds) is less than this value. + lte: + type: integer + description: Return only events whose `effective_at` (Unix seconds) is less than or equal to this value. + - name: project_ids[] + in: query + description: Return only events for these projects. + required: false + schema: + type: array + items: + type: string + - name: event_types[] + in: query + description: >- + Return only events with a `type` in one of these values. For example, `project.created`. For all + options, see the documentation for the [audit log + object](https://platform.openai.com/docs/api-reference/audit-logs/object). + required: false + schema: + type: array + items: + $ref: '#/components/schemas/AuditLogEventType' + - name: actor_ids[] + in: query + description: >- + Return only events performed by these actors. Can be a user ID, a service account ID, or an api + key tracking ID. + required: false + schema: + type: array + items: + type: string + - name: actor_emails[] + in: query + description: Return only events performed by users with these emails. + required: false + schema: + type: array + items: + type: string + - name: resource_ids[] + in: query + description: Return only events performed on these targets. For example, a project ID updated. 
+          required: false
+          schema:
+            type: array
+            items:
+              type: string
+        - name: limit
+          in: query
+          description: >
+            A limit on the number of objects to be returned. Limit can range between 1 and 100, and the
+            default is 20.
+          required: false
+          schema:
+            type: integer
+            default: 20
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent
+            call can include after=obj_foo in order to fetch the next page of the list.
+          schema:
+            type: string
+        - name: before
+          in: query
+          description: >
+            A cursor for use in pagination. `before` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, starting with obj_foo, your
+            subsequent call can include before=obj_foo in order to fetch the previous page of the list.
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Audit logs listed successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListAuditLogsResponse'
+      x-oaiMeta:
+        name: List audit logs
+        group: audit-logs
+        returns: >-
+          A list of paginated [Audit Log](https://platform.openai.com/docs/api-reference/audit-logs/object)
+          objects.
+        examples:
+          response: |
+            {
+              "object": "list",
+              "data": [
+                {
+                  "id": "audit_log-xxx__20240101",
+                  "type": "project.archived",
+                  "effective_at": 1722461446,
+                  "actor": {
+                    "type": "api_key",
+                    "api_key": {
+                      "type": "user",
+                      "user": {
+                        "id": "user-xxx",
+                        "email": "user@example.com"
+                      }
+                    }
+                  },
+                  "project.archived": {
+                    "id": "proj_abc"
+                  }
+                },
+                {
+                  "id": "audit_log-yyy__20240101",
+                  "type": "api_key.updated",
+                  "effective_at": 1720804190,
+                  "actor": {
+                    "type": "session",
+                    "session": {
+                      "user": {
+                        "id": "user-xxx",
+                        "email": "user@example.com"
+                      },
+                      "ip_address": "127.0.0.1",
+                      "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+                      "ja3": "a497151ce4338a12c4418c44d375173e",
+                      "ja4": "q13d0313h3_55b375c5d22e_c7319ce65786",
+                      "ip_address_details": {
+                        "country": "US",
+                        "city": "San Francisco",
+                        "region": "California",
+                        "region_code": "CA",
+                        "asn": "1234",
+                        "latitude": "37.77490",
+                        "longitude": "-122.41940"
+                      }
+                    }
+                  },
+                  "api_key.updated": {
+                    "id": "key_xxxx",
+                    "data": {
+                      "scopes": ["resource_2.operation_2"]
+                    }
+                  }
+                }
+              ],
+              "first_id": "audit_log-xxx__20240101",
+              "last_id": "audit_log-yyy__20240101",
+              "has_more": true
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/organization/audit_logs \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json"
+      description: List user actions and configuration changes within this organization.
+  /organization/certificates:
+    get:
+      summary: List organization certificates
+      operationId: listOrganizationCertificates
+      tags:
+        - Certificates
+      parameters:
+        - name: limit
+          in: query
+          description: >
+            A limit on the number of objects to be returned. Limit can range between 1 and 100, and the
+            default is 20.
+          required: false
+          schema:
+            type: integer
+            default: 20
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent
+            call can include after=obj_foo in order to fetch the next page of the list.
+          required: false
+          schema:
+            type: string
+        - name: order
+          in: query
+          description: >
+            Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for
+            descending order.
+          schema:
+            type: string
+            default: desc
+            enum:
+              - asc
+              - desc
+      responses:
+        '200':
+          description: Certificates listed successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListCertificatesResponse'
+      x-oaiMeta:
+        name: List organization certificates
+        group: administration
+        returns: A list of [Certificate](https://platform.openai.com/docs/api-reference/certificates/object) objects.
+        examples:
+          request:
+            curl: |
+              curl https://api.openai.com/v1/organization/certificates \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY"
+          response: |
+            {
+              "object": "list",
+              "data": [
+                {
+                  "object": "organization.certificate",
+                  "id": "cert_abc",
+                  "name": "My Example Certificate",
+                  "active": true,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                }
+              ],
+              "first_id": "cert_abc",
+              "last_id": "cert_abc",
+              "has_more": false
+            }
+      description: List uploaded certificates for this organization.
+    post:
+      summary: Upload certificate
+      operationId: uploadCertificate
+      tags:
+        - Certificates
+      requestBody:
+        description: The certificate upload payload.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UploadCertificateRequest'
+      responses:
+        '200':
+          description: Certificate uploaded successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Certificate'
+      x-oaiMeta:
+        name: Upload certificate
+        group: administration
+        returns: A single [Certificate](https://platform.openai.com/docs/api-reference/certificates/object) object.
+        examples:
+          request:
+            curl: |
+              curl -X POST https://api.openai.com/v1/organization/certificates \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "name": "My Example Certificate",
+                  "certificate": "-----BEGIN CERTIFICATE-----\\nMIIDeT...\\n-----END CERTIFICATE-----"
+                }'
+          response: |
+            {
+              "object": "certificate",
+              "id": "cert_abc",
+              "name": "My Example Certificate",
+              "created_at": 1234567,
+              "certificate_details": {
+                "valid_at": 12345667,
+                "expires_at": 12345678
+              }
+            }
+      description: |
+        Upload a certificate to the organization. This does **not** automatically activate the certificate.
+
+        Organizations can upload up to 50 certificates.
+  /organization/certificates/activate:
+    post:
+      summary: Activate certificates for organization
+      operationId: activateOrganizationCertificates
+      tags:
+        - Certificates
+      requestBody:
+        description: The certificate activation payload.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ToggleCertificatesRequest'
+      responses:
+        '200':
+          description: Certificates activated successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListCertificatesResponse'
+      x-oaiMeta:
+        name: Activate certificates for organization
+        group: administration
+        returns: >-
+          A list of [Certificate](https://platform.openai.com/docs/api-reference/certificates/object) objects
+          that were activated.
+        examples:
+          request:
+            curl: |
+              curl https://api.openai.com/v1/organization/certificates/activate \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "data": ["cert_abc", "cert_def"]
+                }'
+          response: |
+            {
+              "object": "organization.certificate.activation",
+              "data": [
+                {
+                  "object": "organization.certificate",
+                  "id": "cert_abc",
+                  "name": "My Example Certificate",
+                  "active": true,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                },
+                {
+                  "object": "organization.certificate",
+                  "id": "cert_def",
+                  "name": "My Example Certificate 2",
+                  "active": true,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                }
+              ]
+            }
+      description: |
+        Activate certificates at the organization level.
+
+        You can atomically and idempotently activate up to 10 certificates at a time.
+  /organization/certificates/deactivate:
+    post:
+      summary: Deactivate certificates for organization
+      operationId: deactivateOrganizationCertificates
+      tags:
+        - Certificates
+      requestBody:
+        description: The certificate deactivation payload.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ToggleCertificatesRequest'
+      responses:
+        '200':
+          description: Certificates deactivated successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListCertificatesResponse'
+      x-oaiMeta:
+        name: Deactivate certificates for organization
+        group: administration
+        returns: >-
+          A list of [Certificate](https://platform.openai.com/docs/api-reference/certificates/object) objects
+          that were deactivated.
+        examples:
+          request:
+            curl: |
+              curl https://api.openai.com/v1/organization/certificates/deactivate \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "data": ["cert_abc", "cert_def"]
+                }'
+          response: |
+            {
+              "object": "organization.certificate.deactivation",
+              "data": [
+                {
+                  "object": "organization.certificate",
+                  "id": "cert_abc",
+                  "name": "My Example Certificate",
+                  "active": false,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                },
+                {
+                  "object": "organization.certificate",
+                  "id": "cert_def",
+                  "name": "My Example Certificate 2",
+                  "active": false,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                }
+              ]
+            }
+      description: |
+        Deactivate certificates at the organization level.
+
+        You can atomically and idempotently deactivate up to 10 certificates at a time.
+  /organization/certificates/{certificate_id}:
+    get:
+      summary: Get certificate
+      operationId: getCertificate
+      tags:
+        - Certificates
+      parameters:
+        - name: certificate_id
+          in: path
+          description: Unique ID of the certificate to retrieve.
+          required: true
+          schema:
+            type: string
+        - name: include
+          in: query
+          description: >-
+            A list of additional fields to include in the response. Currently the only supported value is
+            `content` to fetch the PEM content of the certificate.
+          required: false
+          schema:
+            type: array
+            items:
+              type: string
+              enum:
+                - content
+      responses:
+        '200':
+          description: Certificate retrieved successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Certificate'
+      x-oaiMeta:
+        name: Get certificate
+        group: administration
+        returns: A single [Certificate](https://platform.openai.com/docs/api-reference/certificates/object) object.
+        examples:
+          request:
+            curl: |
+              curl "https://api.openai.com/v1/organization/certificates/cert_abc?include[]=content" \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY"
+          response: |
+            {
+              "object": "certificate",
+              "id": "cert_abc",
+              "name": "My Example Certificate",
+              "created_at": 1234567,
+              "certificate_details": {
+                "valid_at": 1234567,
+                "expires_at": 12345678,
+                "content": "-----BEGIN CERTIFICATE-----MIIDeT...-----END CERTIFICATE-----"
+              }
+            }
+      description: |
+        Get a certificate that has been uploaded to the organization.
+
+        You can get a certificate regardless of whether it is active or not.
+    post:
+      summary: Modify certificate
+      operationId: modifyCertificate
+      tags:
+        - Certificates
+      parameters:
+        - name: certificate_id
+          in: path
+          description: Unique ID of the certificate to modify.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        description: The certificate modification payload.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ModifyCertificateRequest'
+      responses:
+        '200':
+          description: Certificate modified successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Certificate'
+      x-oaiMeta:
+        name: Modify certificate
+        group: administration
+        returns: >-
+          The updated [Certificate](https://platform.openai.com/docs/api-reference/certificates/object)
+          object.
+        examples:
+          request:
+            curl: |
+              curl -X POST https://api.openai.com/v1/organization/certificates/cert_abc \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "name": "Renamed Certificate"
+                }'
+          response: |
+            {
+              "object": "certificate",
+              "id": "cert_abc",
+              "name": "Renamed Certificate",
+              "created_at": 1234567,
+              "certificate_details": {
+                "valid_at": 12345667,
+                "expires_at": 12345678
+              }
+            }
+      description: |
+        Modify a certificate. Note that only the name can be modified.
+    delete:
+      summary: Delete certificate
+      operationId: deleteCertificate
+      tags:
+        - Certificates
+      parameters:
+        - name: certificate_id
+          in: path
+          description: Unique ID of the certificate to delete.
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Certificate deleted successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/DeleteCertificateResponse'
+      x-oaiMeta:
+        name: Delete certificate
+        group: administration
+        returns: A confirmation object indicating the certificate was deleted.
+        examples:
+          request:
+            curl: |
+              curl -X DELETE https://api.openai.com/v1/organization/certificates/cert_abc \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY"
+          response: |
+            {
+              "object": "certificate.deleted",
+              "id": "cert_abc"
+            }
+      description: |
+        Delete a certificate from the organization.
+
+        The certificate must be inactive for the organization and all projects.
+  /organization/costs:
+    get:
+      summary: Costs
+      operationId: usage-costs
+      tags:
+        - Usage
+      parameters:
+        - name: start_time
+          in: query
+          description: Start time (Unix seconds) of the query time range, inclusive.
+          required: true
+          schema:
+            type: integer
+        - name: end_time
+          in: query
+          description: End time (Unix seconds) of the query time range, exclusive.
+          required: false
+          schema:
+            type: integer
+        - name: bucket_width
+          in: query
+          description: Width of each time bucket in response. Currently only `1d` is supported, default to `1d`.
+          required: false
+          schema:
+            type: string
+            enum:
+              - 1d
+            default: 1d
+        - name: project_ids
+          in: query
+          description: Return only costs for these projects.
+          required: false
+          schema:
+            type: array
+            items:
+              type: string
+        - name: group_by
+          in: query
+          description: >-
+            Group the costs by the specified fields. Supported fields include `project_id`, `line_item` and any
+            combination of them.
+ required: false + schema: + type: array + items: + type: string + enum: + - project_id + - line_item + - name: limit + in: query + description: > + A limit on the number of buckets to be returned. Limit can range between 1 and 180, and the + default is 7. + required: false + schema: + type: integer + default: 7 + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Costs data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Costs + group: usage-costs + returns: >- + A list of paginated, time bucketed + [Costs](https://platform.openai.com/docs/api-reference/usage/costs_object) objects. + examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.costs.result", + "amount": { + "value": 0.06, + "currency": "usd" + }, + "line_item": null, + "project_id": null + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: | + curl "https://api.openai.com/v1/organization/costs?start_time=1730419200&limit=1" \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Get costs details for the organization. + /organization/invites: + get: + summary: List invites + operationId: list-invites + tags: + - Invites + parameters: + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + required: false + schema: + type: string + responses: + '200': + description: Invites listed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/InviteListResponse' + x-oaiMeta: + name: List invites + group: administration + returns: A list of [Invite](https://platform.openai.com/docs/api-reference/invite/object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "organization.invite", + "id": "invite-abc", + "email": "user@example.com", + "role": "owner", + "status": "accepted", + "invited_at": 1711471533, + "expires_at": 1711471533, + "accepted_at": 1711471533 + } + ], + "first_id": "invite-abc", + "last_id": "invite-abc", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/organization/invites?after=invite-abc&limit=20 \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Returns a list of invites in the organization. + post: + summary: Create invite + operationId: inviteUser + tags: + - Invites + requestBody: + description: The invite request payload. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/InviteRequest' + responses: + '200': + description: User invited successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Invite' + x-oaiMeta: + name: Create invite + group: administration + returns: The created [Invite](https://platform.openai.com/docs/api-reference/invite/object) object. + examples: + response: | + { + "object": "organization.invite", + "id": "invite-def", + "email": "anotheruser@example.com", + "role": "reader", + "status": "pending", + "invited_at": 1711471533, + "expires_at": 1711471533, + "accepted_at": null, + "projects": [ + { + "id": "project-xyz", + "role": "member" + }, + { + "id": "project-abc", + "role": "owner" + } + ] + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/invites \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "email": "anotheruser@example.com", + "role": "reader", + "projects": [ + { + "id": "project-xyz", + "role": "member" + }, + { + "id": "project-abc", + "role": "owner" + } + ] + }' + description: >- + Create an invite for a user to the organization. The invite must be accepted by the user before they + have access to the organization. + /organization/invites/{invite_id}: + get: + summary: Retrieve invite + operationId: retrieve-invite + tags: + - Invites + parameters: + - in: path + name: invite_id + required: true + schema: + type: string + description: The ID of the invite to retrieve. + responses: + '200': + description: Invite retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/Invite' + x-oaiMeta: + name: Retrieve invite + group: administration + returns: >- + The [Invite](https://platform.openai.com/docs/api-reference/invite/object) object matching the + specified ID. + examples: + response: | + { + "object": "organization.invite", + "id": "invite-abc", + "email": "user@example.com", + "role": "owner", + "status": "accepted", + "invited_at": 1711471533, + "expires_at": 1711471533, + "accepted_at": 1711471533 + } + request: + curl: | + curl https://api.openai.com/v1/organization/invites/invite-abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Retrieves an invite. + delete: + summary: Delete invite + operationId: delete-invite + tags: + - Invites + parameters: + - in: path + name: invite_id + required: true + schema: + type: string + description: The ID of the invite to delete. + responses: + '200': + description: Invite deleted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/InviteDeleteResponse' + x-oaiMeta: + name: Delete invite + group: administration + returns: Confirmation that the invite has been deleted + examples: + response: | + { + "object": "organization.invite.deleted", + "id": "invite-abc", + "deleted": true + } + request: + curl: | + curl -X DELETE https://api.openai.com/v1/organization/invites/invite-abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Delete an invite. If the invite has already been accepted, it cannot be deleted. + /organization/projects: + get: + summary: List projects + operationId: list-projects + tags: + - Projects + parameters: + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. 
For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + required: false + schema: + type: string + - name: include_archived + in: query + schema: + type: boolean + default: false + description: >- + If `true` returns all projects including those that have been `archived`. Archived projects are + not included by default. + responses: + '200': + description: Projects listed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectListResponse' + x-oaiMeta: + name: List projects + group: administration + returns: A list of [Project](https://platform.openai.com/docs/api-reference/projects/object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "proj_abc", + "object": "organization.project", + "name": "Project example", + "created_at": 1711471533, + "archived_at": null, + "status": "active" + } + ], + "first_id": "proj-abc", + "last_id": "proj-xyz", + "has_more": false + } + request: + curl: > + curl + https://api.openai.com/v1/organization/projects?after=proj_abc&limit=20&include_archived=false \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Returns a list of projects. + post: + summary: Create project + operationId: create-project + tags: + - Projects + requestBody: + description: The project create request payload. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectCreateRequest' + responses: + '200': + description: Project created successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/Project' + x-oaiMeta: + name: Create project + group: administration + returns: The created [Project](https://platform.openai.com/docs/api-reference/projects/object) object. + examples: + response: | + { + "id": "proj_abc", + "object": "organization.project", + "name": "Project ABC", + "created_at": 1711471533, + "archived_at": null, + "status": "active" + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/projects \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Project ABC" + }' + description: Create a new project in the organization. Projects can be created and archived, but cannot be deleted. + /organization/projects/{project_id}: + get: + summary: Retrieve project + operationId: retrieve-project + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + responses: + '200': + description: Project retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/Project' + x-oaiMeta: + name: Retrieve project + group: administration + description: Retrieve a project. + returns: >- + The [Project](https://platform.openai.com/docs/api-reference/projects/object) object matching the + specified ID. + examples: + response: | + { + "id": "proj_abc", + "object": "organization.project", + "name": "Project example", + "created_at": 1711471533, + "archived_at": null, + "status": "active" + } + request: + curl: | + curl https://api.openai.com/v1/organization/projects/proj_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Retrieves a project. 
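+    # Hedged, unofficial sketch (not part of the spec above): the organization
+    # endpoints in this section are plain REST calls, so the retrieve-project
+    # request can also be issued from Python with the `requests` package. The
+    # project ID and the OPENAI_ADMIN_KEY environment variable are assumed
+    # placeholders:
+    #
+    #   import os
+    #   import requests
+    #
+    #   resp = requests.get(
+    #       "https://api.openai.com/v1/organization/projects/proj_abc",
+    #       headers={"Authorization": f"Bearer {os.environ['OPENAI_ADMIN_KEY']}"},
+    #   )
+    #   resp.raise_for_status()
+    #   print(resp.json()["name"])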
+    post:
+      summary: Modify project
+      operationId: modify-project
+      tags:
+        - Projects
+      parameters:
+        - name: project_id
+          in: path
+          description: The ID of the project.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        description: The project update request payload.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ProjectUpdateRequest'
+      responses:
+        '200':
+          description: Project updated successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Project'
+        '400':
+          description: Error response when updating the default project.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorResponse'
+      x-oaiMeta:
+        name: Modify project
+        group: administration
+        returns: The updated [Project](https://platform.openai.com/docs/api-reference/projects/object) object.
+        examples:
+          response: |
+            {
+              "id": "proj_abc",
+              "object": "organization.project",
+              "name": "Project DEF",
+              "created_at": 1711471533,
+              "archived_at": null,
+              "status": "active"
+            }
+          request:
+            curl: |
+              curl -X POST https://api.openai.com/v1/organization/projects/proj_abc \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "name": "Project DEF"
+                }'
+      description: Modifies a project in the organization.
+  /organization/projects/{project_id}/api_keys:
+    get:
+      summary: List project API keys
+      operationId: list-project-api-keys
+      tags:
+        - Projects
+      parameters:
+        - name: project_id
+          in: path
+          description: The ID of the project.
+          required: true
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >
+            A limit on the number of objects to be returned. Limit can range between 1 and 100, and the
+            default is 20.
+          required: false
+          schema:
+            type: integer
+            default: 20
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent
+            call can include after=obj_foo in order to fetch the next page of the list.
+          required: false
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Project API keys listed successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ProjectApiKeyListResponse'
+      x-oaiMeta:
+        name: List project API keys
+        group: administration
+        returns: >-
+          A list of [ProjectApiKey](https://platform.openai.com/docs/api-reference/project-api-keys/object)
+          objects.
+        examples:
+          response: |
+            {
+              "object": "list",
+              "data": [
+                {
+                  "object": "organization.project.api_key",
+                  "redacted_value": "sk-abc...def",
+                  "name": "My API Key",
+                  "created_at": 1711471533,
+                  "last_used_at": 1711471534,
+                  "id": "key_abc",
+                  "owner": {
+                    "type": "user",
+                    "user": {
+                      "object": "organization.project.user",
+                      "id": "user_abc",
+                      "name": "First Last",
+                      "email": "user@example.com",
+                      "role": "owner",
+                      "added_at": 1711471533
+                    }
+                  }
+                }
+              ],
+              "first_id": "key_abc",
+              "last_id": "key_xyz",
+              "has_more": false
+            }
+          request:
+            curl: |
+              curl "https://api.openai.com/v1/organization/projects/proj_abc/api_keys?after=key_abc&limit=20" \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json"
+      description: Returns a list of API keys in the project.
+  /organization/projects/{project_id}/api_keys/{key_id}:
+    get:
+      summary: Retrieve project API key
+      operationId: retrieve-project-api-key
+      tags:
+        - Projects
+      parameters:
+        - name: project_id
+          in: path
+          description: The ID of the project.
+          required: true
+          schema:
+            type: string
+        - name: key_id
+          in: path
+          description: The ID of the API key.
+ required: true + schema: + type: string + responses: + '200': + description: Project API key retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectApiKey' + x-oaiMeta: + name: Retrieve project API key + group: administration + returns: >- + The [ProjectApiKey](https://platform.openai.com/docs/api-reference/project-api-keys/object) object + matching the specified ID. + examples: + response: | + { + "object": "organization.project.api_key", + "redacted_value": "sk-abc...def", + "name": "My API Key", + "created_at": 1711471533, + "last_used_at": 1711471534, + "id": "key_abc", + "owner": { + "type": "user", + "user": { + "object": "organization.project.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + } + } + request: + curl: | + curl https://api.openai.com/v1/organization/projects/proj_abc/api_keys/key_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Retrieves an API key in the project. + delete: + summary: Delete project API key + operationId: delete-project-api-key + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + - name: key_id + in: path + description: The ID of the API key. + required: true + schema: + type: string + responses: + '200': + description: Project API key deleted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectApiKeyDeleteResponse' + '400': + description: Error response for various conditions. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + x-oaiMeta: + name: Delete project API key + group: administration + returns: Confirmation of the key's deletion or an error if the key belonged to a service account + examples: + response: | + { + "object": "organization.project.api_key.deleted", + "id": "key_abc", + "deleted": true + } + request: + curl: | + curl -X DELETE https://api.openai.com/v1/organization/projects/proj_abc/api_keys/key_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Deletes an API key from the project. + /organization/projects/{project_id}/archive: + post: + summary: Archive project + operationId: archive-project + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + responses: + '200': + description: Project archived successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/Project' + x-oaiMeta: + name: Archive project + group: administration + returns: The archived [Project](https://platform.openai.com/docs/api-reference/projects/object) object. + examples: + response: | + { + "id": "proj_abc", + "object": "organization.project", + "name": "Project DEF", + "created_at": 1711471533, + "archived_at": 1711471533, + "status": "archived" + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/projects/proj_abc/archive \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Archives a project in the organization. Archived projects cannot be used or updated. 
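+  # A comparable hedged sketch for the archive endpoint above, again with the
+  # `requests` package and an illustrative project ID; archiving is a POST
+  # with no request body, so only the auth header is sent:
+  #
+  #   import os
+  #   import requests
+  #
+  #   resp = requests.post(
+  #       "https://api.openai.com/v1/organization/projects/proj_abc/archive",
+  #       headers={"Authorization": f"Bearer {os.environ['OPENAI_ADMIN_KEY']}"},
+  #   )
+  #   resp.raise_for_status()
+  #   assert resp.json()["status"] == "archived"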
+  /organization/projects/{project_id}/certificates:
+    get:
+      summary: List project certificates
+      operationId: listProjectCertificates
+      tags:
+        - Certificates
+      parameters:
+        - name: project_id
+          in: path
+          description: The ID of the project.
+          required: true
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >
+            A limit on the number of objects to be returned. Limit can range between 1 and 100, and the
+            default is 20.
+          required: false
+          schema:
+            type: integer
+            default: 20
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent
+            call can include after=obj_foo in order to fetch the next page of the list.
+          required: false
+          schema:
+            type: string
+        - name: order
+          in: query
+          description: >
+            Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for
+            descending order.
+          schema:
+            type: string
+            default: desc
+            enum:
+              - asc
+              - desc
+      responses:
+        '200':
+          description: Certificates listed successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListCertificatesResponse'
+      x-oaiMeta:
+        name: List project certificates
+        group: administration
+        returns: A list of [Certificate](https://platform.openai.com/docs/api-reference/certificates/object) objects.
+        examples:
+          request:
+            curl: |
+              curl https://api.openai.com/v1/organization/projects/proj_abc/certificates \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY"
+          response: |
+            {
+              "object": "list",
+              "data": [
+                {
+                  "object": "organization.project.certificate",
+                  "id": "cert_abc",
+                  "name": "My Example Certificate",
+                  "active": true,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                }
+              ],
+              "first_id": "cert_abc",
+              "last_id": "cert_abc",
+              "has_more": false
+            }
+      description: List certificates for this project.
+  /organization/projects/{project_id}/certificates/activate:
+    post:
+      summary: Activate certificates for project
+      operationId: activateProjectCertificates
+      tags:
+        - Certificates
+      parameters:
+        - name: project_id
+          in: path
+          description: The ID of the project.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        description: The certificate activation payload.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ToggleCertificatesRequest'
+      responses:
+        '200':
+          description: Certificates activated successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListCertificatesResponse'
+      x-oaiMeta:
+        name: Activate certificates for project
+        group: administration
+        returns: >-
+          A list of [Certificate](https://platform.openai.com/docs/api-reference/certificates/object) objects
+          that were activated.
+        examples:
+          request:
+            curl: |
+              curl https://api.openai.com/v1/organization/projects/proj_abc/certificates/activate \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "data": ["cert_abc", "cert_def"]
+                }'
+          response: |
+            {
+              "object": "organization.project.certificate.activation",
+              "data": [
+                {
+                  "object": "organization.project.certificate",
+                  "id": "cert_abc",
+                  "name": "My Example Certificate",
+                  "active": true,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                },
+                {
+                  "object": "organization.project.certificate",
+                  "id": "cert_def",
+                  "name": "My Example Certificate 2",
+                  "active": true,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                }
+              ]
+            }
+      description: |
+        Activate certificates at the project level.
+
+        You can atomically and idempotently activate up to 10 certificates at a time.
+  /organization/projects/{project_id}/certificates/deactivate:
+    post:
+      summary: Deactivate certificates for project
+      operationId: deactivateProjectCertificates
+      tags:
+        - Certificates
+      parameters:
+        - name: project_id
+          in: path
+          description: The ID of the project.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        description: The certificate deactivation payload.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ToggleCertificatesRequest'
+      responses:
+        '200':
+          description: Certificates deactivated successfully.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListCertificatesResponse'
+      x-oaiMeta:
+        name: Deactivate certificates for project
+        group: administration
+        returns: >-
+          A list of [Certificate](https://platform.openai.com/docs/api-reference/certificates/object) objects
+          that were deactivated.
+        examples:
+          request:
+            curl: |
+              curl https://api.openai.com/v1/organization/projects/proj_abc/certificates/deactivate \
+                -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "data": ["cert_abc", "cert_def"]
+                }'
+          response: |
+            {
+              "object": "organization.project.certificate.deactivation",
+              "data": [
+                {
+                  "object": "organization.project.certificate",
+                  "id": "cert_abc",
+                  "name": "My Example Certificate",
+                  "active": false,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                },
+                {
+                  "object": "organization.project.certificate",
+                  "id": "cert_def",
+                  "name": "My Example Certificate 2",
+                  "active": false,
+                  "created_at": 1234567,
+                  "certificate_details": {
+                    "valid_at": 12345667,
+                    "expires_at": 12345678
+                  }
+                }
+              ]
+            }
+      description: |
+        Deactivate certificates at the project level. You can atomically and
+        idempotently deactivate up to 10 certificates at a time.
+  /organization/projects/{project_id}/rate_limits:
+    get:
+      summary: List project rate limits
+      operationId: list-project-rate-limits
+      tags:
+        - Projects
+      parameters:
+        - name: project_id
+          in: path
+          description: The ID of the project.
+          required: true
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: |
+            A limit on the number of objects to be returned. The default is 100.
+          required: false
+          schema:
+            type: integer
+            default: 100
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list.
For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + required: false + schema: + type: string + - name: before + in: query + description: > + A cursor for use in pagination. `before` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, beginning with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page of the list. + required: false + schema: + type: string + responses: + '200': + description: Project rate limits listed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectRateLimitListResponse' + x-oaiMeta: + name: List project rate limits + group: administration + returns: >- + A list of + [ProjectRateLimit](https://platform.openai.com/docs/api-reference/project-rate-limits/object) + objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "project.rate_limit", + "id": "rl-ada", + "model": "ada", + "max_requests_per_1_minute": 600, + "max_tokens_per_1_minute": 150000, + "max_images_per_1_minute": 10 + } + ], + "first_id": "rl-ada", + "last_id": "rl-ada", + "has_more": false + } + request: + curl: > + curl https://api.openai.com/v1/organization/projects/proj_abc/rate_limits?after=rl_xxx&limit=20 + \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + error_response: | + { + "code": 404, + "message": "The project {project_id} was not found" + } + description: Returns the rate limits per model for a project. + /organization/projects/{project_id}/rate_limits/{rate_limit_id}: + post: + summary: Modify project rate limit + operationId: update-project-rate-limits + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + - name: rate_limit_id + in: path + description: The ID of the rate limit. + required: true + schema: + type: string + requestBody: + description: The project rate limit update request payload. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectRateLimitUpdateRequest' + responses: + '200': + description: Project rate limit updated successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectRateLimit' + '400': + description: Error response for various conditions. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + x-oaiMeta: + name: Modify project rate limit + group: administration + returns: >- + The updated + [ProjectRateLimit](https://platform.openai.com/docs/api-reference/project-rate-limits/object) + object. + examples: + response: | + { + "object": "project.rate_limit", + "id": "rl-ada", + "model": "ada", + "max_requests_per_1_minute": 600, + "max_tokens_per_1_minute": 150000, + "max_images_per_1_minute": 10 + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/projects/proj_abc/rate_limits/rl_xxx \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "max_requests_per_1_minute": 500 + }' + error_response: | + { + "code": 404, + "message": "The project {project_id} was not found" + } + description: Updates a project rate limit. 
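+  # As with the other admin endpoints, the rate-limit update above can be
+  # sketched (unofficially) in Python with `requests`; `rl_xxx`, the project
+  # ID, and the admin key are placeholders. The JSON body carries only the
+  # fields being changed:
+  #
+  #   import os
+  #   import requests
+  #
+  #   resp = requests.post(
+  #       "https://api.openai.com/v1/organization/projects/proj_abc/rate_limits/rl_xxx",
+  #       headers={"Authorization": f"Bearer {os.environ['OPENAI_ADMIN_KEY']}"},
+  #       json={"max_requests_per_1_minute": 500},
+  #   )
+  #   resp.raise_for_status()
+  #   print(resp.json()["max_requests_per_1_minute"])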
+ /organization/projects/{project_id}/service_accounts: + get: + summary: List project service accounts + operationId: list-project-service-accounts + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + required: false + schema: + type: string + responses: + '200': + description: Project service accounts listed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectServiceAccountListResponse' + '400': + description: Error response when project is archived. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + x-oaiMeta: + name: List project service accounts + group: administration + returns: >- + A list of + [ProjectServiceAccount](https://platform.openai.com/docs/api-reference/project-service-accounts/object) + objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "organization.project.service_account", + "id": "svc_acct_abc", + "name": "Service Account", + "role": "owner", + "created_at": 1711471533 + } + ], + "first_id": "svc_acct_abc", + "last_id": "svc_acct_xyz", + "has_more": false + } + request: + curl: > + curl + https://api.openai.com/v1/organization/projects/proj_abc/service_accounts?after=custom_id&limit=20 + \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Returns a list of service accounts in the project. + post: + summary: Create project service account + operationId: create-project-service-account + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + requestBody: + description: The project service account create request payload. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectServiceAccountCreateRequest' + responses: + '200': + description: Project service account created successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectServiceAccountCreateResponse' + '400': + description: Error response when project is archived. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + x-oaiMeta: + name: Create project service account + group: administration + returns: >- + The created + [ProjectServiceAccount](https://platform.openai.com/docs/api-reference/project-service-accounts/object) + object. 
+ examples: + response: | + { + "object": "organization.project.service_account", + "id": "svc_acct_abc", + "name": "Production App", + "role": "member", + "created_at": 1711471533, + "api_key": { + "object": "organization.project.service_account.api_key", + "value": "sk-abcdefghijklmnop123", + "name": "Secret Key", + "created_at": 1711471533, + "id": "key_abc" + } + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/projects/proj_abc/service_accounts \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Production App" + }' + description: >- + Creates a new service account in the project. This also returns an unredacted API key for the service + account. + /organization/projects/{project_id}/service_accounts/{service_account_id}: + get: + summary: Retrieve project service account + operationId: retrieve-project-service-account + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + - name: service_account_id + in: path + description: The ID of the service account. + required: true + schema: + type: string + responses: + '200': + description: Project service account retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectServiceAccount' + x-oaiMeta: + name: Retrieve project service account + group: administration + returns: >- + The + [ProjectServiceAccount](https://platform.openai.com/docs/api-reference/project-service-accounts/object) + object matching the specified ID. + examples: + response: | + { + "object": "organization.project.service_account", + "id": "svc_acct_abc", + "name": "Service Account", + "role": "owner", + "created_at": 1711471533 + } + request: + curl: | + curl https://api.openai.com/v1/organization/projects/proj_abc/service_accounts/svc_acct_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Retrieves a service account in the project. + delete: + summary: Delete project service account + operationId: delete-project-service-account + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + - name: service_account_id + in: path + description: The ID of the service account. + required: true + schema: + type: string + responses: + '200': + description: Project service account deleted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectServiceAccountDeleteResponse' + x-oaiMeta: + name: Delete project service account + group: administration + returns: >- + Confirmation of service account being deleted, or an error in case of an archived project, which has + no service accounts + examples: + response: | + { + "object": "organization.project.service_account.deleted", + "id": "svc_acct_abc", + "deleted": true + } + request: + curl: > + curl -X DELETE + https://api.openai.com/v1/organization/projects/proj_abc/service_accounts/svc_acct_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Deletes a service account from the project. + /organization/projects/{project_id}/users: + get: + summary: List project users + operationId: list-project-users + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. 
+ required: true + schema: + type: string + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + required: false + schema: + type: string + responses: + '200': + description: Project users listed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectUserListResponse' + '400': + description: Error response when project is archived. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + x-oaiMeta: + name: List project users + group: administration + returns: >- + A list of [ProjectUser](https://platform.openai.com/docs/api-reference/project-users/object) + objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "organization.project.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + ], + "first_id": "user-abc", + "last_id": "user-xyz", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/organization/projects/proj_abc/users?after=user_abc&limit=20 \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Returns a list of users in the project. + post: + summary: Create project user + operationId: create-project-user + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + tags: + - Projects + requestBody: + description: The project user create request payload. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectUserCreateRequest' + responses: + '200': + description: User added to project successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectUser' + '400': + description: Error response for various conditions. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + x-oaiMeta: + name: Create project user + group: administration + returns: >- + The created [ProjectUser](https://platform.openai.com/docs/api-reference/project-users/object) + object. + examples: + response: | + { + "object": "organization.project.user", + "id": "user_abc", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/projects/proj_abc/users \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "user_id": "user_abc", + "role": "member" + }' + description: >- + Adds a user to the project. Users must already be members of the organization to be added to a + project. + /organization/projects/{project_id}/users/{user_id}: + get: + summary: Retrieve project user + operationId: retrieve-project-user + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + - name: user_id + in: path + description: The ID of the user. 
+ required: true + schema: + type: string + responses: + '200': + description: Project user retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectUser' + x-oaiMeta: + name: Retrieve project user + group: administration + returns: >- + The [ProjectUser](https://platform.openai.com/docs/api-reference/project-users/object) object + matching the specified ID. + examples: + response: | + { + "object": "organization.project.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + request: + curl: | + curl https://api.openai.com/v1/organization/projects/proj_abc/users/user_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Retrieves a user in the project. + post: + summary: Modify project user + operationId: modify-project-user + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + - name: user_id + in: path + description: The ID of the user. + required: true + schema: + type: string + requestBody: + description: The project user update request payload. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectUserUpdateRequest' + responses: + '200': + description: Project user's role updated successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectUser' + '400': + description: Error response for various conditions. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + x-oaiMeta: + name: Modify project user + group: administration + returns: >- + The updated [ProjectUser](https://platform.openai.com/docs/api-reference/project-users/object) + object. + examples: + response: | + { + "object": "organization.project.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/projects/proj_abc/users/user_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "role": "owner" + }' + description: Modifies a user's role in the project. + delete: + summary: Delete project user + operationId: delete-project-user + tags: + - Projects + parameters: + - name: project_id + in: path + description: The ID of the project. + required: true + schema: + type: string + - name: user_id + in: path + description: The ID of the user. + required: true + schema: + type: string + responses: + '200': + description: Project user deleted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectUserDeleteResponse' + '400': + description: Error response for various conditions. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + x-oaiMeta: + name: Delete project user + group: administration + returns: >- + Confirmation that the user has been removed from the project, or an error in case of an archived project, which has no + users + examples: + response: | + { + "object": "organization.project.user.deleted", + "id": "user_abc", + "deleted": true + } + request: + curl: | + curl -X DELETE https://api.openai.com/v1/organization/projects/proj_abc/users/user_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Deletes a user from the project.
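+      # NOTE: unlike the cursor-paginated list endpoints above, the /organization/usage/* endpoints that follow page with an opaque token: + # each response carries `next_page`, which is passed back verbatim in the `page` query parameter while `has_more` is true. + # A minimal sketch under the same assumptions as the previous note (`requests`, OPENAI_ADMIN_KEY); `iter_usage_buckets` is a hypothetical helper: + # + #   import os, requests + # + #   def iter_usage_buckets(endpoint: str, start_time: int, bucket_width: str = "1d"): + #       url = f"https://api.openai.com/v1/organization/usage/{endpoint}" + #       headers = {"Authorization": f"Bearer {os.environ['OPENAI_ADMIN_KEY']}"} + #       params = {"start_time": start_time, "bucket_width": bucket_width} + #       while True: + #           body = requests.get(url, headers=headers, params=params).json() + #           yield from body["data"]  # time buckets, each holding per-group `results` + #           if not body.get("has_more"): + #               return + #           params["page"] = body["next_page"]  # opaque token from the previous response + # + #   # e.g. daily completions usage: for bucket in iter_usage_buckets("completions", start_time=1730419200): ...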
+ /organization/usage/audio_speeches: + get: + summary: Audio speeches + operationId: usage-audio-speeches + tags: + - Usage + parameters: + - name: start_time + in: query + description: Start time (Unix seconds) of the query time range, inclusive. + required: true + schema: + type: integer + - name: end_time + in: query + description: End time (Unix seconds) of the query time range, exclusive. + required: false + schema: + type: integer + - name: bucket_width + in: query + description: >- + Width of each time bucket in response. Currently `1m`, `1h` and `1d` are supported, default to + `1d`. + required: false + schema: + type: string + enum: + - 1m + - 1h + - 1d + default: 1d + - name: project_ids + in: query + description: Return only usage for these projects. + required: false + schema: + type: array + items: + type: string + - name: user_ids + in: query + description: Return only usage for these users. + required: false + schema: + type: array + items: + type: string + - name: api_key_ids + in: query + description: Return only usage for these API keys. + required: false + schema: + type: array + items: + type: string + - name: models + in: query + description: Return only usage for these models. + required: false + schema: + type: array + items: + type: string + - name: group_by + in: query + description: >- + Group the usage data by the specified fields. Support fields include `project_id`, `user_id`, + `api_key_id`, `model` or any combination of them. + required: false + schema: + type: array + items: + type: string + enum: + - project_id + - user_id + - api_key_id + - model + - name: limit + in: query + description: | + Specifies the number of buckets to return. + - `bucket_width=1d`: default: 7, max: 31 + - `bucket_width=1h`: default: 24, max: 168 + - `bucket_width=1m`: default: 60, max: 1440 + required: false + schema: + type: integer + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Usage data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Audio speeches + group: usage-audio-speeches + returns: >- + A list of paginated, time bucketed [Audio speeches + usage](https://platform.openai.com/docs/api-reference/usage/audio_speeches_object) objects. + examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.usage.audio_speeches.result", + "characters": 45, + "num_model_requests": 1, + "project_id": null, + "user_id": null, + "api_key_id": null, + "model": null + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: > + curl "https://api.openai.com/v1/organization/usage/audio_speeches?start_time=1730419200&limit=1" + \ + + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + + -H "Content-Type: application/json" + description: Get audio speeches usage details for the organization. + /organization/usage/audio_transcriptions: + get: + summary: Audio transcriptions + operationId: usage-audio-transcriptions + tags: + - Usage + parameters: + - name: start_time + in: query + description: Start time (Unix seconds) of the query time range, inclusive. + required: true + schema: + type: integer + - name: end_time + in: query + description: End time (Unix seconds) of the query time range, exclusive. 
+ required: false + schema: + type: integer + - name: bucket_width + in: query + description: >- + Width of each time bucket in response. Currently `1m`, `1h` and `1d` are supported, default to + `1d`. + required: false + schema: + type: string + enum: + - 1m + - 1h + - 1d + default: 1d + - name: project_ids + in: query + description: Return only usage for these projects. + required: false + schema: + type: array + items: + type: string + - name: user_ids + in: query + description: Return only usage for these users. + required: false + schema: + type: array + items: + type: string + - name: api_key_ids + in: query + description: Return only usage for these API keys. + required: false + schema: + type: array + items: + type: string + - name: models + in: query + description: Return only usage for these models. + required: false + schema: + type: array + items: + type: string + - name: group_by + in: query + description: >- + Group the usage data by the specified fields. Support fields include `project_id`, `user_id`, + `api_key_id`, `model` or any combination of them. + required: false + schema: + type: array + items: + type: string + enum: + - project_id + - user_id + - api_key_id + - model + - name: limit + in: query + description: | + Specifies the number of buckets to return. + - `bucket_width=1d`: default: 7, max: 31 + - `bucket_width=1h`: default: 24, max: 168 + - `bucket_width=1m`: default: 60, max: 1440 + required: false + schema: + type: integer + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Usage data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Audio transcriptions + group: usage-audio-transcriptions + returns: >- + A list of paginated, time bucketed [Audio transcriptions + usage](https://platform.openai.com/docs/api-reference/usage/audio_transcriptions_object) objects. + examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.usage.audio_transcriptions.result", + "seconds": 20, + "num_model_requests": 1, + "project_id": null, + "user_id": null, + "api_key_id": null, + "model": null + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: > + curl + "https://api.openai.com/v1/organization/usage/audio_transcriptions?start_time=1730419200&limit=1" + \ + + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + + -H "Content-Type: application/json" + description: Get audio transcriptions usage details for the organization. + /organization/usage/code_interpreter_sessions: + get: + summary: Code interpreter sessions + operationId: usage-code-interpreter-sessions + tags: + - Usage + parameters: + - name: start_time + in: query + description: Start time (Unix seconds) of the query time range, inclusive. + required: true + schema: + type: integer + - name: end_time + in: query + description: End time (Unix seconds) of the query time range, exclusive. + required: false + schema: + type: integer + - name: bucket_width + in: query + description: >- + Width of each time bucket in response. Currently `1m`, `1h` and `1d` are supported, default to + `1d`. 
+ required: false + schema: + type: string + enum: + - 1m + - 1h + - 1d + default: 1d + - name: project_ids + in: query + description: Return only usage for these projects. + required: false + schema: + type: array + items: + type: string + - name: group_by + in: query + description: Group the usage data by the specified fields. Support fields include `project_id`. + required: false + schema: + type: array + items: + type: string + enum: + - project_id + - name: limit + in: query + description: | + Specifies the number of buckets to return. + - `bucket_width=1d`: default: 7, max: 31 + - `bucket_width=1h`: default: 24, max: 168 + - `bucket_width=1m`: default: 60, max: 1440 + required: false + schema: + type: integer + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Usage data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Code interpreter sessions + group: usage-code-interpreter-sessions + returns: >- + A list of paginated, time bucketed [Code interpreter sessions + usage](https://platform.openai.com/docs/api-reference/usage/code_interpreter_sessions_object) + objects. + examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.usage.code_interpreter_sessions.result", + "num_sessions": 1, + "project_id": null + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: > + curl + "https://api.openai.com/v1/organization/usage/code_interpreter_sessions?start_time=1730419200&limit=1" + \ + + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + + -H "Content-Type: application/json" + description: Get code interpreter sessions usage details for the organization. + /organization/usage/completions: + get: + summary: Completions + operationId: usage-completions + tags: + - Usage + parameters: + - name: start_time + in: query + description: Start time (Unix seconds) of the query time range, inclusive. + required: true + schema: + type: integer + - name: end_time + in: query + description: End time (Unix seconds) of the query time range, exclusive. + required: false + schema: + type: integer + - name: bucket_width + in: query + description: >- + Width of each time bucket in response. Currently `1m`, `1h` and `1d` are supported, default to + `1d`. + required: false + schema: + type: string + enum: + - 1m + - 1h + - 1d + default: 1d + - name: project_ids + in: query + description: Return only usage for these projects. + required: false + schema: + type: array + items: + type: string + - name: user_ids + in: query + description: Return only usage for these users. + required: false + schema: + type: array + items: + type: string + - name: api_key_ids + in: query + description: Return only usage for these API keys. + required: false + schema: + type: array + items: + type: string + - name: models + in: query + description: Return only usage for these models. + required: false + schema: + type: array + items: + type: string + - name: batch + in: query + description: > + If `true`, return batch jobs only. If `false`, return non-batch jobs only. By default, return + both. + required: false + schema: + type: boolean + - name: group_by + in: query + description: >- + Group the usage data by the specified fields. 
Support fields include `project_id`, `user_id`, + `api_key_id`, `model`, `batch`, `service_tier` or any combination of them. + required: false + schema: + type: array + items: + type: string + enum: + - project_id + - user_id + - api_key_id + - model + - batch + - service_tier + - name: limit + in: query + description: | + Specifies the number of buckets to return. + - `bucket_width=1d`: default: 7, max: 31 + - `bucket_width=1h`: default: 24, max: 168 + - `bucket_width=1m`: default: 60, max: 1440 + required: false + schema: + type: integer + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Usage data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Completions + group: usage-completions + returns: >- + A list of paginated, time bucketed [Completions + usage](https://platform.openai.com/docs/api-reference/usage/completions_object) objects. + examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.usage.completions.result", + "input_tokens": 1000, + "output_tokens": 500, + "input_cached_tokens": 800, + "input_audio_tokens": 0, + "output_audio_tokens": 0, + "num_model_requests": 5, + "project_id": null, + "user_id": null, + "api_key_id": null, + "model": null, + "batch": null, + "service_tier": null + } + ] + } + ], + "has_more": true, + "next_page": "page_AAAAAGdGxdEiJdKOAAAAAGcqsYA=" + } + request: + curl: | + curl "https://api.openai.com/v1/organization/usage/completions?start_time=1730419200&limit=1" \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Get completions usage details for the organization. + /organization/usage/embeddings: + get: + summary: Embeddings + operationId: usage-embeddings + tags: + - Usage + parameters: + - name: start_time + in: query + description: Start time (Unix seconds) of the query time range, inclusive. + required: true + schema: + type: integer + - name: end_time + in: query + description: End time (Unix seconds) of the query time range, exclusive. + required: false + schema: + type: integer + - name: bucket_width + in: query + description: >- + Width of each time bucket in response. Currently `1m`, `1h` and `1d` are supported, default to + `1d`. + required: false + schema: + type: string + enum: + - 1m + - 1h + - 1d + default: 1d + - name: project_ids + in: query + description: Return only usage for these projects. + required: false + schema: + type: array + items: + type: string + - name: user_ids + in: query + description: Return only usage for these users. + required: false + schema: + type: array + items: + type: string + - name: api_key_ids + in: query + description: Return only usage for these API keys. + required: false + schema: + type: array + items: + type: string + - name: models + in: query + description: Return only usage for these models. + required: false + schema: + type: array + items: + type: string + - name: group_by + in: query + description: >- + Group the usage data by the specified fields. Support fields include `project_id`, `user_id`, + `api_key_id`, `model` or any combination of them. 
+ required: false + schema: + type: array + items: + type: string + enum: + - project_id + - user_id + - api_key_id + - model + - name: limit + in: query + description: | + Specifies the number of buckets to return. + - `bucket_width=1d`: default: 7, max: 31 + - `bucket_width=1h`: default: 24, max: 168 + - `bucket_width=1m`: default: 60, max: 1440 + required: false + schema: + type: integer + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Usage data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Embeddings + group: usage-embeddings + returns: >- + A list of paginated, time bucketed [Embeddings + usage](https://platform.openai.com/docs/api-reference/usage/embeddings_object) objects. + examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.usage.embeddings.result", + "input_tokens": 16, + "num_model_requests": 2, + "project_id": null, + "user_id": null, + "api_key_id": null, + "model": null + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: | + curl "https://api.openai.com/v1/organization/usage/embeddings?start_time=1730419200&limit=1" \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Get embeddings usage details for the organization. + /organization/usage/images: + get: + summary: Images + operationId: usage-images + tags: + - Usage + parameters: + - name: start_time + in: query + description: Start time (Unix seconds) of the query time range, inclusive. + required: true + schema: + type: integer + - name: end_time + in: query + description: End time (Unix seconds) of the query time range, exclusive. + required: false + schema: + type: integer + - name: bucket_width + in: query + description: >- + Width of each time bucket in response. Currently `1m`, `1h` and `1d` are supported, default to + `1d`. + required: false + schema: + type: string + enum: + - 1m + - 1h + - 1d + default: 1d + - name: sources + in: query + description: >- + Return only usages for these sources. Possible values are `image.generation`, `image.edit`, + `image.variation` or any combination of them. + required: false + schema: + type: array + items: + type: string + enum: + - image.generation + - image.edit + - image.variation + - name: sizes + in: query + description: >- + Return only usages for these image sizes. Possible values are `256x256`, `512x512`, `1024x1024`, + `1792x1792`, `1024x1792` or any combination of them. + required: false + schema: + type: array + items: + type: string + enum: + - 256x256 + - 512x512 + - 1024x1024 + - 1792x1792 + - 1024x1792 + - name: project_ids + in: query + description: Return only usage for these projects. + required: false + schema: + type: array + items: + type: string + - name: user_ids + in: query + description: Return only usage for these users. + required: false + schema: + type: array + items: + type: string + - name: api_key_ids + in: query + description: Return only usage for these API keys. + required: false + schema: + type: array + items: + type: string + - name: models + in: query + description: Return only usage for these models. 
+ required: false + schema: + type: array + items: + type: string + - name: group_by + in: query + description: >- + Group the usage data by the specified fields. Support fields include `project_id`, `user_id`, + `api_key_id`, `model`, `size`, `source` or any combination of them. + required: false + schema: + type: array + items: + type: string + enum: + - project_id + - user_id + - api_key_id + - model + - size + - source + - name: limit + in: query + description: | + Specifies the number of buckets to return. + - `bucket_width=1d`: default: 7, max: 31 + - `bucket_width=1h`: default: 24, max: 168 + - `bucket_width=1m`: default: 60, max: 1440 + required: false + schema: + type: integer + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Usage data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Images + group: usage-images + returns: >- + A list of paginated, time bucketed [Images + usage](https://platform.openai.com/docs/api-reference/usage/images_object) objects. + examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.usage.images.result", + "images": 2, + "num_model_requests": 2, + "size": null, + "source": null, + "project_id": null, + "user_id": null, + "api_key_id": null, + "model": null + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: | + curl "https://api.openai.com/v1/organization/usage/images?start_time=1730419200&limit=1" \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Get images usage details for the organization. + /organization/usage/moderations: + get: + summary: Moderations + operationId: usage-moderations + tags: + - Usage + parameters: + - name: start_time + in: query + description: Start time (Unix seconds) of the query time range, inclusive. + required: true + schema: + type: integer + - name: end_time + in: query + description: End time (Unix seconds) of the query time range, exclusive. + required: false + schema: + type: integer + - name: bucket_width + in: query + description: >- + Width of each time bucket in response. Currently `1m`, `1h` and `1d` are supported, default to + `1d`. + required: false + schema: + type: string + enum: + - 1m + - 1h + - 1d + default: 1d + - name: project_ids + in: query + description: Return only usage for these projects. + required: false + schema: + type: array + items: + type: string + - name: user_ids + in: query + description: Return only usage for these users. + required: false + schema: + type: array + items: + type: string + - name: api_key_ids + in: query + description: Return only usage for these API keys. + required: false + schema: + type: array + items: + type: string + - name: models + in: query + description: Return only usage for these models. + required: false + schema: + type: array + items: + type: string + - name: group_by + in: query + description: >- + Group the usage data by the specified fields. Support fields include `project_id`, `user_id`, + `api_key_id`, `model` or any combination of them. 
+ required: false + schema: + type: array + items: + type: string + enum: + - project_id + - user_id + - api_key_id + - model + - name: limit + in: query + description: | + Specifies the number of buckets to return. + - `bucket_width=1d`: default: 7, max: 31 + - `bucket_width=1h`: default: 24, max: 168 + - `bucket_width=1m`: default: 60, max: 1440 + required: false + schema: + type: integer + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Usage data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Moderations + group: usage-moderations + returns: >- + A list of paginated, time bucketed [Moderations + usage](https://platform.openai.com/docs/api-reference/usage/moderations_object) objects. + examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.usage.moderations.result", + "input_tokens": 16, + "num_model_requests": 2, + "project_id": null, + "user_id": null, + "api_key_id": null, + "model": null + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: | + curl "https://api.openai.com/v1/organization/usage/moderations?start_time=1730419200&limit=1" \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Get moderations usage details for the organization. + /organization/usage/vector_stores: + get: + summary: Vector stores + operationId: usage-vector-stores + tags: + - Usage + parameters: + - name: start_time + in: query + description: Start time (Unix seconds) of the query time range, inclusive. + required: true + schema: + type: integer + - name: end_time + in: query + description: End time (Unix seconds) of the query time range, exclusive. + required: false + schema: + type: integer + - name: bucket_width + in: query + description: >- + Width of each time bucket in response. Currently `1m`, `1h` and `1d` are supported, default to + `1d`. + required: false + schema: + type: string + enum: + - 1m + - 1h + - 1d + default: 1d + - name: project_ids + in: query + description: Return only usage for these projects. + required: false + schema: + type: array + items: + type: string + - name: group_by + in: query + description: Group the usage data by the specified fields. Support fields include `project_id`. + required: false + schema: + type: array + items: + type: string + enum: + - project_id + - name: limit + in: query + description: | + Specifies the number of buckets to return. + - `bucket_width=1d`: default: 7, max: 31 + - `bucket_width=1h`: default: 24, max: 168 + - `bucket_width=1m`: default: 60, max: 1440 + required: false + schema: + type: integer + - name: page + in: query + description: A cursor for use in pagination. Corresponding to the `next_page` field from the previous response. + schema: + type: string + responses: + '200': + description: Usage data retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UsageResponse' + x-oaiMeta: + name: Vector stores + group: usage-vector-stores + returns: >- + A list of paginated, time bucketed [Vector stores + usage](https://platform.openai.com/docs/api-reference/usage/vector_stores_object) objects. 
+ examples: + response: | + { + "object": "page", + "data": [ + { + "object": "bucket", + "start_time": 1730419200, + "end_time": 1730505600, + "results": [ + { + "object": "organization.usage.vector_stores.result", + "usage_bytes": 1024, + "project_id": null + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: > + curl "https://api.openai.com/v1/organization/usage/vector_stores?start_time=1730419200&limit=1" + \ + + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + + -H "Content-Type: application/json" + description: Get vector stores usage details for the organization. + /organization/users: + get: + summary: List users + operationId: list-users + tags: + - Users + parameters: + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + required: false + schema: + type: string + - name: emails + in: query + description: Filter by the email address of users. + required: false + schema: + type: array + items: + type: string + responses: + '200': + description: Users listed successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UserListResponse' + x-oaiMeta: + name: List users + group: administration + returns: A list of [User](https://platform.openai.com/docs/api-reference/users/object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "object": "organization.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + ], + "first_id": "user-abc", + "last_id": "user-xyz", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/organization/users?after=user_abc&limit=20 \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Lists all of the users in the organization. + /organization/users/{user_id}: + get: + summary: Retrieve user + operationId: retrieve-user + tags: + - Users + parameters: + - name: user_id + in: path + description: The ID of the user. + required: true + schema: + type: string + responses: + '200': + description: User retrieved successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/User' + x-oaiMeta: + name: Retrieve user + group: administration + returns: >- + The [User](https://platform.openai.com/docs/api-reference/users/object) object matching the + specified ID. + examples: + response: | + { + "object": "organization.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + request: + curl: | + curl https://api.openai.com/v1/organization/users/user_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Retrieves a user by their identifier. + post: + summary: Modify user + operationId: modify-user + tags: + - Users + parameters: + - name: user_id + in: path + description: The ID of the user. + required: true + schema: + type: string + requestBody: + description: The new user role to modify. 
This must be one of `owner` or `member`. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UserRoleUpdateRequest' + responses: + '200': + description: User role updated successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/User' + x-oaiMeta: + name: Modify user + group: administration + returns: The updated [User](https://platform.openai.com/docs/api-reference/users/object) object. + examples: + response: | + { + "object": "organization.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + request: + curl: | + curl -X POST https://api.openai.com/v1/organization/users/user_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "role": "owner" + }' + description: Modifies a user's role in the organization. + delete: + summary: Delete user + operationId: delete-user + tags: + - Users + parameters: + - name: user_id + in: path + description: The ID of the user. + required: true + schema: + type: string + responses: + '200': + description: User deleted successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/UserDeleteResponse' + x-oaiMeta: + name: Delete user + group: administration + returns: Confirmation of the deleted user + examples: + response: | + { + "object": "organization.user.deleted", + "id": "user_abc", + "deleted": true + } + request: + curl: | + curl -X DELETE https://api.openai.com/v1/organization/users/user_abc \ + -H "Authorization: Bearer $OPENAI_ADMIN_KEY" \ + -H "Content-Type: application/json" + description: Deletes a user from the organization. + /realtime/calls: + post: + summary: Create call + operationId: create-realtime-call + tags: + - Realtime + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/RealtimeCallCreateRequest' + encoding: + sdp: + contentType: application/sdp + session: + contentType: application/json + application/sdp: + schema: + type: string + description: |- + WebRTC SDP offer. Use this variant when you have previously created an + ephemeral **session token** and are authenticating the request with it. + Realtime session parameters will be retrieved from the session token. + responses: + '201': + description: Realtime call created successfully. + headers: + Location: + description: Relative URL containing the call ID for subsequent control requests. + schema: + type: string + content: + application/sdp: + schema: + type: string + description: SDP answer produced by OpenAI for the peer connection. + x-oaiMeta: + name: Create call + group: realtime + returns: |- + Returns `201 Created` with the SDP answer in the response body. The + `Location` response header includes the call ID for follow-up requests, + e.g., establishing a monitoring WebSocket or hanging up the call. 
+ examples: + response: >- + v=0 + + o=- 4227147428 1719357865 IN IP4 127.0.0.1 + + s=- + + c=IN IP4 0.0.0.0 + + t=0 0 + + a=group:BUNDLE 0 1 + + a=msid-semantic:WMS * + + a=fingerprint:sha-256 + CA:92:52:51:B4:91:3B:34:DD:9C:0B:FB:76:19:7E:3B:F1:21:0F:32:2C:38:01:72:5D:3F:78:C7:5F:8B:C7:36 + + m=audio 9 UDP/TLS/RTP/SAVPF 111 0 8 + + a=mid:0 + + a=ice-ufrag:kZ2qkHXX/u11 + + a=ice-pwd:uoD16Di5OGx3VbqgA3ymjEQV2kwiOjw6 + + a=setup:active + + a=rtcp-mux + + a=rtpmap:111 opus/48000/2 + + a=candidate:993865896 1 udp 2130706431 4.155.146.196 3478 typ host ufrag kZ2qkHXX/u11 + + a=candidate:1432411780 1 tcp 1671430143 4.155.146.196 443 typ host tcptype passive ufrag + kZ2qkHXX/u11 + + m=application 9 UDP/DTLS/SCTP webrtc-datachannel + + a=mid:1 + + a=sctp-port:5000 + request: + curl: |- + curl -X POST https://api.openai.com/v1/realtime/calls \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -F "sdp=- + /realtime/calls/{call_id}/accept: + post: + summary: Accept call + operationId: accept-realtime-call + tags: + - Realtime + parameters: + - in: path + name: call_id + required: true + schema: + type: string + description: >- + The identifier for the call provided in the + + [`realtime.call.incoming`](https://platform.openai.com/docs/api-reference/webhook-events/realtime/call/incoming) + + webhook. + requestBody: + required: true + description: Session configuration to apply before the caller is bridged to the model. + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeSessionCreateRequestGA' + responses: + '200': + description: Call accepted successfully. + x-oaiMeta: + name: Accept call + group: realtime-calls + returns: |- + Returns `200 OK` once OpenAI starts ringing the SIP leg with the supplied + session configuration. + examples: + response: '' + request: + curl: |- + curl -X POST https://api.openai.com/v1/realtime/calls/$CALL_ID/accept \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "type": "realtime", + "model": "gpt-realtime", + "instructions": "You are Alex, a friendly concierge for Example Corp." + }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + await client.realtime.calls.accept('call_id', { type: 'realtime' }); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + client.realtime.calls.accept( + call_id="call_id", + type="realtime", + ) + go: | + package main + + import ( + "context" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/realtime" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + err := client.Realtime.Calls.Accept( + context.TODO(), + "call_id", + realtime.CallAcceptParams{ + RealtimeSessionCreateRequest: realtime.RealtimeSessionCreateRequestParam{ + + }, + }, + ) + if err != nil { + panic(err.Error()) + } + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.realtime.RealtimeSessionCreateRequest; + import com.openai.models.realtime.calls.CallAcceptParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + CallAcceptParams params = CallAcceptParams.builder() + .callId("call_id") + .realtimeSessionCreateRequest(RealtimeSessionCreateRequest.builder().build()) + .build(); + client.realtime().calls().accept(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + result = openai.realtime.calls.accept("call_id", type: :realtime) + + puts(result) + description: |-
Accept an incoming SIP call and configure the realtime session that will + handle it. + /realtime/calls/{call_id}/hangup: + post: + summary: Hang up call + operationId: hangup-realtime-call + tags: + - Realtime + parameters: + - in: path + name: call_id + required: true + schema: + type: string + description: >- + The identifier for the call. For SIP calls, use the value provided in the + + [`realtime.call.incoming`](https://platform.openai.com/docs/api-reference/webhook-events/realtime/call/incoming) + + webhook. For WebRTC sessions, reuse the call ID returned in the `Location` + + header when creating the call with + + [`POST /v1/realtime/calls`](https://platform.openai.com/docs/api-reference/realtime/create-call). + responses: + '200': + description: Call hangup initiated successfully. + x-oaiMeta: + name: Hang up call + group: realtime-calls + returns: Returns `200 OK` when OpenAI begins terminating the realtime call. + examples: + response: '' + request: + curl: |- + curl -X POST https://api.openai.com/v1/realtime/calls/$CALL_ID/hangup \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + await client.realtime.calls.hangup('call_id'); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + client.realtime.calls.hangup( + "call_id", + ) + go: | + package main + + import ( + "context" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + err := client.Realtime.Calls.Hangup(context.TODO(), "call_id") + if err != nil { + panic(err.Error()) + } + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.realtime.calls.CallHangupParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + client.realtime().calls().hangup("call_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + result = openai.realtime.calls.hangup("call_id") + + puts(result) + description: |- + End an active Realtime API call, whether it was initiated over SIP or + WebRTC. + /realtime/calls/{call_id}/refer: + post: + summary: Refer call + operationId: refer-realtime-call + tags: + - Realtime + parameters: + - in: path + name: call_id + required: true + schema: + type: string + description: >- + The identifier for the call provided in the + + [`realtime.call.incoming`](https://platform.openai.com/docs/api-reference/webhook-events/realtime/call/incoming) + + webhook. + requestBody: + required: true + description: Destination URI for the REFER request. + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeCallReferRequest' + responses: + '200': + description: Call referred successfully. + x-oaiMeta: + name: Refer call + group: realtime-calls + returns: Returns `200 OK` once the REFER is handed off to your SIP provider. 
+ examples: + response: '' + request: + curl: |- + curl -X POST https://api.openai.com/v1/realtime/calls/$CALL_ID/refer \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"target_uri": "tel:+14155550123"}' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + await client.realtime.calls.refer('call_id', { target_uri: 'tel:+14155550123' }); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + client.realtime.calls.refer( + call_id="call_id", + target_uri="tel:+14155550123", + ) + go: | + package main + + import ( + "context" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/realtime" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + err := client.Realtime.Calls.Refer( + context.TODO(), + "call_id", + realtime.CallReferParams{ + TargetUri: "tel:+14155550123", + }, + ) + if err != nil { + panic(err.Error()) + } + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.realtime.calls.CallReferParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + CallReferParams params = CallReferParams.builder() + .callId("call_id") + .targetUri("tel:+14155550123") + .build(); + client.realtime().calls().refer(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + result = openai.realtime.calls.refer("call_id", target_uri: "tel:+14155550123") + + puts(result) + description: Transfer an active SIP call to a new destination using the SIP REFER verb. + /realtime/calls/{call_id}/reject: + post: + summary: Reject call + operationId: reject-realtime-call + tags: + - Realtime + parameters: + - in: path + name: call_id + required: true + schema: + type: string + description: >- + The identifier for the call provided in the + + [`realtime.call.incoming`](https://platform.openai.com/docs/api-reference/webhook-events/realtime/call/incoming) + + webhook. + requestBody: + required: false + description: |- + Provide an optional SIP status code. When omitted the API responds with + `603 Decline`. + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeCallRejectRequest' + responses: + '200': + description: Call rejected successfully. + x-oaiMeta: + name: Reject call + group: realtime-calls + returns: Returns `200 OK` after OpenAI sends the SIP status code to the caller. 
+ examples: + response: '' + request: + curl: |- + curl -X POST https://api.openai.com/v1/realtime/calls/$CALL_ID/reject \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"status_code": 486}' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + await client.realtime.calls.reject('call_id'); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + client.realtime.calls.reject( + call_id="call_id", + ) + go: | + package main + + import ( + "context" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/realtime" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + err := client.Realtime.Calls.Reject( + context.TODO(), + "call_id", + realtime.CallRejectParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.realtime.calls.CallRejectParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + client.realtime().calls().reject("call_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + result = openai.realtime.calls.reject("call_id") + + puts(result) + description: Decline an incoming SIP call by returning a SIP status code to the caller. + /realtime/client_secrets: + post: + summary: Create client secret + operationId: create-realtime-client-secret + tags: + - Realtime + requestBody: + description: Create a client secret with the given session configuration. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeCreateClientSecretRequest' + responses: + '200': + description: Client secret created successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeCreateClientSecretResponse' + x-oaiMeta: + name: Create client secret + group: realtime + returns: >- + The created client secret and the effective session object. The client secret is a string that looks + like `ek_1234`. + examples: + response: | + { + "value": "ek_68af296e8e408191a1120ab6383263c2", + "expires_at": 1756310470, + "session": { + "type": "realtime", + "object": "realtime.session", + "id": "sess_C9CiUVUzUzYIssh3ELY1d", + "model": "gpt-realtime", + "output_modalities": [ + "audio" + ], + "instructions": "You are a friendly assistant.", + "tools": [], + "tool_choice": "auto", + "max_output_tokens": "inf", + "tracing": null, + "truncation": "auto", + "prompt": null, + "expires_at": 0, + "audio": { + "input": { + "format": { + "type": "audio/pcm", + "rate": 24000 + }, + "transcription": null, + "noise_reduction": null, + "turn_detection": { + "type": "server_vad" + } + }, + "output": { + "format": { + "type": "audio/pcm", + "rate": 24000 + }, + "voice": "alloy", + "speed": 1.0 + } + }, + "include": null + } + } + request: + curl: | + curl -X POST https://api.openai.com/v1/realtime/client_secrets \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "expires_after": { + "anchor": "created_at", + "seconds": 600 + }, + "session": { + "type": "realtime", + "model": "gpt-realtime", + "instructions": "You are a friendly assistant."
+ } + }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const clientSecret = await client.realtime.clientSecrets.create(); + + console.log(clientSecret.expires_at); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + client_secret = client.realtime.client_secrets.create() + print(client_secret.expires_at) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/realtime" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + clientSecret, err := client.Realtime.ClientSecrets.New(context.TODO(), realtime.ClientSecretNewParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", clientSecret.ExpiresAt) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.realtime.clientsecrets.ClientSecretCreateParams; + import com.openai.models.realtime.clientsecrets.ClientSecretCreateResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ClientSecretCreateResponse clientSecret = client.realtime().clientSecrets().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + client_secret = openai.realtime.client_secrets.create + + puts(client_secret) + description: | + Create a Realtime client secret with an associated session configuration. + /realtime/sessions: + post: + summary: Create session + operationId: create-realtime-session + tags: + - Realtime + requestBody: + description: Create an ephemeral API key with the given session configuration. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeSessionCreateRequest' + responses: + '200': + description: Session created successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeSessionCreateResponse' + x-oaiMeta: + name: Create session + group: realtime + returns: The created Realtime session object, plus an ephemeral key + examples: + request: + curl: | + curl -X POST https://api.openai.com/v1/realtime/sessions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-realtime", + "modalities": ["audio", "text"], + "instructions": "You are a friendly assistant." + }' + response: | + { + "id": "sess_001", + "object": "realtime.session", + "model": "gpt-realtime-2025-08-25", + "modalities": ["audio", "text"], + "instructions": "You are a friendly assistant.", + "voice": "alloy", + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": { + "model": "whisper-1" + }, + "turn_detection": null, + "tools": [], + "tool_choice": "none", + "temperature": 0.7, + "max_response_output_tokens": 200, + "speed": 1.1, + "tracing": "auto", + "client_secret": { + "value": "ek_abc123", + "expires_at": 1234567890 + } + } + description: | + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. 
+ + It responds with a session object, plus a `client_secret` key which contains + a usable ephemeral API token that can be used to authenticate browser clients + for the Realtime API. + /realtime/transcription_sessions: + post: + summary: Create transcription session + operationId: create-realtime-transcription-session + tags: + - Realtime + requestBody: + description: Create an ephemeral API key with the given session configuration. + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateRequest' + responses: + '200': + description: Session created successfully. + content: + application/json: + schema: + $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateResponse' + x-oaiMeta: + name: Create transcription session + group: realtime + returns: >- + The created [Realtime transcription session + object](https://platform.openai.com/docs/api-reference/realtime-sessions/transcription_session_object), + plus an ephemeral key + examples: + request: + curl: | + curl -X POST https://api.openai.com/v1/realtime/transcription_sessions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{}' + response: | + { + "id": "sess_BBwZc7cFV3XizEyKGDCGL", + "object": "realtime.transcription_session", + "modalities": ["audio", "text"], + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200 + }, + "input_audio_format": "pcm16", + "input_audio_transcription": { + "model": "gpt-4o-transcribe", + "language": null, + "prompt": "" + }, + "client_secret": null + } + description: | + Create an ephemeral API token for use in client-side applications with the + Realtime API specifically for realtime transcriptions. + Can be configured with the same session parameters as the `transcription_session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains + a usable ephemeral API token that can be used to authenticate browser clients + for the Realtime API. + /responses: + post: + operationId: createResponse + tags: + - Responses + summary: Create a model response + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateResponse' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Response' + text/event-stream: + schema: + $ref: '#/components/schemas/ResponseStreamEvent' + x-oaiMeta: + name: Create a model response + group: responses + returns: | + Returns a [Response](https://platform.openai.com/docs/api-reference/responses/object) object. + path: create + examples: + - title: Text input + request: + curl: | + curl https://api.openai.com/v1/responses \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4.1", + "input": "Tell me a three sentence bedtime story about a unicorn." + }' + javascript: | + import OpenAI from "openai"; + + const openai = new OpenAI(); + + const response = await openai.responses.create({ + model: "gpt-4.1", + input: "Tell me a three sentence bedtime story about a unicorn." 
+ });
+
+ console.log(response);
+ python: |-
+ from openai import OpenAI
+
+ client = OpenAI(
+ api_key="My API Key",
+ )
+ response = client.responses.create(
+ model="gpt-4.1",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ )
+ print(response.id)
+ csharp: >
+ using System;
+
+ using OpenAI.Responses;
+
+
+ OpenAIResponseClient client = new(
+ model: "gpt-4.1",
+ apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+ );
+
+
+ OpenAIResponse response = client.CreateResponse("Tell me a three sentence bedtime story about
+ a unicorn.");
+
+
+ Console.WriteLine(response.GetOutputText());
+ node.js: |-
+ import OpenAI from 'openai';
+
+ const client = new OpenAI({
+ apiKey: 'My API Key',
+ });
+
+ const response = await client.responses.create({
+ model: 'gpt-4.1',
+ input: 'Tell me a three sentence bedtime story about a unicorn.',
+ });
+
+ console.log(response.id);
+ go: |
+ package main
+
+ import (
+ "context"
+ "fmt"
+
+ "github.com/openai/openai-go"
+ "github.com/openai/openai-go/option"
+ "github.com/openai/openai-go/responses"
+ )
+
+ func main() {
+ client := openai.NewClient(
+ option.WithAPIKey("My API Key"),
+ )
+ response, err := client.Responses.New(context.TODO(), responses.ResponseNewParams{})
+ if err != nil {
+ panic(err.Error())
+ }
+ fmt.Printf("%+v\n", response.ID)
+ }
+ java: |-
+ package com.openai.example;
+
+ import com.openai.client.OpenAIClient;
+ import com.openai.client.okhttp.OpenAIOkHttpClient;
+ import com.openai.models.responses.Response;
+ import com.openai.models.responses.ResponseCreateParams;
+
+ public final class Main {
+ private Main() {}
+
+ public static void main(String[] args) {
+ OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+ Response response = client.responses().create();
+ }
+ }
+ ruby: |-
+ require "openai"
+
+ openai = OpenAI::Client.new(api_key: "My API Key")
+
+ response = openai.responses.create
+
+ puts(response)
+ response: |
+ {
+ "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b",
+ "object": "response",
+ "created_at": 1741476542,
+ "status": "completed",
+ "error": null,
+ "incomplete_details": null,
+ "instructions": null,
+ "max_output_tokens": null,
+ "model": "gpt-4.1-2025-04-14",
+ "output": [
+ {
+ "type": "message",
+ "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b",
+ "status": "completed",
+ "role": "assistant",
+ "content": [
+ {
+ "type": "output_text",
+ "text": "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust.",
+ "annotations": []
+ }
+ ]
+ }
+ ],
+ "parallel_tool_calls": true,
+ "previous_response_id": null,
+ "reasoning": {
+ "effort": null,
+ "summary": null
+ },
+ "store": true,
+ "temperature": 1.0,
+ "text": {
+ "format": {
+ "type": "text"
+ }
+ },
+ "tool_choice": "auto",
+ "tools": [],
+ "top_p": 1.0,
+ "truncation": "disabled",
+ "usage": {
+ "input_tokens": 36,
+ "input_tokens_details": {
+ "cached_tokens": 0
+ },
+ "output_tokens": 87,
+ "output_tokens_details": {
+ "reasoning_tokens": 0
+ },
+ "total_tokens": 123
+ },
+ "user": null,
+ "metadata": {}
+ }
+ - title: Image input
+ request:
+ curl: |
+ curl https://api.openai.com/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer $OPENAI_API_KEY" \
+ -d '{
+ "model": "gpt-4.1",
+ "input": [
+ {
+ "role": "user",
+ "content": [
+ {"type": "input_text", "text": "what is in this image?"},
+ {
+ "type": "input_image",
+ "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+ }
+ ]
+ }
+ ]
+ }'
+ javascript: |
+ import OpenAI from "openai";
+
+ const openai = new OpenAI();
+
+ const response = await openai.responses.create({
+ model: "gpt-4.1",
+ input: [
+ {
+ role: "user",
+ content: [
+ { type: "input_text", text: "what is in this image?" },
+ {
+ type: "input_image",
+ image_url:
+ "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
+ },
+ ],
+ },
+ ],
+ });
+
+ console.log(response);
+ python: |-
+ from openai import OpenAI
+
+ client = OpenAI(
+ api_key="My API Key",
+ )
+ response = client.responses.create(
+ model="gpt-4.1",
+ input=[
+ {
+ "role": "user",
+ "content": [
+ {"type": "input_text", "text": "what is in this image?"},
+ {
+ "type": "input_image",
+ "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
+ },
+ ],
+ }
+ ],
+ )
+ print(response.id)
+ csharp: |
+ using System;
+ using System.Collections.Generic;
+
+ using OpenAI.Responses;
+
+ OpenAIResponseClient client = new(
+ model: "gpt-4.1",
+ apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+ );
+
+ List<ResponseItem> inputItems =
+ [
+ ResponseItem.CreateUserMessageItem(
+ [
+ ResponseContentPart.CreateInputTextPart("What is in this image?"),
+ ResponseContentPart.CreateInputImagePart(new Uri("https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"))
+ ]
+ )
+ ];
+
+ OpenAIResponse response = client.CreateResponse(inputItems);
+
+ Console.WriteLine(response.GetOutputText());
+ node.js: |-
+ import OpenAI from 'openai';
+
+ const client = new OpenAI({
+ apiKey: 'My API Key',
+ });
+
+ const response = await client.responses.create({
+ model: 'gpt-4.1',
+ input: [
+ {
+ role: 'user',
+ content: [
+ { type: 'input_text', text: 'what is in this image?' },
+ {
+ type: 'input_image',
+ image_url:
+ 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg',
+ },
+ ],
+ },
+ ],
+ });
+
+ console.log(response.id);
+ go: |
+ package main
+
+ import (
+ "context"
+ "fmt"
+
+ "github.com/openai/openai-go"
+ "github.com/openai/openai-go/option"
+ "github.com/openai/openai-go/responses"
+ )
+
+ func main() {
+ client := openai.NewClient(
+ option.WithAPIKey("My API Key"),
+ )
+ response, err := client.Responses.New(context.TODO(), responses.ResponseNewParams{})
+ if err != nil {
+ panic(err.Error())
+ }
+ fmt.Printf("%+v\n", response.ID)
+ }
+ java: |-
+ package com.openai.example;
+
+ import com.openai.client.OpenAIClient;
+ import com.openai.client.okhttp.OpenAIOkHttpClient;
+ import com.openai.models.responses.Response;
+ import com.openai.models.responses.ResponseCreateParams;
+
+ public final class Main {
+ private Main() {}
+
+ 
public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Response response = client.responses().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.create + + puts(response) + response: | + { + "id": "resp_67ccd3a9da748190baa7f1570fe91ac604becb25c45c1d41", + "object": "response", + "created_at": 1741476777, + "status": "completed", + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "type": "message", + "id": "msg_67ccd3acc8d48190a77525dc6de64b4104becb25c45c1d41", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "The image depicts a scenic landscape with a wooden boardwalk or pathway leading through lush, green grass under a blue sky with some clouds. The setting suggests a peaceful natural area, possibly a park or nature reserve. There are trees and shrubs in the background.", + "annotations": [] + } + ] + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": null, + "summary": null + }, + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 328, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 52, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 380 + }, + "user": null, + "metadata": {} + } + - title: File input + request: + curl: | + curl https://api.openai.com/v1/responses \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4.1", + "input": [ + { + "role": "user", + "content": [ + {"type": "input_text", "text": "what is in this file?"}, + { + "type": "input_file", + "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf" + } + ] + } + ] + }' + javascript: | + import OpenAI from "openai"; + + const openai = new OpenAI(); + + const response = await openai.responses.create({ + model: "gpt-4.1", + input: [ + { + role: "user", + content: [ + { type: "input_text", text: "what is in this file?" 
}, + { + type: "input_file", + file_url: "https://www.berkshirehathaway.com/letters/2024ltr.pdf", + }, + ], + }, + ], + }); + + console.log(response); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.responses.create() + print(response.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.responses.create(); + + console.log(response.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/responses" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Responses.New(context.TODO(), responses.ResponseNewParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.Response; + import com.openai.models.responses.ResponseCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Response response = client.responses().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.create + + puts(response) + response: | + { + "id": "resp_686eef60237881a2bd1180bb8b13de430e34c516d176ff86", + "object": "response", + "created_at": 1752100704, + "status": "completed", + "background": false, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "id": "msg_686eef60d3e081a29283bdcbc4322fd90e34c516d176ff86", + "type": "message", + "status": "completed", + "content": [ + { + "type": "output_text", + "annotations": [], + "logprobs": [], + "text": "The file seems to contain excerpts from a letter to the shareholders of Berkshire Hathaway Inc., likely written by Warren Buffett. It covers several topics:\n\n1. **Communication Philosophy**: Buffett emphasizes the importance of transparency and candidness in reporting mistakes and successes to shareholders.\n\n2. **Mistakes and Learnings**: The letter acknowledges past mistakes in business assessments and management hires, highlighting the importance of correcting errors promptly.\n\n3. **CEO Succession**: Mention of Greg Abel stepping in as the new CEO and continuing the tradition of honest communication.\n\n4. **Pete Liegl Story**: A detailed account of acquiring Forest River and the relationship with its founder, highlighting trust and effective business decisions.\n\n5. **2024 Performance**: Overview of business performance, particularly in insurance and investment activities, with a focus on GEICO's improvement.\n\n6. **Tax Contributions**: Discussion of significant tax payments to the U.S. Treasury, credited to shareholders' reinvestments.\n\n7. **Investment Strategy**: A breakdown of Berkshire\u2019s investments in both controlled subsidiaries and marketable equities, along with a focus on long-term holding strategies.\n\n8. **American Capitalism**: Reflections on America\u2019s economic development and Berkshire\u2019s role within it.\n\n9. 
**Property-Casualty Insurance**: Insights into the P/C insurance business model and its challenges and benefits.\n\n10. **Japanese Investments**: Information about Berkshire\u2019s investments in Japanese companies and future plans.\n\n11. **Annual Meeting**: Details about the upcoming annual gathering in Omaha, including schedule changes and new book releases.\n\n12. **Personal Anecdotes**: Light-hearted stories about family and interactions, conveying Buffett's personable approach.\n\n13. **Financial Performance Data**: Tables comparing Berkshire\u2019s annual performance to the S&P 500, showing impressive long-term gains.\n\nOverall, the letter reinforces Berkshire Hathaway's commitment to transparency, investment in both its businesses and the wider economy, and emphasizes strong leadership and prudent financial management." + } + ], + "role": "assistant" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": null, + "summary": null + }, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 8438, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 398, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 8836 + }, + "user": null, + "metadata": {} + } + - title: Web search + request: + curl: | + curl https://api.openai.com/v1/responses \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4.1", + "tools": [{ "type": "web_search_preview" }], + "input": "What was a positive news story from today?" + }' + javascript: | + import OpenAI from "openai"; + + const openai = new OpenAI(); + + const response = await openai.responses.create({ + model: "gpt-4.1", + tools: [{ type: "web_search_preview" }], + input: "What was a positive news story from today?", + }); + + console.log(response); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.responses.create() + print(response.id) + csharp: | + using System; + + using OpenAI.Responses; + + OpenAIResponseClient client = new( + model: "gpt-4.1", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + string userInputText = "What was a positive news story from today?"; + + ResponseCreationOptions options = new() + { + Tools = + { + ResponseTool.CreateWebSearchTool() + }, + }; + + OpenAIResponse response = client.CreateResponse(userInputText, options); + + Console.WriteLine(response.GetOutputText()); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.responses.create(); + + console.log(response.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/responses" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Responses.New(context.TODO(), responses.ResponseNewParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.Response; + import 
com.openai.models.responses.ResponseCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Response response = client.responses().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.create + + puts(response) + response: | + { + "id": "resp_67ccf18ef5fc8190b16dbee19bc54e5f087bb177ab789d5c", + "object": "response", + "created_at": 1741484430, + "status": "completed", + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "type": "web_search_call", + "id": "ws_67ccf18f64008190a39b619f4c8455ef087bb177ab789d5c", + "status": "completed" + }, + { + "type": "message", + "id": "msg_67ccf190ca3881909d433c50b1f6357e087bb177ab789d5c", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "As of today, March 9, 2025, one notable positive news story...", + "annotations": [ + { + "type": "url_citation", + "start_index": 442, + "end_index": 557, + "url": "https://.../?utm_source=chatgpt.com", + "title": "..." + }, + { + "type": "url_citation", + "start_index": 962, + "end_index": 1077, + "url": "https://.../?utm_source=chatgpt.com", + "title": "..." + }, + { + "type": "url_citation", + "start_index": 1336, + "end_index": 1451, + "url": "https://.../?utm_source=chatgpt.com", + "title": "..." + } + ] + } + ] + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": null, + "summary": null + }, + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [ + { + "type": "web_search_preview", + "domains": [], + "search_context_size": "medium", + "user_location": { + "type": "approximate", + "city": null, + "country": "US", + "region": null, + "timezone": null + } + } + ], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 328, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 356, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 684 + }, + "user": null, + "metadata": {} + } + - title: File search + request: + curl: | + curl https://api.openai.com/v1/responses \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4.1", + "tools": [{ + "type": "file_search", + "vector_store_ids": ["vs_1234567890"], + "max_num_results": 20 + }], + "input": "What are the attributes of an ancient brown dragon?" 
+ }'
+ javascript: |
+ import OpenAI from "openai";
+
+ const openai = new OpenAI();
+
+ const response = await openai.responses.create({
+ model: "gpt-4.1",
+ tools: [{
+ type: "file_search",
+ vector_store_ids: ["vs_1234567890"],
+ max_num_results: 20
+ }],
+ input: "What are the attributes of an ancient brown dragon?",
+ });
+
+ console.log(response);
+ python: |-
+ from openai import OpenAI
+
+ client = OpenAI(
+ api_key="My API Key",
+ )
+ response = client.responses.create(
+ model="gpt-4.1",
+ tools=[
+ {
+ "type": "file_search",
+ "vector_store_ids": ["vs_1234567890"],
+ "max_num_results": 20,
+ }
+ ],
+ input="What are the attributes of an ancient brown dragon?",
+ )
+ print(response.id)
+ csharp: |
+ using System;
+
+ using OpenAI.Responses;
+
+ OpenAIResponseClient client = new(
+ model: "gpt-4.1",
+ apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+ );
+
+ string userInputText = "What are the attributes of an ancient brown dragon?";
+
+ ResponseCreationOptions options = new()
+ {
+ Tools =
+ {
+ ResponseTool.CreateFileSearchTool(
+ vectorStoreIds: ["vs_1234567890"],
+ maxResultCount: 20
+ )
+ },
+ };
+
+ OpenAIResponse response = client.CreateResponse(userInputText, options);
+
+ Console.WriteLine(response.GetOutputText());
+ node.js: |-
+ import OpenAI from 'openai';
+
+ const client = new OpenAI({
+ apiKey: 'My API Key',
+ });
+
+ const response = await client.responses.create({
+ model: 'gpt-4.1',
+ tools: [
+ {
+ type: 'file_search',
+ vector_store_ids: ['vs_1234567890'],
+ max_num_results: 20,
+ },
+ ],
+ input: 'What are the attributes of an ancient brown dragon?',
+ });
+
+ console.log(response.id);
+ go: |
+ package main
+
+ import (
+ "context"
+ "fmt"
+
+ "github.com/openai/openai-go"
+ "github.com/openai/openai-go/option"
+ "github.com/openai/openai-go/responses"
+ )
+
+ func main() {
+ client := openai.NewClient(
+ option.WithAPIKey("My API Key"),
+ )
+ response, err := client.Responses.New(context.TODO(), responses.ResponseNewParams{})
+ if err != nil {
+ panic(err.Error())
+ }
+ fmt.Printf("%+v\n", response.ID)
+ }
+ java: |-
+ package com.openai.example;
+
+ import com.openai.client.OpenAIClient;
+ import com.openai.client.okhttp.OpenAIOkHttpClient;
+ import com.openai.models.responses.Response;
+ import com.openai.models.responses.ResponseCreateParams;
+
+ public final class Main {
+ private Main() {}
+
+ public static void main(String[] args) {
+ OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+ Response response = client.responses().create();
+ }
+ }
+ ruby: |-
+ require "openai"
+
+ openai = OpenAI::Client.new(api_key: "My API Key")
+
+ response = openai.responses.create
+
+ puts(response)
+ response: |
+ {
+ "id": "resp_67ccf4c55fc48190b71bd0463ad3306d09504fb6872380d7",
+ "object": "response",
+ "created_at": 1741485253,
+ "status": "completed",
+ "error": null,
+ "incomplete_details": null,
+ "instructions": null,
+ "max_output_tokens": null,
+ "model": "gpt-4.1-2025-04-14",
+ "output": [
+ {
+ "type": "file_search_call",
+ "id": "fs_67ccf4c63cd08190887ef6464ba5681609504fb6872380d7",
+ "status": "completed",
+ "queries": [
+ "attributes of an ancient brown dragon"
+ ],
+ "results": null
+ },
+ {
+ "type": "message",
+ "id": "msg_67ccf4c93e5c81909d595b369351a9d309504fb6872380d7",
+ "status": "completed",
+ "role": "assistant",
+ "content": [
+ {
+ "type": "output_text",
+ "text": "The attributes of an ancient brown dragon include...",
+ "annotations": [
+ {
+ "type": "file_citation",
+ "index": 320,
+ "file_id": "file-4wDz5b167pAf72nx1h9eiN",
+ "filename": "dragons.pdf"
+ },
+ {
+ "type": "file_citation",
+ "index": 576,
+ "file_id": "file-4wDz5b167pAf72nx1h9eiN",
+ "filename": "dragons.pdf"
+ },
+ {
+ "type": "file_citation",
+ "index": 815,
+ "file_id": "file-4wDz5b167pAf72nx1h9eiN",
+ "filename": "dragons.pdf"
+ },
+ {
+ "type": "file_citation",
+ "index": 815,
+ "file_id": "file-4wDz5b167pAf72nx1h9eiN",
+ "filename": "dragons.pdf"
+ },
+ {
+ "type": "file_citation",
+ "index": 1030,
+ "file_id": "file-4wDz5b167pAf72nx1h9eiN",
+ "filename": "dragons.pdf"
+ },
+ {
+ "type": "file_citation",
+ "index": 1030,
+ "file_id": "file-4wDz5b167pAf72nx1h9eiN",
+ "filename": "dragons.pdf"
+ },
+ {
+ "type": "file_citation",
+ "index": 1156,
+ "file_id": "file-4wDz5b167pAf72nx1h9eiN",
+ "filename": "dragons.pdf"
+ },
+ {
+ "type": "file_citation",
+ "index": 1225,
+ "file_id": "file-4wDz5b167pAf72nx1h9eiN",
+ "filename": "dragons.pdf"
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "parallel_tool_calls": true,
+ "previous_response_id": null,
+ "reasoning": {
+ "effort": null,
+ "summary": null
+ },
+ "store": true,
+ "temperature": 1.0,
+ "text": {
+ "format": {
+ "type": "text"
+ }
+ },
+ "tool_choice": "auto",
+ "tools": [
+ {
+ "type": "file_search",
+ "filters": null,
+ "max_num_results": 20,
+ "ranking_options": {
+ "ranker": "auto",
+ "score_threshold": 0.0
+ },
+ "vector_store_ids": [
+ "vs_1234567890"
+ ]
+ }
+ ],
+ "top_p": 1.0,
+ "truncation": "disabled",
+ "usage": {
+ "input_tokens": 18307,
+ "input_tokens_details": {
+ "cached_tokens": 0
+ },
+ "output_tokens": 348,
+ "output_tokens_details": {
+ "reasoning_tokens": 0
+ },
+ "total_tokens": 18655
+ },
+ "user": null,
+ "metadata": {}
+ }
+ - title: Streaming
+ request:
+ curl: |
+ curl https://api.openai.com/v1/responses \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer $OPENAI_API_KEY" \
+ -d '{
+ "model": "gpt-4.1",
+ "instructions": "You are a helpful assistant.",
+ "input": "Hello!",
+ "stream": true
+ }'
+ python: |-
+ from openai import OpenAI
+
+ client = OpenAI(
+ api_key="My API Key",
+ )
+ stream = client.responses.create(
+ model="gpt-4.1",
+ instructions="You are a helpful assistant.",
+ input="Hello!",
+ stream=True,
+ )
+ for event in stream:
+ print(event)
+ javascript: |
+ import OpenAI from "openai";
+
+ const openai = new OpenAI();
+
+ const response = await openai.responses.create({
+ model: "gpt-4.1",
+ instructions: "You are a helpful assistant.",
+ input: "Hello!",
+ stream: true,
+ });
+
+ for await (const event of response) {
+ console.log(event);
+ }
+ csharp: >
+ using System;
+
+ using System.ClientModel;
+
+ using System.Threading.Tasks;
+
+
+ using OpenAI.Responses;
+
+
+ OpenAIResponseClient client = new(
+ model: "gpt-4.1",
+ apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+ );
+
+
+ string userInputText = "Hello!";
+
+
+ ResponseCreationOptions options = new()
+
+ {
+ Instructions = "You are a helpful assistant.",
+ };
+
+
+ AsyncCollectionResult<StreamingResponseUpdate> responseUpdates =
+ client.CreateResponseStreamingAsync(userInputText, options);
+
+
+ await foreach (StreamingResponseUpdate responseUpdate in responseUpdates)
+
+ {
+ if (responseUpdate is StreamingResponseOutputTextDeltaUpdate outputTextDeltaUpdate)
+ {
+ Console.Write(outputTextDeltaUpdate.Delta);
+ }
+ }
+ node.js: |-
+ import OpenAI from 'openai';
+
+ const client = new OpenAI({
+ apiKey: 'My API Key',
+ });
+
+ const stream = await client.responses.create({
+ model: 'gpt-4.1',
+ instructions: 'You are a helpful assistant.',
+ input: 'Hello!',
+ stream: true,
+ });
+
+ for await (const event of stream) {
+ console.log(event);
+ }
+ go: |
+ package main
+
+ import (
+ "context"
+ "fmt"
+
+ "github.com/openai/openai-go"
+ "github.com/openai/openai-go/option"
+ "github.com/openai/openai-go/responses"
+ )
+
+ func main() {
+ client := openai.NewClient(
+ option.WithAPIKey("My API Key"),
+ )
+ response, err := client.Responses.New(context.TODO(), responses.ResponseNewParams{})
+ if err != nil {
+ panic(err.Error())
+ }
+ fmt.Printf("%+v\n", response.ID)
+ }
+ java: |-
+ package com.openai.example;
+
+ import com.openai.client.OpenAIClient;
+ import com.openai.client.okhttp.OpenAIOkHttpClient;
+ import 
com.openai.models.responses.Response; + import com.openai.models.responses.ResponseCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Response response = client.responses().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.create + + puts(response) + response: > + event: response.created + + data: + {"type":"response.created","response":{"id":"resp_67c9fdcecf488190bdd9a0409de3a1ec07b8b0ad4e5eb654","object":"response","created_at":1741290958,"status":"in_progress","error":null,"incomplete_details":null,"instructions":"You + are a helpful + assistant.","max_output_tokens":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":"auto","tools":[],"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}}} + + + event: response.in_progress + + data: + {"type":"response.in_progress","response":{"id":"resp_67c9fdcecf488190bdd9a0409de3a1ec07b8b0ad4e5eb654","object":"response","created_at":1741290958,"status":"in_progress","error":null,"incomplete_details":null,"instructions":"You + are a helpful + assistant.","max_output_tokens":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":"auto","tools":[],"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}}} + + + event: response.output_item.added + + data: + {"type":"response.output_item.added","output_index":0,"item":{"id":"msg_67c9fdcf37fc8190ba82116e33fb28c507b8b0ad4e5eb654","type":"message","status":"in_progress","role":"assistant","content":[]}} + + + event: response.content_part.added + + data: + {"type":"response.content_part.added","item_id":"msg_67c9fdcf37fc8190ba82116e33fb28c507b8b0ad4e5eb654","output_index":0,"content_index":0,"part":{"type":"output_text","text":"","annotations":[]}} + + + event: response.output_text.delta + + data: + {"type":"response.output_text.delta","item_id":"msg_67c9fdcf37fc8190ba82116e33fb28c507b8b0ad4e5eb654","output_index":0,"content_index":0,"delta":"Hi"} + + + ... + + + event: response.output_text.done + + data: + {"type":"response.output_text.done","item_id":"msg_67c9fdcf37fc8190ba82116e33fb28c507b8b0ad4e5eb654","output_index":0,"content_index":0,"text":"Hi + there! How can I assist you today?"} + + + event: response.content_part.done + + data: + {"type":"response.content_part.done","item_id":"msg_67c9fdcf37fc8190ba82116e33fb28c507b8b0ad4e5eb654","output_index":0,"content_index":0,"part":{"type":"output_text","text":"Hi + there! How can I assist you today?","annotations":[]}} + + + event: response.output_item.done + + data: + {"type":"response.output_item.done","output_index":0,"item":{"id":"msg_67c9fdcf37fc8190ba82116e33fb28c507b8b0ad4e5eb654","type":"message","status":"completed","role":"assistant","content":[{"type":"output_text","text":"Hi + there! 
How can I assist you today?","annotations":[]}]}} + + + event: response.completed + + data: + {"type":"response.completed","response":{"id":"resp_67c9fdcecf488190bdd9a0409de3a1ec07b8b0ad4e5eb654","object":"response","created_at":1741290958,"status":"completed","error":null,"incomplete_details":null,"instructions":"You + are a helpful + assistant.","max_output_tokens":null,"model":"gpt-4.1-2025-04-14","output":[{"id":"msg_67c9fdcf37fc8190ba82116e33fb28c507b8b0ad4e5eb654","type":"message","status":"completed","role":"assistant","content":[{"type":"output_text","text":"Hi + there! How can I assist you + today?","annotations":[]}]}],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":"auto","tools":[],"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":37,"output_tokens":11,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":48},"user":null,"metadata":{}}} + - title: Functions + request: + curl: | + curl https://api.openai.com/v1/responses \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4.1", + "input": "What is the weather like in Boston today?", + "tools": [ + { + "type": "function", + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location", "unit"] + } + } + ], + "tool_choice": "auto" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.responses.create() + print(response.id) + javascript: | + import OpenAI from "openai"; + + const openai = new OpenAI(); + + const tools = [ + { + type: "function", + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA", + }, + unit: { type: "string", enum: ["celsius", "fahrenheit"] }, + }, + required: ["location", "unit"], + }, + }, + ]; + + const response = await openai.responses.create({ + model: "gpt-4.1", + tools: tools, + input: "What is the weather like in Boston today?", + tool_choice: "auto", + }); + + console.log(response); + csharp: | + using System; + using OpenAI.Responses; + + OpenAIResponseClient client = new( + model: "gpt-4.1", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + ResponseTool getCurrentWeatherFunctionTool = ResponseTool.CreateFunctionTool( + functionName: "get_current_weather", + functionDescription: "Get the current weather in a given location", + functionParameters: BinaryData.FromString(""" + { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + }, + "required": ["location", "unit"] + } + """ + ) + ); + + string userInputText = "What is the weather like in Boston today?"; + + ResponseCreationOptions options = new() + { + Tools = + { + getCurrentWeatherFunctionTool + }, + ToolChoice = ResponseToolChoice.CreateAutoChoice(), + }; + + OpenAIResponse response = client.CreateResponse(userInputText, options); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.responses.create(); + + console.log(response.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/responses" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Responses.New(context.TODO(), responses.ResponseNewParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.Response; + import com.openai.models.responses.ResponseCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Response response = client.responses().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.create + + puts(response) + response: | + { + "id": "resp_67ca09c5efe0819096d0511c92b8c890096610f474011cc0", + "object": "response", + "created_at": 1741294021, + "status": "completed", + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "type": "function_call", + "id": "fc_67ca09c6bedc8190a7abfec07b1a1332096610f474011cc0", + "call_id": "call_unLAR8MvFNptuiZK6K6HCy5k", + "name": "get_current_weather", + "arguments": "{\"location\":\"Boston, MA\",\"unit\":\"celsius\"}", + "status": "completed" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": null, + "summary": null + }, + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "description": "Get the current weather in a given location", + "name": "get_current_weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ] + } + }, + "required": [ + "location", + "unit" + ] + }, + "strict": true + } + ], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 291, + "output_tokens": 23, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 314 + }, + "user": null, + "metadata": {} + } + - title: Reasoning + request: + curl: | + curl https://api.openai.com/v1/responses \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "o3-mini", + "input": "How much wood would a woodchuck chuck?", + "reasoning": { + "effort": "high" + } + }' + javascript: | + import OpenAI from "openai"; + const openai = new OpenAI(); + + const response = await openai.responses.create({ + model: "o3-mini", + input: "How much wood would a woodchuck chuck?", + reasoning: { + effort: "high" + } + }); + + console.log(response); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.responses.create() + print(response.id) + csharp: | + using System; + using OpenAI.Responses; + + OpenAIResponseClient client = new( + model: "o3-mini", + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + string userInputText = "How much wood would a woodchuck chuck?"; + + ResponseCreationOptions options = new() + { + ReasoningOptions = new() + { + ReasoningEffortLevel = ResponseReasoningEffortLevel.High, + }, + }; + + OpenAIResponse response = client.CreateResponse(userInputText, options); + + Console.WriteLine(response.GetOutputText()); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.responses.create(); + + console.log(response.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/responses" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Responses.New(context.TODO(), responses.ResponseNewParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.Response; + import com.openai.models.responses.ResponseCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Response response = client.responses().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.create + + puts(response) + response: | + { + "id": "resp_67ccd7eca01881908ff0b5146584e408072912b2993db808", + "object": "response", + "created_at": 1741477868, + "status": "completed", + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "o1-2024-12-17", + "output": [ + { + "type": "message", + "id": "msg_67ccd7f7b5848190a6f3e95d809f6b44072912b2993db808", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "The classic tongue twister...", + "annotations": [] + } + ] + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": "high", + 
"summary": null + }, + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 81, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 1035, + "output_tokens_details": { + "reasoning_tokens": 832 + }, + "total_tokens": 1116 + }, + "user": null, + "metadata": {} + } + description: > + Creates a model response. Provide [text](https://platform.openai.com/docs/guides/text) or + + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) + + or [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have the model call + + your own [custom code](https://platform.openai.com/docs/guides/function-calling) or use built-in + + [tools](https://platform.openai.com/docs/guides/tools) like [web + search](https://platform.openai.com/docs/guides/tools-web-search) + + or [file search](https://platform.openai.com/docs/guides/tools-file-search) to use your own data + + as input for the model's response. + /responses/{response_id}: + get: + operationId: getResponse + tags: + - Responses + summary: Get a model response + parameters: + - in: path + name: response_id + required: true + schema: + type: string + example: resp_677efb5139a88190b512bc3fef8e535d + description: The ID of the response to retrieve. + - in: query + name: include + schema: + type: array + items: + $ref: '#/components/schemas/IncludeEnum' + description: | + Additional fields to include in the response. See the `include` + parameter for Response creation above for more information. + - in: query + name: stream + schema: + type: boolean + description: > + If set to true, the model response data will be streamed to the client + + as it is generated using [server-sent + events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + + See the [Streaming section + below](https://platform.openai.com/docs/api-reference/responses-streaming) + + for more information. + - in: query + name: starting_after + schema: + type: integer + description: | + The sequence number of the event after which to start streaming. + - in: query + name: include_obfuscation + schema: + type: boolean + description: | + When true, stream obfuscation will be enabled. Stream obfuscation adds + random characters to an `obfuscation` field on streaming delta events + to normalize payload sizes as a mitigation to certain side-channel + attacks. These obfuscation fields are included by default, but add a + small amount of overhead to the data stream. You can set + `include_obfuscation` to false to optimize for bandwidth if you trust + the network links between your application and the OpenAI API. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Response' + x-oaiMeta: + name: Get a model response + group: responses + returns: | + The [Response](https://platform.openai.com/docs/api-reference/responses/object) object matching the + specified ID. 
+ examples: + response: | + { + "id": "resp_67cb71b351908190a308f3859487620d06981a8637e6bc44", + "object": "response", + "created_at": 1741386163, + "status": "completed", + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4o-2024-08-06", + "output": [ + { + "type": "message", + "id": "msg_67cb71b3c2b0819084d481baaaf148f206981a8637e6bc44", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Silent circuits hum, \nThoughts emerge in data streams— \nDigital dawn breaks.", + "annotations": [] + } + ] + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": null, + "summary": null + }, + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 32, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 18, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 50 + }, + "user": null, + "metadata": {} + } + request: + curl: | + curl https://api.openai.com/v1/responses/resp_123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const response = await client.responses.retrieve("resp_123"); + console.log(response); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + print(response.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.responses.retrieve('resp_677efb5139a88190b512bc3fef8e535d'); + + console.log(response.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/responses" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Responses.Get( + context.TODO(), + "resp_677efb5139a88190b512bc3fef8e535d", + responses.ResponseGetParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.Response; + import com.openai.models.responses.ResponseRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Response response = client.responses().retrieve("resp_677efb5139a88190b512bc3fef8e535d"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.retrieve("resp_677efb5139a88190b512bc3fef8e535d") + + puts(response) + description: | + Retrieves a model response with the given ID. + delete: + operationId: deleteResponse + tags: + - Responses + summary: Delete a model response + parameters: + - in: path + name: response_id + required: true + schema: + type: string + example: resp_677efb5139a88190b512bc3fef8e535d + description: The ID of the response to delete. 
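+ # NOTE: Illustrative sketch only, not part of the upstream spec. It shows
+ # deleting a stored response by ID with the official Python SDK; the ID is
+ # hypothetical. Deletion returns a small confirmation object rather than a
+ # full Response.
+ #
+ #   from openai import OpenAI
+ #
+ #   client = OpenAI()
+ #   client.responses.delete("resp_123")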
+ responses: + '200': + description: OK + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + x-oaiMeta: + name: Delete a model response + group: responses + returns: | + A success message. + examples: + response: | + { + "id": "resp_6786a1bec27481909a17d673315b29f6", + "object": "response", + "deleted": true + } + request: + curl: | + curl -X DELETE https://api.openai.com/v1/responses/resp_123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const response = await client.responses.delete("resp_123"); + console.log(response); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + client.responses.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + await client.responses.delete('resp_677efb5139a88190b512bc3fef8e535d'); + go: | + package main + + import ( + "context" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + err := client.Responses.Delete(context.TODO(), "resp_677efb5139a88190b512bc3fef8e535d") + if err != nil { + panic(err.Error()) + } + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.ResponseDeleteParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + client.responses().delete("resp_677efb5139a88190b512bc3fef8e535d"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + result = openai.responses.delete("resp_677efb5139a88190b512bc3fef8e535d") + + puts(result) + description: | + Deletes a model response with the given ID. + /responses/{response_id}/cancel: + post: + operationId: cancelResponse + tags: + - Responses + summary: Cancel a response + parameters: + - in: path + name: response_id + required: true + schema: + type: string + example: resp_677efb5139a88190b512bc3fef8e535d + description: The ID of the response to cancel. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Response' + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + x-oaiMeta: + name: Cancel a response + group: responses + returns: | + A [Response](https://platform.openai.com/docs/api-reference/responses/object) object. 
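+ # NOTE: Illustrative sketch only, not part of the upstream spec. It shows
+ # cancelling a long-running response with the official Python SDK; per the
+ # description below, only responses created with `background` set to true can
+ # be cancelled. The model choice and prompt are placeholders.
+ #
+ #   from openai import OpenAI
+ #
+ #   client = OpenAI()
+ #   bg = client.responses.create(
+ #       model="gpt-4.1",
+ #       input="Write a very long report.",
+ #       background=True,
+ #   )
+ #   cancelled = client.responses.cancel(bg.id)
+ #   print(cancelled.status)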
+ examples: + response: | + { + "id": "resp_67cb71b351908190a308f3859487620d06981a8637e6bc44", + "object": "response", + "created_at": 1741386163, + "status": "completed", + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4o-2024-08-06", + "output": [ + { + "type": "message", + "id": "msg_67cb71b3c2b0819084d481baaaf148f206981a8637e6bc44", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "Silent circuits hum, \nThoughts emerge in data streams— \nDigital dawn breaks.", + "annotations": [] + } + ] + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": null, + "summary": null + }, + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 32, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 18, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 50 + }, + "user": null, + "metadata": {} + } + request: + curl: | + curl -X POST https://api.openai.com/v1/responses/resp_123/cancel \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const response = await client.responses.cancel("resp_123"); + console.log(response); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.responses.cancel( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + print(response.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.responses.cancel('resp_677efb5139a88190b512bc3fef8e535d'); + + console.log(response.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Responses.Cancel(context.TODO(), "resp_677efb5139a88190b512bc3fef8e535d") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.Response; + import com.openai.models.responses.ResponseCancelParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Response response = client.responses().cancel("resp_677efb5139a88190b512bc3fef8e535d"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.cancel("resp_677efb5139a88190b512bc3fef8e535d") + + puts(response) + description: | + Cancels a model response with the given ID. Only responses created with + the `background` parameter set to `true` can be cancelled. + [Learn more](https://platform.openai.com/docs/guides/background). + /responses/{response_id}/input_items: + get: + operationId: listInputItems + tags: + - Responses + summary: List input items + parameters: + - in: path + name: response_id + required: true + schema: + type: string + description: The ID of the response to retrieve input items for. 
+ - name: limit + in: query + description: | + A limit on the number of objects to be returned. Limit can range between + 1 and 100, and the default is 20. + required: false + schema: + type: integer + default: 20 + - in: query + name: order + schema: + type: string + enum: + - asc + - desc + description: | + The order to return the input items in. Default is `desc`. + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + - in: query + name: after + schema: + type: string + description: | + An item ID to list items after, used in pagination. + - in: query + name: include + schema: + type: array + items: + $ref: '#/components/schemas/IncludeEnum' + description: | + Additional fields to include in the response. See the `include` + parameter for Response creation above for more information. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ResponseItemList' + x-oaiMeta: + name: List input items + group: responses + returns: A list of input item objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "msg_abc123", + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "Tell me a three sentence bedtime story about a unicorn." + } + ] + } + ], + "first_id": "msg_abc123", + "last_id": "msg_abc123", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/responses/resp_abc123/input_items \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const response = await client.responses.inputItems.list("resp_123"); + console.log(response.data); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.responses.input_items.list( + response_id="response_id", + ) + page = page.data[0] + print(page) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const responseItem of client.responses.inputItems.list('response_id')) { + console.log(responseItem); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/responses" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Responses.InputItems.List( + context.TODO(), + "response_id", + responses.InputItemListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.inputitems.InputItemListPage; + import com.openai.models.responses.inputitems.InputItemListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + InputItemListPage page = client.responses().inputItems().list("response_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.responses.input_items.list("response_id") + + puts(page) + description: Returns a list of input items for a given response. 
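+ # NOTE: Illustrative sketch only, not part of the upstream spec. It shows
+ # paging through a response's input items with the official Python SDK using
+ # the `limit`, `order`, and `after` parameters documented above; the response
+ # ID is hypothetical, and the SDK's cursor page follows `after` automatically
+ # when iterated.
+ #
+ #   from openai import OpenAI
+ #
+ #   client = OpenAI()
+ #   page = client.responses.input_items.list(
+ #       response_id="resp_123",
+ #       limit=50,
+ #       order="asc",
+ #   )
+ #   for item in page:  # auto-pagination fetches later pages as needed
+ #       print(item.id)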
+ /threads: + post: + operationId: createThread + tags: + - Assistants + summary: Create thread + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateThreadRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ThreadObject' + x-oaiMeta: + name: Create thread + group: threads + beta: true + returns: A [thread](https://platform.openai.com/docs/api-reference/threads) object. + examples: + - title: Empty + request: + curl: | + curl https://api.openai.com/v1/threads \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + thread = client.beta.threads.create() + print(thread.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const thread = await client.beta.threads.create(); + + console.log(thread.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + thread, err := client.Beta.Threads.New(context.TODO(), openai.BetaThreadNewParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", thread.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.Thread; + import com.openai.models.beta.threads.ThreadCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Thread thread = client.beta().threads().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + thread = openai.beta.threads.create + + puts(thread) + response: | + { + "id": "thread_abc123", + "object": "thread", + "created_at": 1699012949, + "metadata": {}, + "tool_resources": {} + } + - title: Messages + request: + curl: | + curl https://api.openai.com/v1/threads \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "messages": [{ + "role": "user", + "content": "Hello, what is AI?" + }, { + "role": "user", + "content": "How does AI work? Explain it in simple terms." 
+ }] + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + thread = client.beta.threads.create() + print(thread.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const thread = await client.beta.threads.create(); + + console.log(thread.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + thread, err := client.Beta.Threads.New(context.TODO(), openai.BetaThreadNewParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", thread.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.Thread; + import com.openai.models.beta.threads.ThreadCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Thread thread = client.beta().threads().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + thread = openai.beta.threads.create + + puts(thread) + response: | + { + "id": "thread_abc123", + "object": "thread", + "created_at": 1699014083, + "metadata": {}, + "tool_resources": {} + } + description: Create a thread. + /threads/runs: + post: + operationId: createThreadAndRun + tags: + - Assistants + summary: Create thread and run + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateThreadAndRunRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RunObject' + x-oaiMeta: + name: Create thread and run + group: threads + beta: true + returns: A [run](https://platform.openai.com/docs/api-reference/runs/object) object. 
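+        # The generated SDK snippets in the examples below pass only `assistant_id`,
+        # while the curl bodies also seed the new thread with messages. A sketch of
+        # the equivalent call (assumes the published `openai` Python SDK; the
+        # `thread` argument mirrors CreateThreadRequest):
+        #
+        #   from openai import OpenAI
+        #
+        #   client = OpenAI()
+        #   run = client.beta.threads.create_and_run(
+        #       assistant_id="asst_abc123",
+        #       thread={"messages": [
+        #           {"role": "user", "content": "Explain deep learning to a 5 year old."}
+        #       ]},
+        #   )
+        #   print(run.status)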
+ examples: + - title: Default + request: + curl: | + curl https://api.openai.com/v1/threads/runs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "assistant_id": "asst_abc123", + "thread": { + "messages": [ + {"role": "user", "content": "Explain deep learning to a 5 year old."} + ] + } + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.create_and_run( + assistant_id="assistant_id", + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.beta.threads.createAndRun({ assistant_id: 'assistant_id' }); + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.NewAndRun(context.TODO(), openai.BetaThreadNewAndRunParams{ + AssistantID: "assistant_id", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.ThreadCreateAndRunParams; + import com.openai.models.beta.threads.runs.Run; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ThreadCreateAndRunParams params = ThreadCreateAndRunParams.builder() + .assistantId("assistant_id") + .build(); + Run run = client.beta().threads().createAndRun(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.create_and_run(assistant_id: "assistant_id") + + puts(run) + response: | + { + "id": "run_abc123", + "object": "thread.run", + "created_at": 1699076792, + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "status": "queued", + "started_at": null, + "expires_at": 1699077392, + "cancelled_at": null, + "failed_at": null, + "completed_at": null, + "required_action": null, + "last_error": null, + "model": "gpt-4o", + "instructions": "You are a helpful assistant.", + "tools": [], + "tool_resources": {}, + "metadata": {}, + "temperature": 1.0, + "top_p": 1.0, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "incomplete_details": null, + "usage": null, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + } + - title: Streaming + request: + curl: | + curl https://api.openai.com/v1/threads/runs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "assistant_id": "asst_123", + "thread": { + "messages": [ + {"role": "user", "content": "Hello"} + ] + }, + "stream": true + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.create_and_run( + assistant_id="assistant_id", + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.beta.threads.createAndRun({ assistant_id: 'assistant_id' }); + + console.log(run.id); + go: | + package main + + import ( + 
"context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.NewAndRun(context.TODO(), openai.BetaThreadNewAndRunParams{ + AssistantID: "assistant_id", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.ThreadCreateAndRunParams; + import com.openai.models.beta.threads.runs.Run; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ThreadCreateAndRunParams params = ThreadCreateAndRunParams.builder() + .assistantId("assistant_id") + .build(); + Run run = client.beta().threads().createAndRun(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.create_and_run(assistant_id: "assistant_id") + + puts(run) + response: > + event: thread.created + + data: {"id":"thread_123","object":"thread","created_at":1710348075,"metadata":{}} + + + event: thread.run.created + + data: + {"id":"run_123","object":"thread.run","created_at":1710348075,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":null,"expires_at":1710348675,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"tool_resources":{},"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true} + + + event: thread.run.queued + + data: + {"id":"run_123","object":"thread.run","created_at":1710348075,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":null,"expires_at":1710348675,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"tool_resources":{},"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true} + + + event: thread.run.in_progress + + data: + {"id":"run_123","object":"thread.run","created_at":1710348075,"assistant_id":"asst_123","thread_id":"thread_123","status":"in_progress","started_at":null,"expires_at":1710348675,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"tool_resources":{},"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true} + + + event: thread.run.step.created + + data: + 
{"id":"step_001","object":"thread.run.step","created_at":1710348076,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710348675,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":null} + + + event: thread.run.step.in_progress + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710348076,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710348675,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":null} + + + event: thread.message.created + + data: + {"id":"msg_001","object":"thread.message","created_at":1710348076,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"in_progress","incomplete_details":null,"incomplete_at":null,"completed_at":null,"role":"assistant","content":[], + "metadata":{}} + + + event: thread.message.in_progress + + data: + {"id":"msg_001","object":"thread.message","created_at":1710348076,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"in_progress","incomplete_details":null,"incomplete_at":null,"completed_at":null,"role":"assistant","content":[], + "metadata":{}} + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":"Hello","annotations":[]}}]}} + + + ... + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":" + today"}}]}} + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":"?"}}]}} + + + event: thread.message.completed + + data: + {"id":"msg_001","object":"thread.message","created_at":1710348076,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"completed","incomplete_details":null,"incomplete_at":null,"completed_at":1710348077,"role":"assistant","content":[{"type":"text","text":{"value":"Hello! 
+              How can I assist you today?","annotations":[]}}], "metadata":{}}
+
+
+          event: thread.run.step.completed
+
+          data:
+          {"id":"step_001","object":"thread.run.step","created_at":1710348076,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"completed","cancelled_at":null,"completed_at":1710348077,"expires_at":1710348675,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":{"prompt_tokens":20,"completion_tokens":11,"total_tokens":31}}
+
+
+          event: thread.run.completed
+
+          data:
+          {"id":"run_123","object":"thread.run","created_at":1710348076,"assistant_id":"asst_123","thread_id":"thread_123","status":"completed","started_at":1713226836,"expires_at":null,"cancelled_at":null,"failed_at":null,"completed_at":1713226837,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":{"prompt_tokens":345,"completion_tokens":11,"total_tokens":356},"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}
+
+
+          event: done
+
+          data: [DONE]
+      - title: Streaming with Functions
+        request:
+          curl: |
+            curl https://api.openai.com/v1/threads/runs \
+              -H "Authorization: Bearer $OPENAI_API_KEY" \
+              -H "Content-Type: application/json" \
+              -H "OpenAI-Beta: assistants=v2" \
+              -d '{
+                "assistant_id": "asst_abc123",
+                "thread": {
+                  "messages": [
+                    {"role": "user", "content": "What is the weather like in San Francisco?"}
+                  ]
+                },
+                "tools": [
+                  {
+                    "type": "function",
+                    "function": {
+                      "name": "get_current_weather",
+                      "description": "Get the current weather in a given location",
+                      "parameters": {
+                        "type": "object",
+                        "properties": {
+                          "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g.
San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ], + "stream": true + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.create_and_run( + assistant_id="assistant_id", + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.beta.threads.createAndRun({ assistant_id: 'assistant_id' }); + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.NewAndRun(context.TODO(), openai.BetaThreadNewAndRunParams{ + AssistantID: "assistant_id", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.ThreadCreateAndRunParams; + import com.openai.models.beta.threads.runs.Run; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ThreadCreateAndRunParams params = ThreadCreateAndRunParams.builder() + .assistantId("assistant_id") + .build(); + Run run = client.beta().threads().createAndRun(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.create_and_run(assistant_id: "assistant_id") + + puts(run) + response: > + event: thread.created + + data: {"id":"thread_123","object":"thread","created_at":1710351818,"metadata":{}} + + + event: thread.run.created + + data: + {"id":"run_123","object":"thread.run","created_at":1710351818,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":null,"expires_at":1710352418,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given + location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, + CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.queued + + data: + {"id":"run_123","object":"thread.run","created_at":1710351818,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":null,"expires_at":1710352418,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given + location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, + CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.in_progress + + data: + {"id":"run_123","object":"thread.run","created_at":1710351818,"assistant_id":"asst_123","thread_id":"thread_123","status":"in_progress","started_at":1710351818,"expires_at":1710352418,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given + location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, + CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.step.created + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710351819,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"tool_calls","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710352418,"failed_at":null,"last_error":null,"step_details":{"type":"tool_calls","tool_calls":[]},"usage":null} + + + event: thread.run.step.in_progress + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710351819,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"tool_calls","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710352418,"failed_at":null,"last_error":null,"step_details":{"type":"tool_calls","tool_calls":[]},"usage":null} + + + event: thread.run.step.delta + + data: + {"id":"step_001","object":"thread.run.step.delta","delta":{"step_details":{"type":"tool_calls","tool_calls":[{"index":0,"id":"call_XXNp8YGaFrjrSjgqxtC8JJ1B","type":"function","function":{"name":"get_current_weather","arguments":"","output":null}}]}}} + + + event: thread.run.step.delta + + data: + {"id":"step_001","object":"thread.run.step.delta","delta":{"step_details":{"type":"tool_calls","tool_calls":[{"index":0,"type":"function","function":{"arguments":"{\""}}]}}} + + + event: thread.run.step.delta + + data: + {"id":"step_001","object":"thread.run.step.delta","delta":{"step_details":{"type":"tool_calls","tool_calls":[{"index":0,"type":"function","function":{"arguments":"location"}}]}}} + + + ... 
+ + + event: thread.run.step.delta + + data: + {"id":"step_001","object":"thread.run.step.delta","delta":{"step_details":{"type":"tool_calls","tool_calls":[{"index":0,"type":"function","function":{"arguments":"ahrenheit"}}]}}} + + + event: thread.run.step.delta + + data: + {"id":"step_001","object":"thread.run.step.delta","delta":{"step_details":{"type":"tool_calls","tool_calls":[{"index":0,"type":"function","function":{"arguments":"\"}"}}]}}} + + + event: thread.run.requires_action + + data: + {"id":"run_123","object":"thread.run","created_at":1710351818,"assistant_id":"asst_123","thread_id":"thread_123","status":"requires_action","started_at":1710351818,"expires_at":1710352418,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":{"type":"submit_tool_outputs","submit_tool_outputs":{"tool_calls":[{"id":"call_XXNp8YGaFrjrSjgqxtC8JJ1B","type":"function","function":{"name":"get_current_weather","arguments":"{\"location\":\"San + Francisco, + CA\",\"unit\":\"fahrenheit\"}"}}]}},"last_error":null,"model":"gpt-4o","instructions":null,"tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given + location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, + CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":{"prompt_tokens":345,"completion_tokens":11,"total_tokens":356},"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: done + + data: [DONE] + description: Create a thread and run it in one request. + /threads/{thread_id}: + get: + operationId: getThread + tags: + - Assistants + summary: Retrieve thread + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the thread to retrieve. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ThreadObject' + x-oaiMeta: + name: Retrieve thread + group: threads + beta: true + returns: >- + The [thread](https://platform.openai.com/docs/api-reference/threads/object) object matching the + specified ID. 
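+        # The streaming-with-functions example above ends at `thread.run.requires_action`.
+        # A sketch of resuming such a run by submitting tool outputs (assumes the
+        # published `openai` Python SDK; IDs and the weather result are taken from the
+        # example events, the output string is hypothetical):
+        #
+        #   from openai import OpenAI
+        #
+        #   client = OpenAI()
+        #   run = client.beta.threads.runs.submit_tool_outputs(
+        #       "run_123",
+        #       thread_id="thread_123",
+        #       tool_outputs=[{
+        #           "tool_call_id": "call_XXNp8YGaFrjrSjgqxtC8JJ1B",
+        #           "output": "70F and sunny",
+        #       }],
+        #   )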
+ examples: + response: | + { + "id": "thread_abc123", + "object": "thread", + "created_at": 1699014083, + "metadata": {}, + "tool_resources": { + "code_interpreter": { + "file_ids": [] + } + } + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + thread = client.beta.threads.retrieve( + "thread_id", + ) + print(thread.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const thread = await client.beta.threads.retrieve('thread_id'); + + console.log(thread.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + thread, err := client.Beta.Threads.Get(context.TODO(), "thread_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", thread.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.Thread; + import com.openai.models.beta.threads.ThreadRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Thread thread = client.beta().threads().retrieve("thread_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + thread = openai.beta.threads.retrieve("thread_id") + + puts(thread) + description: Retrieves a thread. + post: + operationId: modifyThread + tags: + - Assistants + summary: Modify thread + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the thread to modify. Only the `metadata` can be modified. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModifyThreadRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ThreadObject' + x-oaiMeta: + name: Modify thread + group: threads + beta: true + returns: >- + The modified [thread](https://platform.openai.com/docs/api-reference/threads/object) object matching + the specified ID. 
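+        # Since only `metadata` is mutable on a thread, a sketch of the SDK call
+        # carrying the same body as the curl example below (assumes the published
+        # `openai` Python SDK):
+        #
+        #   from openai import OpenAI
+        #
+        #   client = OpenAI()
+        #   thread = client.beta.threads.update(
+        #       "thread_abc123",
+        #       metadata={"modified": "true", "user": "abc123"},
+        #   )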
+ examples: + response: | + { + "id": "thread_abc123", + "object": "thread", + "created_at": 1699014083, + "metadata": { + "modified": "true", + "user": "abc123" + }, + "tool_resources": {} + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "metadata": { + "modified": "true", + "user": "abc123" + } + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + thread = client.beta.threads.update( + thread_id="thread_id", + ) + print(thread.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const thread = await client.beta.threads.update('thread_id'); + + console.log(thread.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + thread, err := client.Beta.Threads.Update( + context.TODO(), + "thread_id", + openai.BetaThreadUpdateParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", thread.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.Thread; + import com.openai.models.beta.threads.ThreadUpdateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Thread thread = client.beta().threads().update("thread_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + thread = openai.beta.threads.update("thread_id") + + puts(thread) + description: Modifies a thread. + delete: + operationId: deleteThread + tags: + - Assistants + summary: Delete thread + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the thread to delete. 
+ responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteThreadResponse' + x-oaiMeta: + name: Delete thread + group: threads + beta: true + returns: Deletion status + examples: + response: | + { + "id": "thread_abc123", + "object": "thread.deleted", + "deleted": true + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -X DELETE + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + thread_deleted = client.beta.threads.delete( + "thread_id", + ) + print(thread_deleted.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const threadDeleted = await client.beta.threads.delete('thread_id'); + + console.log(threadDeleted.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + threadDeleted, err := client.Beta.Threads.Delete(context.TODO(), "thread_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", threadDeleted.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.ThreadDeleteParams; + import com.openai.models.beta.threads.ThreadDeleted; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ThreadDeleted threadDeleted = client.beta().threads().delete("thread_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + thread_deleted = openai.beta.threads.delete("thread_id") + + puts(thread_deleted) + description: Delete a thread. + /threads/{thread_id}/messages: + get: + operationId: listMessages + tags: + - Assistants + summary: List messages + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: >- + The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) the messages belong + to. + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: > + Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for + descending order. + schema: + type: string + default: desc + enum: + - asc + - desc + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + schema: + type: string + - name: before + in: query + description: > + A cursor for use in pagination. `before` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, starting with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page of the list. 
+ schema: + type: string + - name: run_id + in: query + description: | + Filter messages by the run ID that generated them. + schema: + type: string + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListMessagesResponse' + x-oaiMeta: + name: List messages + group: threads + beta: true + returns: A list of [message](https://platform.openai.com/docs/api-reference/messages) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "msg_abc123", + "object": "thread.message", + "created_at": 1699016383, + "assistant_id": null, + "thread_id": "thread_abc123", + "run_id": null, + "role": "user", + "content": [ + { + "type": "text", + "text": { + "value": "How does AI work? Explain it in simple terms.", + "annotations": [] + } + } + ], + "attachments": [], + "metadata": {} + }, + { + "id": "msg_abc456", + "object": "thread.message", + "created_at": 1699016383, + "assistant_id": null, + "thread_id": "thread_abc123", + "run_id": null, + "role": "user", + "content": [ + { + "type": "text", + "text": { + "value": "Hello, what is AI?", + "annotations": [] + } + } + ], + "attachments": [], + "metadata": {} + } + ], + "first_id": "msg_abc123", + "last_id": "msg_abc456", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/messages \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.beta.threads.messages.list( + thread_id="thread_id", + ) + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const message of client.beta.threads.messages.list('thread_id')) { + console.log(message.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Beta.Threads.Messages.List( + context.TODO(), + "thread_id", + openai.BetaThreadMessageListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.messages.MessageListPage; + import com.openai.models.beta.threads.messages.MessageListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + MessageListPage page = client.beta().threads().messages().list("thread_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.beta.threads.messages.list("thread_id") + + puts(page) + description: Returns a list of messages for a given thread. + post: + operationId: createMessage + tags: + - Assistants + summary: Create message + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: >- + The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) to create a message + for. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateMessageRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/MessageObject' + x-oaiMeta: + name: Create message + group: threads + beta: true + returns: A [message](https://platform.openai.com/docs/api-reference/messages/object) object. + examples: + response: | + { + "id": "msg_abc123", + "object": "thread.message", + "created_at": 1713226573, + "assistant_id": null, + "thread_id": "thread_abc123", + "run_id": null, + "role": "user", + "content": [ + { + "type": "text", + "text": { + "value": "How does AI work? Explain it in simple terms.", + "annotations": [] + } + } + ], + "attachments": [], + "metadata": {} + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/messages \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "role": "user", + "content": "How does AI work? Explain it in simple terms." + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + message = client.beta.threads.messages.create( + thread_id="thread_id", + content="string", + role="user", + ) + print(message.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const message = await client.beta.threads.messages.create('thread_id', { content: 'string', + role: 'user' }); + + + console.log(message.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + message, err := client.Beta.Threads.Messages.New( + context.TODO(), + "thread_id", + openai.BetaThreadMessageNewParams{ + Content: openai.BetaThreadMessageNewParamsContentUnion{ + OfString: openai.String("string"), + }, + Role: openai.BetaThreadMessageNewParamsRoleUser, + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", message.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.messages.Message; + import com.openai.models.beta.threads.messages.MessageCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + MessageCreateParams params = MessageCreateParams.builder() + .threadId("thread_id") + .content("string") + .role(MessageCreateParams.Role.USER) + .build(); + Message message = client.beta().threads().messages().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + message = openai.beta.threads.messages.create("thread_id", content: "string", role: :user) + + puts(message) + description: Create a message. + /threads/{thread_id}/messages/{message_id}: + get: + operationId: getMessage + tags: + - Assistants + summary: Retrieve message + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: >- + The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) to which this + message belongs. + - in: path + name: message_id + required: true + schema: + type: string + description: The ID of the message to retrieve. 
+ responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/MessageObject' + x-oaiMeta: + name: Retrieve message + group: threads + beta: true + returns: >- + The [message](https://platform.openai.com/docs/api-reference/messages/object) object matching the + specified ID. + examples: + response: | + { + "id": "msg_abc123", + "object": "thread.message", + "created_at": 1699017614, + "assistant_id": null, + "thread_id": "thread_abc123", + "run_id": null, + "role": "user", + "content": [ + { + "type": "text", + "text": { + "value": "How does AI work? Explain it in simple terms.", + "annotations": [] + } + } + ], + "attachments": [], + "metadata": {} + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/messages/msg_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + message = client.beta.threads.messages.retrieve( + message_id="message_id", + thread_id="thread_id", + ) + print(message.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const message = await client.beta.threads.messages.retrieve('message_id', { thread_id: + 'thread_id' }); + + + console.log(message.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + message, err := client.Beta.Threads.Messages.Get( + context.TODO(), + "thread_id", + "message_id", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", message.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.messages.Message; + import com.openai.models.beta.threads.messages.MessageRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + MessageRetrieveParams params = MessageRetrieveParams.builder() + .threadId("thread_id") + .messageId("message_id") + .build(); + Message message = client.beta().threads().messages().retrieve(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + message = openai.beta.threads.messages.retrieve("message_id", thread_id: "thread_id") + + puts(message) + description: Retrieve a message. + post: + operationId: modifyMessage + tags: + - Assistants + summary: Modify message + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the thread to which this message belongs. + - in: path + name: message_id + required: true + schema: + type: string + description: The ID of the message to modify. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModifyMessageRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/MessageObject' + x-oaiMeta: + name: Modify message + group: threads + beta: true + returns: The modified [message](https://platform.openai.com/docs/api-reference/messages/object) object. 
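+        # As with threads, only `metadata` can be modified on a message; a sketch of
+        # the SDK call matching the curl body below (assumes the published `openai`
+        # Python SDK):
+        #
+        #   from openai import OpenAI
+        #
+        #   client = OpenAI()
+        #   message = client.beta.threads.messages.update(
+        #       message_id="msg_abc123",
+        #       thread_id="thread_abc123",
+        #       metadata={"modified": "true", "user": "abc123"},
+        #   )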
+ examples: + response: | + { + "id": "msg_abc123", + "object": "thread.message", + "created_at": 1699017614, + "assistant_id": null, + "thread_id": "thread_abc123", + "run_id": null, + "role": "user", + "content": [ + { + "type": "text", + "text": { + "value": "How does AI work? Explain it in simple terms.", + "annotations": [] + } + } + ], + "file_ids": [], + "metadata": { + "modified": "true", + "user": "abc123" + } + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/messages/msg_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "metadata": { + "modified": "true", + "user": "abc123" + } + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + message = client.beta.threads.messages.update( + message_id="message_id", + thread_id="thread_id", + ) + print(message.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const message = await client.beta.threads.messages.update('message_id', { thread_id: 'thread_id' + }); + + + console.log(message.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + message, err := client.Beta.Threads.Messages.Update( + context.TODO(), + "thread_id", + "message_id", + openai.BetaThreadMessageUpdateParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", message.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.messages.Message; + import com.openai.models.beta.threads.messages.MessageUpdateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + MessageUpdateParams params = MessageUpdateParams.builder() + .threadId("thread_id") + .messageId("message_id") + .build(); + Message message = client.beta().threads().messages().update(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + message = openai.beta.threads.messages.update("message_id", thread_id: "thread_id") + + puts(message) + description: Modifies a message. + delete: + operationId: deleteMessage + tags: + - Assistants + summary: Delete message + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the thread to which this message belongs. + - in: path + name: message_id + required: true + schema: + type: string + description: The ID of the message to delete. 
+ responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteMessageResponse' + x-oaiMeta: + name: Delete message + group: threads + beta: true + returns: Deletion status + examples: + response: | + { + "id": "msg_abc123", + "object": "thread.message.deleted", + "deleted": true + } + request: + curl: | + curl -X DELETE https://api.openai.com/v1/threads/thread_abc123/messages/msg_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + message_deleted = client.beta.threads.messages.delete( + message_id="message_id", + thread_id="thread_id", + ) + print(message_deleted.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const messageDeleted = await client.beta.threads.messages.delete('message_id', { thread_id: + 'thread_id' }); + + + console.log(messageDeleted.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + messageDeleted, err := client.Beta.Threads.Messages.Delete( + context.TODO(), + "thread_id", + "message_id", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", messageDeleted.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.messages.MessageDeleteParams; + import com.openai.models.beta.threads.messages.MessageDeleted; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + MessageDeleteParams params = MessageDeleteParams.builder() + .threadId("thread_id") + .messageId("message_id") + .build(); + MessageDeleted messageDeleted = client.beta().threads().messages().delete(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + message_deleted = openai.beta.threads.messages.delete("message_id", thread_id: "thread_id") + + puts(message_deleted) + description: Deletes a message. + /threads/{thread_id}/runs: + get: + operationId: listRuns + tags: + - Assistants + summary: List runs + parameters: + - name: thread_id + in: path + required: true + schema: + type: string + description: The ID of the thread the run belongs to. + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: > + Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for + descending order. + schema: + type: string + default: desc + enum: + - asc + - desc + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + schema: + type: string + - name: before + in: query + description: > + A cursor for use in pagination. `before` is an object ID that defines your place in the list. 
For + instance, if you make a list request and receive 100 objects, starting with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page of the list. + schema: + type: string + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListRunsResponse' + x-oaiMeta: + name: List runs + group: threads + beta: true + returns: A list of [run](https://platform.openai.com/docs/api-reference/runs/object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "run_abc123", + "object": "thread.run", + "created_at": 1699075072, + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "status": "completed", + "started_at": 1699075072, + "expires_at": null, + "cancelled_at": null, + "failed_at": null, + "completed_at": 1699075073, + "last_error": null, + "model": "gpt-4o", + "instructions": null, + "incomplete_details": null, + "tools": [ + { + "type": "code_interpreter" + } + ], + "tool_resources": { + "code_interpreter": { + "file_ids": [ + "file-abc123", + "file-abc456" + ] + } + }, + "metadata": {}, + "usage": { + "prompt_tokens": 123, + "completion_tokens": 456, + "total_tokens": 579 + }, + "temperature": 1.0, + "top_p": 1.0, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + }, + { + "id": "run_abc456", + "object": "thread.run", + "created_at": 1699063290, + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "status": "completed", + "started_at": 1699063290, + "expires_at": null, + "cancelled_at": null, + "failed_at": null, + "completed_at": 1699063291, + "last_error": null, + "model": "gpt-4o", + "instructions": null, + "incomplete_details": null, + "tools": [ + { + "type": "code_interpreter" + } + ], + "tool_resources": { + "code_interpreter": { + "file_ids": [ + "file-abc123", + "file-abc456" + ] + } + }, + "metadata": {}, + "usage": { + "prompt_tokens": 123, + "completion_tokens": 456, + "total_tokens": 579 + }, + "temperature": 1.0, + "top_p": 1.0, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + } + ], + "first_id": "run_abc123", + "last_id": "run_abc456", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/runs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.beta.threads.runs.list( + thread_id="thread_id", + ) + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. 
+ for await (const run of client.beta.threads.runs.list('thread_id')) { + console.log(run.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Beta.Threads.Runs.List( + context.TODO(), + "thread_id", + openai.BetaThreadRunListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.RunListPage; + import com.openai.models.beta.threads.runs.RunListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunListPage page = client.beta().threads().runs().list("thread_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.beta.threads.runs.list("thread_id") + + puts(page) + description: Returns a list of runs belonging to a thread. + post: + operationId: createRun + tags: + - Assistants + summary: Create run + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the thread to run. + - name: include[] + in: query + description: > + A list of additional fields to include in the response. Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file search result + content. + + + See the [file search tool + documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + schema: + type: array + items: + type: string + enum: + - step_details.tool_calls[*].file_search.results[*].content + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateRunRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RunObject' + x-oaiMeta: + name: Create run + group: threads + beta: true + returns: A [run](https://platform.openai.com/docs/api-reference/runs/object) object. 
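+        # A sketch of the `include[]` query parameter described above, requesting
+        # file search result content on run steps (illustration only; assumes the
+        # published `openai` Python SDK exposes it as an `include` argument):
+        #
+        #   from openai import OpenAI
+        #
+        #   client = OpenAI()
+        #   run = client.beta.threads.runs.create(
+        #       thread_id="thread_abc123",
+        #       assistant_id="asst_abc123",
+        #       include=["step_details.tool_calls[*].file_search.results[*].content"],
+        #   )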
+ examples: + - title: Default + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/runs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "assistant_id": "asst_abc123" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + ) + print(run.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const run = await client.beta.threads.runs.create('thread_id', { assistant_id: 'assistant_id' + }); + + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.Runs.New( + context.TODO(), + "thread_id", + openai.BetaThreadRunNewParams{ + AssistantID: "assistant_id", + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.Run; + import com.openai.models.beta.threads.runs.RunCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunCreateParams params = RunCreateParams.builder() + .threadId("thread_id") + .assistantId("assistant_id") + .build(); + Run run = client.beta().threads().runs().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.runs.create("thread_id", assistant_id: "assistant_id") + + puts(run) + response: | + { + "id": "run_abc123", + "object": "thread.run", + "created_at": 1699063290, + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "status": "queued", + "started_at": 1699063290, + "expires_at": null, + "cancelled_at": null, + "failed_at": null, + "completed_at": 1699063291, + "last_error": null, + "model": "gpt-4o", + "instructions": null, + "incomplete_details": null, + "tools": [ + { + "type": "code_interpreter" + } + ], + "metadata": {}, + "usage": null, + "temperature": 1.0, + "top_p": 1.0, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + } + - title: Streaming + request: + curl: | + curl https://api.openai.com/v1/threads/thread_123/runs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "assistant_id": "asst_123", + "stream": true + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + ) + print(run.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const run = await client.beta.threads.runs.create('thread_id', { assistant_id: 'assistant_id' + }); + + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + 
"github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.Runs.New( + context.TODO(), + "thread_id", + openai.BetaThreadRunNewParams{ + AssistantID: "assistant_id", + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.Run; + import com.openai.models.beta.threads.runs.RunCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunCreateParams params = RunCreateParams.builder() + .threadId("thread_id") + .assistantId("assistant_id") + .build(); + Run run = client.beta().threads().runs().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.runs.create("thread_id", assistant_id: "assistant_id") + + puts(run) + response: > + event: thread.run.created + + data: + {"id":"run_123","object":"thread.run","created_at":1710330640,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":null,"expires_at":1710331240,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.queued + + data: + {"id":"run_123","object":"thread.run","created_at":1710330640,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":null,"expires_at":1710331240,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.in_progress + + data: + {"id":"run_123","object":"thread.run","created_at":1710330640,"assistant_id":"asst_123","thread_id":"thread_123","status":"in_progress","started_at":1710330641,"expires_at":1710331240,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.step.created + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710330641,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710331240,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":null} + + + event: thread.run.step.in_progress + + data: + 
{"id":"step_001","object":"thread.run.step","created_at":1710330641,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710331240,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":null} + + + event: thread.message.created + + data: + {"id":"msg_001","object":"thread.message","created_at":1710330641,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"in_progress","incomplete_details":null,"incomplete_at":null,"completed_at":null,"role":"assistant","content":[],"metadata":{}} + + + event: thread.message.in_progress + + data: + {"id":"msg_001","object":"thread.message","created_at":1710330641,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"in_progress","incomplete_details":null,"incomplete_at":null,"completed_at":null,"role":"assistant","content":[],"metadata":{}} + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":"Hello","annotations":[]}}]}} + + + ... + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":" + today"}}]}} + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":"?"}}]}} + + + event: thread.message.completed + + data: + {"id":"msg_001","object":"thread.message","created_at":1710330641,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"completed","incomplete_details":null,"incomplete_at":null,"completed_at":1710330642,"role":"assistant","content":[{"type":"text","text":{"value":"Hello! 
+ How can I assist you today?","annotations":[]}}],"metadata":{}} + + + event: thread.run.step.completed + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710330641,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"completed","cancelled_at":null,"completed_at":1710330642,"expires_at":1710331240,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":{"prompt_tokens":20,"completion_tokens":11,"total_tokens":31}} + + + event: thread.run.completed + + data: + {"id":"run_123","object":"thread.run","created_at":1710330640,"assistant_id":"asst_123","thread_id":"thread_123","status":"completed","started_at":1710330641,"expires_at":null,"cancelled_at":null,"failed_at":null,"completed_at":1710330642,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":{"prompt_tokens":20,"completion_tokens":11,"total_tokens":31},"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: done + + data: [DONE] + - title: Streaming with Functions + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/runs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "assistant_id": "asst_abc123", + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ], + "stream": true + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.runs.create( + thread_id="thread_id", + assistant_id="assistant_id", + ) + print(run.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const run = await client.beta.threads.runs.create('thread_id', { assistant_id: 'assistant_id' + }); + + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.Runs.New( + context.TODO(), + "thread_id", + openai.BetaThreadRunNewParams{ + AssistantID: "assistant_id", + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.Run; + import com.openai.models.beta.threads.runs.RunCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunCreateParams params = RunCreateParams.builder() + .threadId("thread_id") + .assistantId("assistant_id") + .build(); + Run run = client.beta().threads().runs().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.runs.create("thread_id", assistant_id: "assistant_id") + + puts(run) + response: > + event: thread.run.created + + data: + {"id":"run_123","object":"thread.run","created_at":1710348075,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":null,"expires_at":1710348675,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.queued + + data: + {"id":"run_123","object":"thread.run","created_at":1710348075,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":null,"expires_at":1710348675,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.in_progress + + data: + 
{"id":"run_123","object":"thread.run","created_at":1710348075,"assistant_id":"asst_123","thread_id":"thread_123","status":"in_progress","started_at":1710348075,"expires_at":1710348675,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.step.created + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710348076,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710348675,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":null} + + + event: thread.run.step.in_progress + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710348076,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710348675,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":null} + + + event: thread.message.created + + data: + {"id":"msg_001","object":"thread.message","created_at":1710348076,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"in_progress","incomplete_details":null,"incomplete_at":null,"completed_at":null,"role":"assistant","content":[],"metadata":{}} + + + event: thread.message.in_progress + + data: + {"id":"msg_001","object":"thread.message","created_at":1710348076,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"in_progress","incomplete_details":null,"incomplete_at":null,"completed_at":null,"role":"assistant","content":[],"metadata":{}} + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":"Hello","annotations":[]}}]}} + + + ... + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":" + today"}}]}} + + + event: thread.message.delta + + data: + {"id":"msg_001","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":"?"}}]}} + + + event: thread.message.completed + + data: + {"id":"msg_001","object":"thread.message","created_at":1710348076,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"completed","incomplete_details":null,"incomplete_at":null,"completed_at":1710348077,"role":"assistant","content":[{"type":"text","text":{"value":"Hello! 
+ How can I assist you today?","annotations":[]}}],"metadata":{}} + + + event: thread.run.step.completed + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710348076,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"completed","cancelled_at":null,"completed_at":1710348077,"expires_at":1710348675,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_001"}},"usage":{"prompt_tokens":20,"completion_tokens":11,"total_tokens":31}} + + + event: thread.run.completed + + data: + {"id":"run_123","object":"thread.run","created_at":1710348075,"assistant_id":"asst_123","thread_id":"thread_123","status":"completed","started_at":1710348075,"expires_at":null,"cancelled_at":null,"failed_at":null,"completed_at":1710348077,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":{"prompt_tokens":20,"completion_tokens":11,"total_tokens":31},"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: done + + data: [DONE] + description: Create a run. + /threads/{thread_id}/runs/{run_id}: + get: + operationId: getRun + tags: + - Assistants + summary: Retrieve run + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) that was run. + - in: path + name: run_id + required: true + schema: + type: string + description: The ID of the run to retrieve. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RunObject' + x-oaiMeta: + name: Retrieve run + group: threads + beta: true + returns: >- + The [run](https://platform.openai.com/docs/api-reference/runs/object) object matching the specified + ID. 
+ examples: + response: | + { + "id": "run_abc123", + "object": "thread.run", + "created_at": 1699075072, + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "status": "completed", + "started_at": 1699075072, + "expires_at": null, + "cancelled_at": null, + "failed_at": null, + "completed_at": 1699075073, + "last_error": null, + "model": "gpt-4o", + "instructions": null, + "incomplete_details": null, + "tools": [ + { + "type": "code_interpreter" + } + ], + "metadata": {}, + "usage": { + "prompt_tokens": 123, + "completion_tokens": 456, + "total_tokens": 579 + }, + "temperature": 1.0, + "top_p": 1.0, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/runs/run_abc123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.runs.retrieve( + run_id="run_id", + thread_id="thread_id", + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.beta.threads.runs.retrieve('run_id', { thread_id: 'thread_id' }); + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.Runs.Get( + context.TODO(), + "thread_id", + "run_id", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.Run; + import com.openai.models.beta.threads.runs.RunRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunRetrieveParams params = RunRetrieveParams.builder() + .threadId("thread_id") + .runId("run_id") + .build(); + Run run = client.beta().threads().runs().retrieve(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.runs.retrieve("run_id", thread_id: "thread_id") + + puts(run) + description: Retrieves a run. + post: + operationId: modifyRun + tags: + - Assistants + summary: Modify run + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) that was run. + - in: path + name: run_id + required: true + schema: + type: string + description: The ID of the run to modify. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModifyRunRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RunObject' + x-oaiMeta: + name: Modify run + group: threads + beta: true + returns: >- + The modified [run](https://platform.openai.com/docs/api-reference/runs/object) object matching the + specified ID. 
+ examples: + response: | + { + "id": "run_abc123", + "object": "thread.run", + "created_at": 1699075072, + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "status": "completed", + "started_at": 1699075072, + "expires_at": null, + "cancelled_at": null, + "failed_at": null, + "completed_at": 1699075073, + "last_error": null, + "model": "gpt-4o", + "instructions": null, + "incomplete_details": null, + "tools": [ + { + "type": "code_interpreter" + } + ], + "tool_resources": { + "code_interpreter": { + "file_ids": [ + "file-abc123", + "file-abc456" + ] + } + }, + "metadata": { + "user_id": "user_abc123" + }, + "usage": { + "prompt_tokens": 123, + "completion_tokens": 456, + "total_tokens": 579 + }, + "temperature": 1.0, + "top_p": 1.0, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/runs/run_abc123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "metadata": { + "user_id": "user_abc123" + } + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.runs.update( + run_id="run_id", + thread_id="thread_id", + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.beta.threads.runs.update('run_id', { thread_id: 'thread_id' }); + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.Runs.Update( + context.TODO(), + "thread_id", + "run_id", + openai.BetaThreadRunUpdateParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.Run; + import com.openai.models.beta.threads.runs.RunUpdateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunUpdateParams params = RunUpdateParams.builder() + .threadId("thread_id") + .runId("run_id") + .build(); + Run run = client.beta().threads().runs().update(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.runs.update("run_id", thread_id: "thread_id") + + puts(run) + description: Modifies a run. + /threads/{thread_id}/runs/{run_id}/cancel: + post: + operationId: cancelRun + tags: + - Assistants + summary: Cancel a run + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the thread to which this run belongs. + - in: path + name: run_id + required: true + schema: + type: string + description: The ID of the run to cancel. 
+ responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RunObject' + x-oaiMeta: + name: Cancel a run + group: threads + beta: true + returns: >- + The modified [run](https://platform.openai.com/docs/api-reference/runs/object) object matching the + specified ID. + examples: + response: | + { + "id": "run_abc123", + "object": "thread.run", + "created_at": 1699076126, + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "status": "cancelling", + "started_at": 1699076126, + "expires_at": 1699076726, + "cancelled_at": null, + "failed_at": null, + "completed_at": null, + "last_error": null, + "model": "gpt-4o", + "instructions": "You summarize books.", + "tools": [ + { + "type": "file_search" + } + ], + "tool_resources": { + "file_search": { + "vector_store_ids": ["vs_123"] + } + }, + "metadata": {}, + "usage": null, + "temperature": 1.0, + "top_p": 1.0, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/runs/run_abc123/cancel \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: assistants=v2" \ + -X POST + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.runs.cancel( + run_id="run_id", + thread_id="thread_id", + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.beta.threads.runs.cancel('run_id', { thread_id: 'thread_id' }); + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.Runs.Cancel( + context.TODO(), + "thread_id", + "run_id", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.Run; + import com.openai.models.beta.threads.runs.RunCancelParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunCancelParams params = RunCancelParams.builder() + .threadId("thread_id") + .runId("run_id") + .build(); + Run run = client.beta().threads().runs().cancel(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + run = openai.beta.threads.runs.cancel("run_id", thread_id: "thread_id") + + puts(run) + description: Cancels a run that is `in_progress`. + /threads/{thread_id}/runs/{run_id}/steps: + get: + operationId: listRunSteps + tags: + - Assistants + summary: List run steps + parameters: + - name: thread_id + in: path + required: true + schema: + type: string + description: The ID of the thread the run and run steps belong to. + - name: run_id + in: path + required: true + schema: + type: string + description: The ID of the run the run steps belong to. + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. 
+ required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: > + Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for + descending order. + schema: + type: string + default: desc + enum: + - asc + - desc + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + schema: + type: string + - name: before + in: query + description: > + A cursor for use in pagination. `before` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, starting with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page of the list. + schema: + type: string + - name: include[] + in: query + description: > + A list of additional fields to include in the response. Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file search result + content. + + + See the [file search tool + documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + schema: + type: array + items: + type: string + enum: + - step_details.tool_calls[*].file_search.results[*].content + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListRunStepsResponse' + x-oaiMeta: + name: List run steps + group: threads + beta: true + returns: A list of [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "step_abc123", + "object": "thread.run.step", + "created_at": 1699063291, + "run_id": "run_abc123", + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "type": "message_creation", + "status": "completed", + "cancelled_at": null, + "completed_at": 1699063291, + "expired_at": null, + "failed_at": null, + "last_error": null, + "step_details": { + "type": "message_creation", + "message_creation": { + "message_id": "msg_abc123" + } + }, + "usage": { + "prompt_tokens": 123, + "completion_tokens": 456, + "total_tokens": 579 + } + } + ], + "first_id": "step_abc123", + "last_id": "step_abc456", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/runs/run_abc123/steps \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.beta.threads.runs.steps.list( + run_id="run_id", + thread_id="thread_id", + ) + page = page.data[0] + print(page.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + // Automatically fetches more pages as needed. 
+ + for await (const runStep of client.beta.threads.runs.steps.list('run_id', { thread_id: + 'thread_id' })) { + console.log(runStep.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Beta.Threads.Runs.Steps.List( + context.TODO(), + "thread_id", + "run_id", + openai.BetaThreadRunStepListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.steps.StepListPage; + import com.openai.models.beta.threads.runs.steps.StepListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + StepListParams params = StepListParams.builder() + .threadId("thread_id") + .runId("run_id") + .build(); + StepListPage page = client.beta().threads().runs().steps().list(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.beta.threads.runs.steps.list("run_id", thread_id: "thread_id") + + puts(page) + description: Returns a list of run steps belonging to a run. + /threads/{thread_id}/runs/{run_id}/steps/{step_id}: + get: + operationId: getRunStep + tags: + - Assistants + summary: Retrieve run step + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: The ID of the thread to which the run and run step belongs. + - in: path + name: run_id + required: true + schema: + type: string + description: The ID of the run to which the run step belongs. + - in: path + name: step_id + required: true + schema: + type: string + description: The ID of the run step to retrieve. + - name: include[] + in: query + description: > + A list of additional fields to include in the response. Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file search result + content. + + + See the [file search tool + documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + schema: + type: array + items: + type: string + enum: + - step_details.tool_calls[*].file_search.results[*].content + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RunStepObject' + x-oaiMeta: + name: Retrieve run step + group: threads + beta: true + returns: >- + The [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) object matching + the specified ID. 
+ examples: + response: | + { + "id": "step_abc123", + "object": "thread.run.step", + "created_at": 1699063291, + "run_id": "run_abc123", + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "type": "message_creation", + "status": "completed", + "cancelled_at": null, + "completed_at": 1699063291, + "expired_at": null, + "failed_at": null, + "last_error": null, + "step_details": { + "type": "message_creation", + "message_creation": { + "message_id": "msg_abc123" + } + }, + "usage": { + "prompt_tokens": 123, + "completion_tokens": 456, + "total_tokens": 579 + } + } + request: + curl: | + curl https://api.openai.com/v1/threads/thread_abc123/runs/run_abc123/steps/step_abc123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run_step = client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) + print(run_step.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const runStep = await client.beta.threads.runs.steps.retrieve('step_id', { + thread_id: 'thread_id', + run_id: 'run_id', + }); + + console.log(runStep.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + runStep, err := client.Beta.Threads.Runs.Steps.Get( + context.TODO(), + "thread_id", + "run_id", + "step_id", + openai.BetaThreadRunStepGetParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", runStep.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.steps.RunStep; + import com.openai.models.beta.threads.runs.steps.StepRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + StepRetrieveParams params = StepRetrieveParams.builder() + .threadId("thread_id") + .runId("run_id") + .stepId("step_id") + .build(); + RunStep runStep = client.beta().threads().runs().steps().retrieve(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + run_step = openai.beta.threads.runs.steps.retrieve("step_id", thread_id: "thread_id", run_id: + "run_id") + + + puts(run_step) + description: Retrieves a run step. + /threads/{thread_id}/runs/{run_id}/submit_tool_outputs: + post: + operationId: submitToolOuputsToRun + tags: + - Assistants + summary: Submit tool outputs to run + parameters: + - in: path + name: thread_id + required: true + schema: + type: string + description: >- + The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) to which this run + belongs. + - in: path + name: run_id + required: true + schema: + type: string + description: The ID of the run that requires the tool output submission. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/SubmitToolOutputsRunRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/RunObject' + x-oaiMeta: + name: Submit tool outputs to run + group: threads + beta: true + returns: >- + The modified [run](https://platform.openai.com/docs/api-reference/runs/object) object matching the + specified ID. + examples: + - title: Default + request: + curl: | + curl https://api.openai.com/v1/threads/thread_123/runs/run_123/submit_tool_outputs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "tool_outputs": [ + { + "tool_call_id": "call_001", + "output": "70 degrees and sunny." + } + ] + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.beta.threads.runs.submitToolOutputs('run_id', { + thread_id: 'thread_id', + tool_outputs: [{}], + }); + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.Runs.SubmitToolOutputs( + context.TODO(), + "thread_id", + "run_id", + openai.BetaThreadRunSubmitToolOutputsParams{ + ToolOutputs: []openai.BetaThreadRunSubmitToolOutputsParamsToolOutput{openai.BetaThreadRunSubmitToolOutputsParamsToolOutput{ + + }}, + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.Run; + import com.openai.models.beta.threads.runs.RunSubmitToolOutputsParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunSubmitToolOutputsParams params = RunSubmitToolOutputsParams.builder() + .threadId("thread_id") + .runId("run_id") + .addToolOutput(RunSubmitToolOutputsParams.ToolOutput.builder().build()) + .build(); + Run run = client.beta().threads().runs().submitToolOutputs(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + run = openai.beta.threads.runs.submit_tool_outputs("run_id", thread_id: "thread_id", + tool_outputs: [{}]) + + + puts(run) + response: | + { + "id": "run_123", + "object": "thread.run", + "created_at": 1699075592, + "assistant_id": "asst_123", + "thread_id": "thread_123", + "status": "queued", + "started_at": 1699075592, + "expires_at": 1699076192, + "cancelled_at": null, + "failed_at": null, + "completed_at": null, + "last_error": null, + "model": "gpt-4o", + "instructions": null, + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ], + "metadata": {}, + "usage": null, + "temperature": 1.0, + "top_p": 1.0, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + } + - title: Streaming + request: + curl: | + curl https://api.openai.com/v1/threads/thread_123/runs/run_123/submit_tool_outputs \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "tool_outputs": [ + { + "tool_call_id": "call_001", + "output": "70 degrees and sunny." + } + ], + "stream": true + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + run = client.beta.threads.runs.submit_tool_outputs( + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], + ) + print(run.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const run = await client.beta.threads.runs.submitToolOutputs('run_id', { + thread_id: 'thread_id', + tool_outputs: [{}], + }); + + console.log(run.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + run, err := client.Beta.Threads.Runs.SubmitToolOutputs( + context.TODO(), + "thread_id", + "run_id", + openai.BetaThreadRunSubmitToolOutputsParams{ + ToolOutputs: []openai.BetaThreadRunSubmitToolOutputsParamsToolOutput{openai.BetaThreadRunSubmitToolOutputsParamsToolOutput{ + + }}, + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", run.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.threads.runs.Run; + import com.openai.models.beta.threads.runs.RunSubmitToolOutputsParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + RunSubmitToolOutputsParams params = RunSubmitToolOutputsParams.builder() + .threadId("thread_id") + .runId("run_id") + .addToolOutput(RunSubmitToolOutputsParams.ToolOutput.builder().build()) + .build(); + Run run = client.beta().threads().runs().submitToolOutputs(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + run = openai.beta.threads.runs.submit_tool_outputs("run_id", thread_id: "thread_id", + tool_outputs: [{}]) + + + puts(run) + response: > + event: thread.run.step.completed + + data: + {"id":"step_001","object":"thread.run.step","created_at":1710352449,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"tool_calls","status":"completed","cancelled_at":null,"completed_at":1710352475,"expires_at":1710353047,"failed_at":null,"last_error":null,"step_details":{"type":"tool_calls","tool_calls":[{"id":"call_iWr0kQ2EaYMaxNdl0v3KYkx7","type":"function","function":{"name":"get_current_weather","arguments":"{\"location\":\"San + Francisco, CA\",\"unit\":\"fahrenheit\"}","output":"70 degrees and + sunny."}}]},"usage":{"prompt_tokens":291,"completion_tokens":24,"total_tokens":315}} + + + event: thread.run.queued + + data: + 
{"id":"run_123","object":"thread.run","created_at":1710352447,"assistant_id":"asst_123","thread_id":"thread_123","status":"queued","started_at":1710352448,"expires_at":1710353047,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given + location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, + CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.in_progress + + data: + {"id":"run_123","object":"thread.run","created_at":1710352447,"assistant_id":"asst_123","thread_id":"thread_123","status":"in_progress","started_at":1710352475,"expires_at":1710353047,"cancelled_at":null,"failed_at":null,"completed_at":null,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given + location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, + CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":null,"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: thread.run.step.created + + data: + {"id":"step_002","object":"thread.run.step","created_at":1710352476,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710353047,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_002"}},"usage":null} + + + event: thread.run.step.in_progress + + data: + {"id":"step_002","object":"thread.run.step","created_at":1710352476,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"in_progress","cancelled_at":null,"completed_at":null,"expires_at":1710353047,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_002"}},"usage":null} + + + event: thread.message.created + + data: + {"id":"msg_002","object":"thread.message","created_at":1710352476,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"in_progress","incomplete_details":null,"incomplete_at":null,"completed_at":null,"role":"assistant","content":[],"metadata":{}} + + + event: thread.message.in_progress + + data: + {"id":"msg_002","object":"thread.message","created_at":1710352476,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"in_progress","incomplete_details":null,"incomplete_at":null,"completed_at":null,"role":"assistant","content":[],"metadata":{}} + + + event: thread.message.delta + + data: + 
{"id":"msg_002","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":"The","annotations":[]}}]}} + + + event: thread.message.delta + + data: + {"id":"msg_002","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":" + current"}}]}} + + + event: thread.message.delta + + data: + {"id":"msg_002","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":" + weather"}}]}} + + + ... + + + event: thread.message.delta + + data: + {"id":"msg_002","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":" + sunny"}}]}} + + + event: thread.message.delta + + data: + {"id":"msg_002","object":"thread.message.delta","delta":{"content":[{"index":0,"type":"text","text":{"value":"."}}]}} + + + event: thread.message.completed + + data: + {"id":"msg_002","object":"thread.message","created_at":1710352476,"assistant_id":"asst_123","thread_id":"thread_123","run_id":"run_123","status":"completed","incomplete_details":null,"incomplete_at":null,"completed_at":1710352477,"role":"assistant","content":[{"type":"text","text":{"value":"The + current weather in San Francisco, CA is 70 degrees Fahrenheit and + sunny.","annotations":[]}}],"metadata":{}} + + + event: thread.run.step.completed + + data: + {"id":"step_002","object":"thread.run.step","created_at":1710352476,"run_id":"run_123","assistant_id":"asst_123","thread_id":"thread_123","type":"message_creation","status":"completed","cancelled_at":null,"completed_at":1710352477,"expires_at":1710353047,"failed_at":null,"last_error":null,"step_details":{"type":"message_creation","message_creation":{"message_id":"msg_002"}},"usage":{"prompt_tokens":329,"completion_tokens":18,"total_tokens":347}} + + + event: thread.run.completed + + data: + {"id":"run_123","object":"thread.run","created_at":1710352447,"assistant_id":"asst_123","thread_id":"thread_123","status":"completed","started_at":1710352475,"expires_at":null,"cancelled_at":null,"failed_at":null,"completed_at":1710352477,"required_action":null,"last_error":null,"model":"gpt-4o","instructions":null,"tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given + location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, + CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}],"metadata":{},"temperature":1.0,"top_p":1.0,"max_completion_tokens":null,"max_prompt_tokens":null,"truncation_strategy":{"type":"auto","last_messages":null},"incomplete_details":null,"usage":{"prompt_tokens":20,"completion_tokens":11,"total_tokens":31},"response_format":"auto","tool_choice":"auto","parallel_tool_calls":true}} + + + event: done + + data: [DONE] + description: > + When a run has the `status: "requires_action"` and `required_action.type` is `submit_tool_outputs`, + this endpoint can be used to submit the outputs from the tool calls once they're all completed. All + outputs must be submitted in a single request. 
+ /uploads: + post: + operationId: createUpload + tags: + - Uploads + summary: Create upload + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateUploadRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Upload' + x-oaiMeta: + name: Create upload + group: uploads + returns: >- + The [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object with status + `pending`. + examples: + response: | + { + "id": "upload_abc123", + "object": "upload", + "bytes": 2147483648, + "created_at": 1719184911, + "filename": "training_examples.jsonl", + "purpose": "fine-tune", + "status": "pending", + "expires_at": 1719127296 + } + request: + curl: | + curl https://api.openai.com/v1/uploads \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "purpose": "fine-tune", + "filename": "training_examples.jsonl", + "bytes": 2147483648, + "mime_type": "text/jsonl", + "expires_after": { + "anchor": "created_at", + "seconds": 3600 + } + }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const upload = await client.uploads.create({ + bytes: 0, + filename: 'filename', + mime_type: 'mime_type', + purpose: 'assistants', + }); + + console.log(upload.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + upload = client.uploads.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + print(upload.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + upload, err := client.Uploads.New(context.TODO(), openai.UploadNewParams{ + Bytes: 0, + Filename: "filename", + MimeType: "mime_type", + Purpose: openai.FilePurposeAssistants, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", upload.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.files.FilePurpose; + import com.openai.models.uploads.Upload; + import com.openai.models.uploads.UploadCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + UploadCreateParams params = UploadCreateParams.builder() + .bytes(0L) + .filename("filename") + .mimeType("mime_type") + .purpose(FilePurpose.ASSISTANTS) + .build(); + Upload upload = client.uploads().create(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + upload = openai.uploads.create(bytes: 0, filename: "filename", mime_type: "mime_type", purpose: + :assistants) + + + puts(upload) + description: > + Creates an intermediate [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object + + that you can add [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to. + + Currently, an Upload can accept at most 8 GB in total and expires after an + + hour after you create it. + + + Once you complete the Upload, we will create a + + [File](https://platform.openai.com/docs/api-reference/files/object) object that contains all the parts + + you uploaded. This File is usable in the rest of our platform as a regular + + File object. 
+ + + For certain `purpose` values, the correct `mime_type` must be specified. + + Please refer to documentation for the + + [supported MIME types for your use + case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + + For guidance on the proper filename extensions for each purpose, please + + follow the documentation on [creating a + + File](https://platform.openai.com/docs/api-reference/files/create). + /uploads/{upload_id}/cancel: + post: + operationId: cancelUpload + tags: + - Uploads + summary: Cancel upload + parameters: + - in: path + name: upload_id + required: true + schema: + type: string + example: upload_abc123 + description: | + The ID of the Upload. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Upload' + x-oaiMeta: + name: Cancel upload + group: uploads + returns: >- + The [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object with status + `cancelled`. + examples: + response: | + { + "id": "upload_abc123", + "object": "upload", + "bytes": 2147483648, + "created_at": 1719184911, + "filename": "training_examples.jsonl", + "purpose": "fine-tune", + "status": "cancelled", + "expires_at": 1719127296 + } + request: + curl: | + curl https://api.openai.com/v1/uploads/upload_abc123/cancel + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const upload = await client.uploads.cancel('upload_abc123'); + + console.log(upload.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + upload = client.uploads.cancel( + "upload_abc123", + ) + print(upload.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + upload, err := client.Uploads.Cancel(context.TODO(), "upload_abc123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", upload.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.uploads.Upload; + import com.openai.models.uploads.UploadCancelParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Upload upload = client.uploads().cancel("upload_abc123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + upload = openai.uploads.cancel("upload_abc123") + + puts(upload) + description: | + Cancels the Upload. No Parts may be added after an Upload is cancelled. + /uploads/{upload_id}/complete: + post: + operationId: completeUpload + tags: + - Uploads + summary: Complete upload + parameters: + - in: path + name: upload_id + required: true + schema: + type: string + example: upload_abc123 + description: | + The ID of the Upload. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CompleteUploadRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/Upload' + x-oaiMeta: + name: Complete upload + group: uploads + returns: >- + The [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object with status + `completed` with an additional `file` property containing the created usable File object. 
+        examples:
+          response: |
+            {
+              "id": "upload_abc123",
+              "object": "upload",
+              "bytes": 2147483648,
+              "created_at": 1719184911,
+              "filename": "training_examples.jsonl",
+              "purpose": "fine-tune",
+              "status": "completed",
+              "expires_at": 1719127296,
+              "file": {
+                "id": "file-xyz321",
+                "object": "file",
+                "bytes": 2147483648,
+                "created_at": 1719186911,
+                "expires_at": 1719127296,
+                "filename": "training_examples.jsonl",
+                "purpose": "fine-tune"
+              }
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/uploads/upload_abc123/complete \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -d '{
+                  "part_ids": ["part_def456", "part_ghi789"]
+                }'
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const upload = await client.uploads.complete('upload_abc123', { part_ids: ['string'] });
+
+              console.log(upload.id);
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              upload = client.uploads.complete(
+                  upload_id="upload_abc123",
+                  part_ids=["string"],
+              )
+              print(upload.id)
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                upload, err := client.Uploads.Complete(
+                  context.TODO(),
+                  "upload_abc123",
+                  openai.UploadCompleteParams{
+                    PartIDs: []string{"string"},
+                  },
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", upload.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.uploads.Upload;
+              import com.openai.models.uploads.UploadCompleteParams;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      UploadCompleteParams params = UploadCompleteParams.builder()
+                          .uploadId("upload_abc123")
+                          .addPartId("string")
+                          .build();
+                      Upload upload = client.uploads().complete(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              upload = openai.uploads.complete("upload_abc123", part_ids: ["string"])
+
+              puts(upload)
+      description: >
+        Completes the [Upload](https://platform.openai.com/docs/api-reference/uploads/object).
+
+
+        Within the returned Upload object, there is a nested
+        [File](https://platform.openai.com/docs/api-reference/files/object) object that is ready to use in the
+        rest of the platform.
+
+
+        You can specify the order of the Parts by passing in an ordered list of the Part IDs.
+
+
+        The number of bytes uploaded upon completion must match the number of bytes initially specified when
+        creating the Upload object. No Parts may be added after an Upload is completed.
+  /uploads/{upload_id}/parts:
+    post:
+      operationId: addUploadPart
+      tags:
+        - Uploads
+      summary: Add upload part
+      parameters:
+        - in: path
+          name: upload_id
+          required: true
+          schema:
+            type: string
+          example: upload_abc123
+          description: |
+            The ID of the Upload.
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/AddUploadPartRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/UploadPart'
+      x-oaiMeta:
+        name: Add upload part
+        group: uploads
+        returns: The upload [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) object.
+        examples:
+          response: |
+            {
+              "id": "part_def456",
+              "object": "upload.part",
+              "created_at": 1719185911,
+              "upload_id": "upload_abc123"
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/uploads/upload_abc123/parts \
+                -F data="aHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MS91cGxvYWRz..."
+            node.js: |-
+              import fs from 'fs';
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const uploadPart = await client.uploads.parts.create('upload_abc123', {
+                data: fs.createReadStream('path/to/file'),
+              });
+
+              console.log(uploadPart.id);
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              upload_part = client.uploads.parts.create(
+                  upload_id="upload_abc123",
+                  data=b"raw file contents",
+              )
+              print(upload_part.id)
+            go: |
+              package main
+
+              import (
+                "bytes"
+                "context"
+                "fmt"
+                "io"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                uploadPart, err := client.Uploads.Parts.New(
+                  context.TODO(),
+                  "upload_abc123",
+                  openai.UploadPartNewParams{
+                    Data: io.Reader(bytes.NewBuffer([]byte("some file contents"))),
+                  },
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", uploadPart.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.uploads.parts.PartCreateParams;
+              import com.openai.models.uploads.parts.UploadPart;
+              import java.io.ByteArrayInputStream;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      PartCreateParams params = PartCreateParams.builder()
+                          .uploadId("upload_abc123")
+                          .data(new ByteArrayInputStream("some content".getBytes()))
+                          .build();
+                      UploadPart uploadPart = client.uploads().parts().create(params);
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              upload_part = openai.uploads.parts.create("upload_abc123", data: Pathname(__FILE__))
+
+              puts(upload_part)
+      description: >
+        Adds a [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an
+        [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object. A Part represents a
+        chunk of bytes from the file you are trying to upload.
+
+
+        Each Part can be at most 64 MB, and you can add Parts until you hit the Upload maximum of 8 GB.
+
+
+        It is possible to add multiple Parts in parallel. You can decide the intended order of the Parts when
+        you [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete).
+  /vector_stores:
+    get:
+      operationId: listVectorStores
+      tags:
+        - Vector stores
+      summary: List vector stores
+      parameters:
+        - name: limit
+          in: query
+          description: >
+            A limit on the number of objects to be returned. Limit can range between 1 and 100, and the
+            default is 20.
+          required: false
+          schema:
+            type: integer
+            default: 20
+        - name: order
+          in: query
+          description: >
+            Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for
+            descending order.
+          schema:
+            type: string
+            default: desc
+            enum:
+              - asc
+              - desc
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list.
For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + schema: + type: string + - name: before + in: query + description: > + A cursor for use in pagination. `before` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, starting with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page of the list. + schema: + type: string + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListVectorStoresResponse' + x-oaiMeta: + name: List vector stores + group: vector_stores + returns: >- + A list of [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "vs_abc123", + "object": "vector_store", + "created_at": 1699061776, + "name": "Support FAQ", + "description": "Contains commonly asked questions and answers, organized by topic.", + "bytes": 139920, + "file_counts": { + "in_progress": 0, + "completed": 3, + "failed": 0, + "cancelled": 0, + "total": 3 + } + }, + { + "id": "vs_abc456", + "object": "vector_store", + "created_at": 1699061776, + "name": "Support FAQ v2", + "description": null, + "bytes": 139920, + "file_counts": { + "in_progress": 0, + "completed": 3, + "failed": 0, + "cancelled": 0, + "total": 3 + } + } + ], + "first_id": "vs_abc123", + "last_id": "vs_abc456", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/vector_stores \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.vector_stores.list() + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const vectorStore of client.vectorStores.list()) { + console.log(vectorStore.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.VectorStores.List(context.TODO(), openai.VectorStoreListParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.VectorStoreListPage; + import com.openai.models.vectorstores.VectorStoreListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + VectorStoreListPage page = client.vectorStores().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.vector_stores.list + + puts(page) + description: Returns a list of vector stores. 
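+    # A hedged sketch (editorial, not part of the upstream spec): the `after` and
+    # `before` parameters above implement standard cursor pagination. A manual loop
+    # with the official Python client, using the `has_more` flag and object IDs from
+    # the ListVectorStoresResponse schema (the SDKs' auto-paginating iterators, as in
+    # the node.js example above, are usually preferable):
+    #
+    #   from openai import OpenAI
+    #
+    #   client = OpenAI()
+    #   page = client.vector_stores.list(limit=20)
+    #   while True:
+    #       for vs in page.data:
+    #           print(vs.id)
+    #       if not page.has_more:
+    #           break
+    #       # The last object ID of the current page is the cursor for the next one.
+    #       page = client.vector_stores.list(limit=20, after=page.data[-1].id)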
+ post: + operationId: createVectorStore + tags: + - Vector stores + summary: Create vector store + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateVectorStoreRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + x-oaiMeta: + name: Create vector store + group: vector_stores + returns: A [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) object. + examples: + response: | + { + "id": "vs_abc123", + "object": "vector_store", + "created_at": 1699061776, + "name": "Support FAQ", + "description": "Contains commonly asked questions and answers, organized by topic.", + "bytes": 139920, + "file_counts": { + "in_progress": 0, + "completed": 3, + "failed": 0, + "cancelled": 0, + "total": 3 + } + } + request: + curl: | + curl https://api.openai.com/v1/vector_stores \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "name": "Support FAQ" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + vector_store = client.vector_stores.create() + print(vector_store.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const vectorStore = await client.vectorStores.create(); + + console.log(vectorStore.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + vectorStore, err := client.VectorStores.New(context.TODO(), openai.VectorStoreNewParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", vectorStore.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.VectorStore; + import com.openai.models.vectorstores.VectorStoreCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + VectorStore vectorStore = client.vectorStores().create(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + vector_store = openai.vector_stores.create + + puts(vector_store) + description: Create a vector store. + /vector_stores/{vector_store_id}: + get: + operationId: getVectorStore + tags: + - Vector stores + summary: Retrieve vector store + parameters: + - in: path + name: vector_store_id + required: true + schema: + type: string + description: The ID of the vector store to retrieve. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + x-oaiMeta: + name: Retrieve vector store + group: vector_stores + returns: >- + The [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) object + matching the specified ID. 
+ examples: + response: | + { + "id": "vs_abc123", + "object": "vector_store", + "created_at": 1699061776 + } + request: + curl: | + curl https://api.openai.com/v1/vector_stores/vs_abc123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + vector_store = client.vector_stores.retrieve( + "vector_store_id", + ) + print(vector_store.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const vectorStore = await client.vectorStores.retrieve('vector_store_id'); + + console.log(vectorStore.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + vectorStore, err := client.VectorStores.Get(context.TODO(), "vector_store_id") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", vectorStore.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.VectorStore; + import com.openai.models.vectorstores.VectorStoreRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + VectorStore vectorStore = client.vectorStores().retrieve("vector_store_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + vector_store = openai.vector_stores.retrieve("vector_store_id") + + puts(vector_store) + description: Retrieves a vector store. + post: + operationId: modifyVectorStore + tags: + - Vector stores + summary: Modify vector store + parameters: + - in: path + name: vector_store_id + required: true + schema: + type: string + description: The ID of the vector store to modify. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateVectorStoreRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreObject' + x-oaiMeta: + name: Modify vector store + group: vector_stores + returns: >- + The modified [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + object. 
+        examples:
+          response: |
+            {
+              "id": "vs_abc123",
+              "object": "vector_store",
+              "created_at": 1699061776,
+              "name": "Support FAQ",
+              "description": "Contains commonly asked questions and answers, organized by topic.",
+              "bytes": 139920,
+              "file_counts": {
+                "in_progress": 0,
+                "completed": 3,
+                "failed": 0,
+                "cancelled": 0,
+                "total": 3
+              }
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/vector_stores/vs_abc123 \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -H "OpenAI-Beta: assistants=v2" \
+                -d '{
+                  "name": "Support FAQ"
+                }'
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              vector_store = client.vector_stores.update(
+                  vector_store_id="vector_store_id",
+              )
+              print(vector_store.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const vectorStore = await client.vectorStores.update('vector_store_id');
+
+              console.log(vectorStore.id);
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                vectorStore, err := client.VectorStores.Update(
+                  context.TODO(),
+                  "vector_store_id",
+                  openai.VectorStoreUpdateParams{
+
+                  },
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", vectorStore.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.vectorstores.VectorStore;
+              import com.openai.models.vectorstores.VectorStoreUpdateParams;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      VectorStore vectorStore = client.vectorStores().update("vector_store_id");
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              vector_store = openai.vector_stores.update("vector_store_id")
+
+              puts(vector_store)
+      description: Modifies a vector store.
+    delete:
+      operationId: deleteVectorStore
+      tags:
+        - Vector stores
+      summary: Delete vector store
+      parameters:
+        - in: path
+          name: vector_store_id
+          required: true
+          schema:
+            type: string
+          description: The ID of the vector store to delete.
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/DeleteVectorStoreResponse'
+      x-oaiMeta:
+        name: Delete vector store
+        group: vector_stores
+        returns: Deletion status
+        examples:
+          response: |
+            {
+              "id": "vs_abc123",
+              "object": "vector_store.deleted",
+              "deleted": true
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/vector_stores/vs_abc123 \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -H "OpenAI-Beta: assistants=v2" \
+                -X DELETE
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              vector_store_deleted = client.vector_stores.delete(
+                  "vector_store_id",
+              )
+              print(vector_store_deleted.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const vectorStoreDeleted = await client.vectorStores.delete('vector_store_id');
+
+              console.log(vectorStoreDeleted.id);
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                vectorStoreDeleted, err := client.VectorStores.Delete(context.TODO(), "vector_store_id")
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", vectorStoreDeleted.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.vectorstores.VectorStoreDeleteParams;
+              import com.openai.models.vectorstores.VectorStoreDeleted;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      VectorStoreDeleted vectorStoreDeleted = client.vectorStores().delete("vector_store_id");
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              vector_store_deleted = openai.vector_stores.delete("vector_store_id")
+
+              puts(vector_store_deleted)
+      description: Delete a vector store.
+  /vector_stores/{vector_store_id}/file_batches:
+    post:
+      operationId: createVectorStoreFileBatch
+      tags:
+        - Vector stores
+      summary: Create vector store file batch
+      parameters:
+        - in: path
+          name: vector_store_id
+          required: true
+          schema:
+            type: string
+            example: vs_abc123
+          description: |
+            The ID of the vector store for which to create a File Batch.
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateVectorStoreFileBatchRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/VectorStoreFileBatchObject'
+      x-oaiMeta:
+        name: Create vector store file batch
+        group: vector_stores
+        returns: >-
+          A [vector store file
+          batch](https://platform.openai.com/docs/api-reference/vector-stores-file-batches/batch-object)
+          object.
+        examples:
+          response: |
+            {
+              "id": "vsfb_abc123",
+              "object": "vector_store.file_batch",
+              "created_at": 1699061776,
+              "vector_store_id": "vs_abc123",
+              "status": "in_progress",
+              "file_counts": {
+                "in_progress": 1,
+                "completed": 1,
+                "failed": 0,
+                "cancelled": 0,
+                "total": 2
+              }
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/vector_stores/vs_abc123/file_batches \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -H "OpenAI-Beta: assistants=v2" \
+                -d '{
+                  "files": [
+                    {
+                      "file_id": "file-abc123",
+                      "attributes": {"category": "finance"}
+                    },
+                    {
+                      "file_id": "file-abc456",
+                      "chunking_strategy": {
+                        "type": "static",
+                        "max_chunk_size_tokens": 1200,
+                        "chunk_overlap_tokens": 200
+                      }
+                    }
+                  ]
+                }'
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              vector_store_file_batch = client.vector_stores.file_batches.create(
+                  vector_store_id="vs_abc123",
+              )
+              print(vector_store_file_batch.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const vectorStoreFileBatch = await client.vectorStores.fileBatches.create('vs_abc123');
+
+              console.log(vectorStoreFileBatch.id);
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                vectorStoreFileBatch, err := client.VectorStores.FileBatches.New(
+                  context.TODO(),
+                  "vs_abc123",
+                  openai.VectorStoreFileBatchNewParams{
+
+                  },
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", vectorStoreFileBatch.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.vectorstores.filebatches.FileBatchCreateParams;
+              import com.openai.models.vectorstores.filebatches.VectorStoreFileBatch;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      VectorStoreFileBatch vectorStoreFileBatch = client.vectorStores().fileBatches().create("vs_abc123");
+                  }
+              }
+            ruby: |-
+              require "openai"
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+              vector_store_file_batch = openai.vector_stores.file_batches.create("vs_abc123")
+
+              puts(vector_store_file_batch)
+      description: Create a vector store file batch.
+  /vector_stores/{vector_store_id}/file_batches/{batch_id}:
+    get:
+      operationId: getVectorStoreFileBatch
+      tags:
+        - Vector stores
+      summary: Retrieve vector store file batch
+      parameters:
+        - in: path
+          name: vector_store_id
+          required: true
+          schema:
+            type: string
+            example: vs_abc123
+          description: The ID of the vector store that the file batch belongs to.
+        - in: path
+          name: batch_id
+          required: true
+          schema:
+            type: string
+            example: vsfb_abc123
+          description: The ID of the file batch being retrieved.
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/VectorStoreFileBatchObject'
+      x-oaiMeta:
+        name: Retrieve vector store file batch
+        group: vector_stores
+        returns: >-
+          The [vector store file
+          batch](https://platform.openai.com/docs/api-reference/vector-stores-file-batches/batch-object)
+          object.
+        examples:
+          response: |
+            {
+              "id": "vsfb_abc123",
+              "object": "vector_store.file_batch",
+              "created_at": 1699061776,
+              "vector_store_id": "vs_abc123",
+              "status": "in_progress",
+              "file_counts": {
+                "in_progress": 1,
+                "completed": 1,
+                "failed": 0,
+                "cancelled": 0,
+                "total": 2
+              }
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/vector_stores/vs_abc123/file_batches/vsfb_abc123 \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -H "OpenAI-Beta: assistants=v2"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              vector_store_file_batch = client.vector_stores.file_batches.retrieve(
+                  batch_id="vsfb_abc123",
+                  vector_store_id="vs_abc123",
+              )
+              print(vector_store_file_batch.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const vectorStoreFileBatch = await client.vectorStores.fileBatches.retrieve('vsfb_abc123', {
+                vector_store_id: 'vs_abc123',
+              });
+
+              console.log(vectorStoreFileBatch.id);
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                vectorStoreFileBatch, err := client.VectorStores.FileBatches.Get(
+                  context.TODO(),
+                  "vs_abc123",
+                  "vsfb_abc123",
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", vectorStoreFileBatch.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.vectorstores.filebatches.FileBatchRetrieveParams;
+              import com.openai.models.vectorstores.filebatches.VectorStoreFileBatch;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      FileBatchRetrieveParams params = FileBatchRetrieveParams.builder()
+                          .vectorStoreId("vs_abc123")
+                          .batchId("vsfb_abc123")
+                          .build();
+                      VectorStoreFileBatch vectorStoreFileBatch = client.vectorStores().fileBatches().retrieve(params);
+                  }
+              }
+            ruby: >-
+              require "openai"
+
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+
+              vector_store_file_batch = openai.vector_stores.file_batches.retrieve("vsfb_abc123",
+              vector_store_id: "vs_abc123")
+
+
+              puts(vector_store_file_batch)
+      description: Retrieves a vector store file batch.
+  /vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel:
+    post:
+      operationId: cancelVectorStoreFileBatch
+      tags:
+        - Vector stores
+      summary: Cancel vector store file batch
+      parameters:
+        - in: path
+          name: vector_store_id
+          required: true
+          schema:
+            type: string
+          description: The ID of the vector store that the file batch belongs to.
+        - in: path
+          name: batch_id
+          required: true
+          schema:
+            type: string
+          description: The ID of the file batch to cancel.
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/VectorStoreFileBatchObject'
+      x-oaiMeta:
+        name: Cancel vector store file batch
+        group: vector_stores
+        returns: The modified vector store file batch object.
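+        # A hedged sketch (editorial, not part of the upstream spec): file batches
+        # are processed asynchronously, so `status` starts at "in_progress" and the
+        # batch can either be cancelled with this endpoint or polled until it
+        # settles. A simple polling loop with the official Python client:
+        #
+        #   import time
+        #   from openai import OpenAI
+        #
+        #   client = OpenAI()
+        #   batch = client.vector_stores.file_batches.retrieve(
+        #       batch_id="vsfb_abc123", vector_store_id="vs_abc123",
+        #   )
+        #   while batch.status == "in_progress":
+        #       time.sleep(1)  # re-fetch until the batch leaves "in_progress"
+        #       batch = client.vector_stores.file_batches.retrieve(
+        #           batch_id="vsfb_abc123", vector_store_id="vs_abc123",
+        #       )
+        #   print(batch.status, batch.file_counts)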
+        examples:
+          response: |
+            {
+              "id": "vsfb_abc123",
+              "object": "vector_store.file_batch",
+              "created_at": 1699061776,
+              "vector_store_id": "vs_abc123",
+              "status": "in_progress",
+              "file_counts": {
+                "in_progress": 12,
+                "completed": 3,
+                "failed": 0,
+                "cancelled": 0,
+                "total": 15
+              }
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/vector_stores/vs_abc123/file_batches/vsfb_abc123/cancel \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -H "OpenAI-Beta: assistants=v2" \
+                -X POST
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              vector_store_file_batch = client.vector_stores.file_batches.cancel(
+                  batch_id="batch_id",
+                  vector_store_id="vector_store_id",
+              )
+              print(vector_store_file_batch.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const vectorStoreFileBatch = await client.vectorStores.fileBatches.cancel('batch_id', {
+                vector_store_id: 'vector_store_id',
+              });
+
+              console.log(vectorStoreFileBatch.id);
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                vectorStoreFileBatch, err := client.VectorStores.FileBatches.Cancel(
+                  context.TODO(),
+                  "vector_store_id",
+                  "batch_id",
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", vectorStoreFileBatch.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.vectorstores.filebatches.FileBatchCancelParams;
+              import com.openai.models.vectorstores.filebatches.VectorStoreFileBatch;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      FileBatchCancelParams params = FileBatchCancelParams.builder()
+                          .vectorStoreId("vector_store_id")
+                          .batchId("batch_id")
+                          .build();
+                      VectorStoreFileBatch vectorStoreFileBatch = client.vectorStores().fileBatches().cancel(params);
+                  }
+              }
+            ruby: >-
+              require "openai"
+
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+
+              vector_store_file_batch = openai.vector_stores.file_batches.cancel("batch_id", vector_store_id:
+              "vector_store_id")
+
+
+              puts(vector_store_file_batch)
+      description: >-
+        Cancel a vector store file batch. This attempts to cancel the processing of files in this batch as
+        soon as possible.
+  /vector_stores/{vector_store_id}/file_batches/{batch_id}/files:
+    get:
+      operationId: listFilesInVectorStoreBatch
+      tags:
+        - Vector stores
+      summary: List vector store files in a batch
+      parameters:
+        - name: vector_store_id
+          in: path
+          description: The ID of the vector store that the files belong to.
+          required: true
+          schema:
+            type: string
+        - name: batch_id
+          in: path
+          description: The ID of the file batch that the files belong to.
+          required: true
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >
+            A limit on the number of objects to be returned. Limit can range between 1 and 100, and the
+            default is 20.
+          required: false
+          schema:
+            type: integer
+            default: 20
+        - name: order
+          in: query
+          description: >
+            Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for
+            descending order.
+          schema:
+            type: string
+            default: desc
+            enum:
+              - asc
+              - desc
+        - name: after
+          in: query
+          description: >
+            A cursor for use in pagination. `after` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent
+            call can include after=obj_foo in order to fetch the next page of the list.
+          schema:
+            type: string
+        - name: before
+          in: query
+          description: >
+            A cursor for use in pagination. `before` is an object ID that defines your place in the list. For
+            instance, if you make a list request and receive 100 objects, starting with obj_foo, your
+            subsequent call can include before=obj_foo in order to fetch the previous page of the list.
+          schema:
+            type: string
+        - name: filter
+          in: query
+          description: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
+          schema:
+            type: string
+            enum:
+              - in_progress
+              - completed
+              - failed
+              - cancelled
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListVectorStoreFilesResponse'
+      x-oaiMeta:
+        name: List vector store files in a batch
+        group: vector_stores
+        returns: >-
+          A list of [vector store
+          file](https://platform.openai.com/docs/api-reference/vector-stores-files/file-object) objects.
+        examples:
+          response: |
+            {
+              "object": "list",
+              "data": [
+                {
+                  "id": "file-abc123",
+                  "object": "vector_store.file",
+                  "created_at": 1699061776,
+                  "vector_store_id": "vs_abc123"
+                },
+                {
+                  "id": "file-abc456",
+                  "object": "vector_store.file",
+                  "created_at": 1699061776,
+                  "vector_store_id": "vs_abc123"
+                }
+              ],
+              "first_id": "file-abc123",
+              "last_id": "file-abc456",
+              "has_more": false
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/vector_stores/vs_abc123/file_batches/vsfb_abc123/files \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -H "OpenAI-Beta: assistants=v2"
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              page = client.vector_stores.file_batches.list_files(
+                  batch_id="batch_id",
+                  vector_store_id="vector_store_id",
+              )
+              page = page.data[0]
+              print(page.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              // Automatically fetches more pages as needed.
+ for await (const vectorStoreFile of client.vectorStores.fileBatches.listFiles('batch_id', { + vector_store_id: 'vector_store_id', + })) { + console.log(vectorStoreFile.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.VectorStores.FileBatches.ListFiles( + context.TODO(), + "vector_store_id", + "batch_id", + openai.VectorStoreFileBatchListFilesParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.filebatches.FileBatchListFilesPage; + import com.openai.models.vectorstores.filebatches.FileBatchListFilesParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileBatchListFilesParams params = FileBatchListFilesParams.builder() + .vectorStoreId("vector_store_id") + .batchId("batch_id") + .build(); + FileBatchListFilesPage page = client.vectorStores().fileBatches().listFiles(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + page = openai.vector_stores.file_batches.list_files("batch_id", vector_store_id: + "vector_store_id") + + + puts(page) + description: Returns a list of vector store files in a batch. + /vector_stores/{vector_store_id}/files: + get: + operationId: listVectorStoreFiles + tags: + - Vector stores + summary: List vector store files + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store that the files belong to. + required: true + schema: + type: string + - name: limit + in: query + description: > + A limit on the number of objects to be returned. Limit can range between 1 and 100, and the + default is 20. + required: false + schema: + type: integer + default: 20 + - name: order + in: query + description: > + Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for + descending order. + schema: + type: string + default: desc + enum: + - asc + - desc + - name: after + in: query + description: > + A cursor for use in pagination. `after` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent + call can include after=obj_foo in order to fetch the next page of the list. + schema: + type: string + - name: before + in: query + description: > + A cursor for use in pagination. `before` is an object ID that defines your place in the list. For + instance, if you make a list request and receive 100 objects, starting with obj_foo, your + subsequent call can include before=obj_foo in order to fetch the previous page of the list. + schema: + type: string + - name: filter + in: query + description: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. 
+ schema: + type: string + enum: + - in_progress + - completed + - failed + - cancelled + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListVectorStoreFilesResponse' + x-oaiMeta: + name: List vector store files + group: vector_stores + returns: >- + A list of [vector store + file](https://platform.openai.com/docs/api-reference/vector-stores-files/file-object) objects. + examples: + response: | + { + "object": "list", + "data": [ + { + "id": "file-abc123", + "object": "vector_store.file", + "created_at": 1699061776, + "vector_store_id": "vs_abc123" + }, + { + "id": "file-abc456", + "object": "vector_store.file", + "created_at": 1699061776, + "vector_store_id": "vs_abc123" + } + ], + "first_id": "file-abc123", + "last_id": "file-abc456", + "has_more": false + } + request: + curl: | + curl https://api.openai.com/v1/vector_stores/vs_abc123/files \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.vector_stores.files.list( + vector_store_id="vector_store_id", + ) + page = page.data[0] + print(page.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const vectorStoreFile of client.vectorStores.files.list('vector_store_id')) { + console.log(vectorStoreFile.id); + } + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.VectorStores.Files.List( + context.TODO(), + "vector_store_id", + openai.VectorStoreFileListParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.files.FileListPage; + import com.openai.models.vectorstores.files.FileListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileListPage page = client.vectorStores().files().list("vector_store_id"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.vector_stores.files.list("vector_store_id") + + puts(page) + description: Returns a list of vector store files. + post: + operationId: createVectorStoreFile + tags: + - Vector stores + summary: Create vector store file + parameters: + - in: path + name: vector_store_id + required: true + schema: + type: string + example: vs_abc123 + description: | + The ID of the vector store for which to create a File. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateVectorStoreFileRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + x-oaiMeta: + name: Create vector store file + group: vector_stores + returns: >- + A [vector store + file](https://platform.openai.com/docs/api-reference/vector-stores-files/file-object) object. 
+ examples: + response: | + { + "id": "file-abc123", + "object": "vector_store.file", + "created_at": 1699061776, + "usage_bytes": 1234, + "vector_store_id": "vs_abcd", + "status": "completed", + "last_error": null + } + request: + curl: | + curl https://api.openai.com/v1/vector_stores/vs_abc123/files \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" \ + -d '{ + "file_id": "file-abc123" + }' + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + vector_store_file = client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + print(vector_store_file.id) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const vectorStoreFile = await client.vectorStores.files.create('vs_abc123', { file_id: 'file_id' + }); + + + console.log(vectorStoreFile.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + vectorStoreFile, err := client.VectorStores.Files.New( + context.TODO(), + "vs_abc123", + openai.VectorStoreFileNewParams{ + FileID: "file_id", + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", vectorStoreFile.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.files.FileCreateParams; + import com.openai.models.vectorstores.files.VectorStoreFile; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileCreateParams params = FileCreateParams.builder() + .vectorStoreId("vs_abc123") + .fileId("file_id") + .build(); + VectorStoreFile vectorStoreFile = client.vectorStores().files().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + vector_store_file = openai.vector_stores.files.create("vs_abc123", file_id: "file_id") + + puts(vector_store_file) + description: >- + Create a vector store file by attaching a [File](https://platform.openai.com/docs/api-reference/files) + to a [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object). + /vector_stores/{vector_store_id}/files/{file_id}: + get: + operationId: getVectorStoreFile + tags: + - Vector stores + summary: Retrieve vector store file + parameters: + - in: path + name: vector_store_id + required: true + schema: + type: string + example: vs_abc123 + description: The ID of the vector store that the file belongs to. + - in: path + name: file_id + required: true + schema: + type: string + example: file-abc123 + description: The ID of the file being retrieved. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + x-oaiMeta: + name: Retrieve vector store file + group: vector_stores + returns: >- + The [vector store + file](https://platform.openai.com/docs/api-reference/vector-stores-files/file-object) object. 
+ examples: + response: | + { + "id": "file-abc123", + "object": "vector_store.file", + "created_at": 1699061776, + "vector_store_id": "vs_abcd", + "status": "completed", + "last_error": null + } + request: + curl: | + curl https://api.openai.com/v1/vector_stores/vs_abc123/files/file-abc123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -H "OpenAI-Beta: assistants=v2" + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + vector_store_file = client.vector_stores.files.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + print(vector_store_file.id) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const vectorStoreFile = await client.vectorStores.files.retrieve('file-abc123', { + vector_store_id: 'vs_abc123', + }); + + console.log(vectorStoreFile.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + vectorStoreFile, err := client.VectorStores.Files.Get( + context.TODO(), + "vs_abc123", + "file-abc123", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", vectorStoreFile.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.files.FileRetrieveParams; + import com.openai.models.vectorstores.files.VectorStoreFile; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileRetrieveParams params = FileRetrieveParams.builder() + .vectorStoreId("vs_abc123") + .fileId("file-abc123") + .build(); + VectorStoreFile vectorStoreFile = client.vectorStores().files().retrieve(params); + } + } + ruby: >- + require "openai" + + + openai = OpenAI::Client.new(api_key: "My API Key") + + + vector_store_file = openai.vector_stores.files.retrieve("file-abc123", vector_store_id: + "vs_abc123") + + + puts(vector_store_file) + description: Retrieves a vector store file. + delete: + operationId: deleteVectorStoreFile + tags: + - Vector stores + summary: Delete vector store file + parameters: + - in: path + name: vector_store_id + required: true + schema: + type: string + description: The ID of the vector store that the file belongs to. + - in: path + name: file_id + required: true + schema: + type: string + description: The ID of the file to delete. 
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/DeleteVectorStoreFileResponse'
+      x-oaiMeta:
+        name: Delete vector store file
+        group: vector_stores
+        returns: Deletion status
+        examples:
+          response: |
+            {
+              "id": "file-abc123",
+              "object": "vector_store.file.deleted",
+              "deleted": true
+            }
+          request:
+            curl: |
+              curl https://api.openai.com/v1/vector_stores/vs_abc123/files/file-abc123 \
+                -H "Authorization: Bearer $OPENAI_API_KEY" \
+                -H "Content-Type: application/json" \
+                -H "OpenAI-Beta: assistants=v2" \
+                -X DELETE
+            python: |-
+              from openai import OpenAI
+
+              client = OpenAI(
+                  api_key="My API Key",
+              )
+              vector_store_file_deleted = client.vector_stores.files.delete(
+                  file_id="file_id",
+                  vector_store_id="vector_store_id",
+              )
+              print(vector_store_file_deleted.id)
+            node.js: |-
+              import OpenAI from 'openai';
+
+              const client = new OpenAI({
+                apiKey: 'My API Key',
+              });
+
+              const vectorStoreFileDeleted = await client.vectorStores.files.delete('file_id', {
+                vector_store_id: 'vector_store_id',
+              });
+
+              console.log(vectorStoreFileDeleted.id);
+            go: |
+              package main
+
+              import (
+                "context"
+                "fmt"
+
+                "github.com/openai/openai-go"
+                "github.com/openai/openai-go/option"
+              )
+
+              func main() {
+                client := openai.NewClient(
+                  option.WithAPIKey("My API Key"),
+                )
+                vectorStoreFileDeleted, err := client.VectorStores.Files.Delete(
+                  context.TODO(),
+                  "vector_store_id",
+                  "file_id",
+                )
+                if err != nil {
+                  panic(err.Error())
+                }
+                fmt.Printf("%+v\n", vectorStoreFileDeleted.ID)
+              }
+            java: |-
+              package com.openai.example;
+
+              import com.openai.client.OpenAIClient;
+              import com.openai.client.okhttp.OpenAIOkHttpClient;
+              import com.openai.models.vectorstores.files.FileDeleteParams;
+              import com.openai.models.vectorstores.files.VectorStoreFileDeleted;
+
+              public final class Main {
+                  private Main() {}
+
+                  public static void main(String[] args) {
+                      OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                      FileDeleteParams params = FileDeleteParams.builder()
+                          .vectorStoreId("vector_store_id")
+                          .fileId("file_id")
+                          .build();
+                      VectorStoreFileDeleted vectorStoreFileDeleted = client.vectorStores().files().delete(params);
+                  }
+              }
+            ruby: >-
+              require "openai"
+
+
+              openai = OpenAI::Client.new(api_key: "My API Key")
+
+
+              vector_store_file_deleted = openai.vector_stores.files.delete("file_id", vector_store_id:
+              "vector_store_id")
+
+
+              puts(vector_store_file_deleted)
+      description: >-
+        Delete a vector store file. This will remove the file from the vector store but the file itself will
+        not be deleted. To delete the file, use the [delete
+        file](https://platform.openai.com/docs/api-reference/files/delete) endpoint.
+    post:
+      operationId: updateVectorStoreFileAttributes
+      tags:
+        - Vector stores
+      summary: Update vector store file attributes
+      parameters:
+        - in: path
+          name: vector_store_id
+          required: true
+          schema:
+            type: string
+            example: vs_abc123
+          description: The ID of the vector store the file belongs to.
+        - in: path
+          name: file_id
+          required: true
+          schema:
+            type: string
+            example: file-abc123
+          description: The ID of the file to update attributes for.
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateVectorStoreFileAttributesRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileObject' + x-oaiMeta: + name: Update vector store file attributes + group: vector_stores + returns: >- + The updated [vector store + file](https://platform.openai.com/docs/api-reference/vector-stores-files/file-object) object. + examples: + response: | + { + "id": "file-abc123", + "object": "vector_store.file", + "usage_bytes": 1234, + "created_at": 1699061776, + "vector_store_id": "vs_abcd", + "status": "completed", + "last_error": null, + "chunking_strategy": {...}, + "attributes": {"key1": "value1", "key2": 2} + } + request: + curl: | + curl https://api.openai.com/v1/vector_stores/{vector_store_id}/files/{file_id} \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"attributes": {"key1": "value1", "key2": 2}}' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const vectorStoreFile = await client.vectorStores.files.update('file-abc123', { + vector_store_id: 'vs_abc123', + attributes: { foo: 'string' }, + }); + + console.log(vectorStoreFile.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + vector_store_file = client.vector_stores.files.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={ + "foo": "string" + }, + ) + print(vector_store_file.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + vectorStoreFile, err := client.VectorStores.Files.Update( + context.TODO(), + "vs_abc123", + "file-abc123", + openai.VectorStoreFileUpdateParams{ + Attributes: map[string]openai.VectorStoreFileUpdateParamsAttributeUnion{ + "foo": openai.VectorStoreFileUpdateParamsAttributeUnion{ + OfString: openai.String("string"), + }, + }, + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", vectorStoreFile.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.JsonValue; + import com.openai.models.vectorstores.files.FileUpdateParams; + import com.openai.models.vectorstores.files.VectorStoreFile; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileUpdateParams params = FileUpdateParams.builder() + .vectorStoreId("vs_abc123") + .fileId("file-abc123") + .attributes(FileUpdateParams.Attributes.builder() + .putAdditionalProperty("foo", JsonValue.from("string")) + .build()) + .build(); + VectorStoreFile vectorStoreFile = client.vectorStores().files().update(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + vector_store_file = openai.vector_stores.files.update( + "file-abc123", + vector_store_id: "vs_abc123", + attributes: {foo: "string"} + ) + + puts(vector_store_file) + description: Update attributes on a vector store file. 
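+  # A hedged sketch (editorial, not part of the upstream spec): file `attributes`
+  # set with the endpoint above pair with the attribute filters accepted by the
+  # search endpoint later in this file. A Python sketch that tags a file and then
+  # restricts a search to matching files, with the filter written in the documented
+  # comparison form {"type": "eq", "key": ..., "value": ...}:
+  #
+  #   from openai import OpenAI
+  #
+  #   client = OpenAI()
+  #   client.vector_stores.files.update(
+  #       file_id="file-abc123",
+  #       vector_store_id="vs_abc123",
+  #       attributes={"category": "finance"},
+  #   )
+  #   # Only chunks from files whose `category` attribute equals "finance" are scored.
+  #   page = client.vector_stores.search(
+  #       vector_store_id="vs_abc123",
+  #       query="What is the return policy?",
+  #       filters={"type": "eq", "key": "category", "value": "finance"},
+  #   )
+  #   for result in page.data:
+  #       print(result.file_id, result.score)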
+ /vector_stores/{vector_store_id}/files/{file_id}/content: + get: + operationId: retrieveVectorStoreFileContent + tags: + - Vector stores + summary: Retrieve vector store file content + parameters: + - in: path + name: vector_store_id + required: true + schema: + type: string + example: vs_abc123 + description: The ID of the vector store. + - in: path + name: file_id + required: true + schema: + type: string + example: file-abc123 + description: The ID of the file within the vector store. + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreFileContentResponse' + x-oaiMeta: + name: Retrieve vector store file content + group: vector_stores + returns: The parsed contents of the specified vector store file. + examples: + response: | + { + "file_id": "file-abc123", + "filename": "example.txt", + "attributes": {"key": "value"}, + "content": [ + {"type": "text", "text": "..."}, + ... + ] + } + request: + curl: | + curl \ + https://api.openai.com/v1/vector_stores/vs_abc123/files/file-abc123/content \ + -H "Authorization: Bearer $OPENAI_API_KEY" + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const fileContentResponse of client.vectorStores.files.content('file-abc123', { + vector_store_id: 'vs_abc123', + })) { + console.log(fileContentResponse.text); + } + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.vector_stores.files.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + page = page.data[0] + print(page.text) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.VectorStores.Files.Content( + context.TODO(), + "vs_abc123", + "file-abc123", + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.files.FileContentPage; + import com.openai.models.vectorstores.files.FileContentParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + FileContentParams params = FileContentParams.builder() + .vectorStoreId("vs_abc123") + .fileId("file-abc123") + .build(); + FileContentPage page = client.vectorStores().files().content(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.vector_stores.files.content("file-abc123", vector_store_id: "vs_abc123") + + puts(page) + description: Retrieve the parsed contents of a vector store file. + /vector_stores/{vector_store_id}/search: + post: + operationId: searchVectorStore + tags: + - Vector stores + summary: Search vector store + parameters: + - in: path + name: vector_store_id + required: true + schema: + type: string + example: vs_abc123 + description: The ID of the vector store to search. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreSearchRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreSearchResultsPage' + x-oaiMeta: + name: Search vector store + group: vector_stores + returns: A page of search results from the vector store. + examples: + response: | + { + "object": "vector_store.search_results.page", + "search_query": "What is the return policy?", + "data": [ + { + "file_id": "file_123", + "filename": "document.pdf", + "score": 0.95, + "attributes": { + "author": "John Doe", + "date": "2023-01-01" + }, + "content": [ + { + "type": "text", + "text": "Relevant chunk" + } + ] + }, + { + "file_id": "file_456", + "filename": "notes.txt", + "score": 0.89, + "attributes": { + "author": "Jane Smith", + "date": "2023-01-02" + }, + "content": [ + { + "type": "text", + "text": "Sample text content from the vector store." + } + ] + } + ], + "has_more": false, + "next_page": null + } + request: + curl: | + curl -X POST \ + https://api.openai.com/v1/vector_stores/vs_abc123/search \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"query": "What is the return policy?", "filters": {...}}' + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + // Automatically fetches more pages as needed. + + for await (const vectorStoreSearchResponse of client.vectorStores.search('vs_abc123', { query: + 'string' })) { + console.log(vectorStoreSearchResponse.file_id); + } + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + ) + page = page.data[0] + print(page.file_id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.VectorStores.Search( + context.TODO(), + "vs_abc123", + openai.VectorStoreSearchParams{ + Query: openai.VectorStoreSearchParamsQueryUnion{ + OfString: openai.String("string"), + }, + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.vectorstores.VectorStoreSearchPage; + import com.openai.models.vectorstores.VectorStoreSearchParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + VectorStoreSearchParams params = VectorStoreSearchParams.builder() + .vectorStoreId("vs_abc123") + .query("string") + .build(); + VectorStoreSearchPage page = client.vectorStores().search(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.vector_stores.search("vs_abc123", query: "string") + + puts(page) + description: Search a vector store for relevant chunks based on a query and file attributes filter. + /conversations: + post: + tags: + - Conversations + summary: Create a conversation + description: Create a conversation. 
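+      # A hedged sketch (editorial, not part of the upstream spec): the four
+      # Conversations operations in this section compose into a simple lifecycle;
+      # the metadata keys below are illustrative only. A Python sketch with the
+      # official client:
+      #
+      #   from openai import OpenAI
+      #
+      #   client = OpenAI()
+      #   conv = client.conversations.create(metadata={"topic": "demo"})
+      #   conv = client.conversations.update(conv.id, metadata={"topic": "demo", "stage": "triage"})
+      #   conv = client.conversations.retrieve(conv.id)
+      #   print(conv.metadata)
+      #   client.conversations.delete(conv.id)  # items in the conversation are not deleted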
+      operationId: createConversation
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateConversationBody'
+      responses:
+        '200':
+          description: Success
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ConversationResource'
+      x-oaiMeta:
+        name: Create a conversation
+        group: conversations
+        returns: >
+          Returns a [Conversation](https://platform.openai.com/docs/api-reference/conversations/object)
+          object.
+        path: create
+        examples:
+          - title: Create a conversation.
+            request:
+              curl: |
+                curl https://api.openai.com/v1/conversations \
+                  -H "Content-Type: application/json" \
+                  -H "Authorization: Bearer $OPENAI_API_KEY" \
+                  -d '{
+                    "metadata": {"topic": "demo"},
+                    "items": [
+                      {
+                        "type": "message",
+                        "role": "user",
+                        "content": "Hello!"
+                      }
+                    ]
+                  }'
+              javascript: |
+                import OpenAI from "openai";
+                const client = new OpenAI();
+
+                const conversation = await client.conversations.create({
+                  metadata: { topic: "demo" },
+                  items: [
+                    { type: "message", role: "user", content: "Hello!" }
+                  ],
+                });
+                console.log(conversation);
+              python: |-
+                from openai import OpenAI
+
+                client = OpenAI(
+                    api_key="My API Key",
+                )
+                conversation = client.conversations.create()
+                print(conversation.id)
+              csharp: |
+                using System;
+                using System.Collections.Generic;
+                using OpenAI.Conversations;
+
+                OpenAIConversationClient client = new(
+                    apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+                );
+
+                Conversation conversation = client.CreateConversation(
+                    new CreateConversationOptions
+                    {
+                        Metadata = new Dictionary<string, string>
+                        {
+                            { "topic", "demo" }
+                        },
+                        Items =
+                        {
+                            new ConversationMessageInput
+                            {
+                                Role = "user",
+                                Content = "Hello!",
+                            }
+                        }
+                    }
+                );
+                Console.WriteLine(conversation.Id);
+              node.js: |-
+                import OpenAI from 'openai';
+
+                const client = new OpenAI({
+                  apiKey: 'My API Key',
+                });
+
+                const conversation = await client.conversations.create();
+
+                console.log(conversation.id);
+              go: |
+                package main
+
+                import (
+                  "context"
+                  "fmt"
+
+                  "github.com/openai/openai-go"
+                  "github.com/openai/openai-go/conversations"
+                  "github.com/openai/openai-go/option"
+                )
+
+                func main() {
+                  client := openai.NewClient(
+                    option.WithAPIKey("My API Key"),
+                  )
+                  conversation, err := client.Conversations.New(context.TODO(), conversations.ConversationNewParams{
+
+                  })
+                  if err != nil {
+                    panic(err.Error())
+                  }
+                  fmt.Printf("%+v\n", conversation.ID)
+                }
+              java: |-
+                package com.openai.example;
+
+                import com.openai.client.OpenAIClient;
+                import com.openai.client.okhttp.OpenAIOkHttpClient;
+                import com.openai.models.conversations.Conversation;
+                import com.openai.models.conversations.ConversationCreateParams;
+
+                public final class Main {
+                    private Main() {}
+
+                    public static void main(String[] args) {
+                        OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+                        Conversation conversation = client.conversations().create();
+                    }
+                }
+              ruby: |-
+                require "openai"
+
+                openai = OpenAI::Client.new(api_key: "My API Key")
+
+                conversation = openai.conversations.create
+
+                puts(conversation)
+            response: |
+              {
+                "id": "conv_123",
+                "object": "conversation",
+                "created_at": 1741900000,
+                "metadata": {"topic": "demo"}
+              }
+  /conversations/{conversation_id}:
+    get:
+      tags:
+        - Conversations
+      summary: Retrieve a conversation
+      description: Get a conversation.
+      operationId: getConversation
+      parameters:
+        - name: conversation_id
+          in: path
+          description: The ID of the conversation to retrieve.
+ required: true + schema: + example: conv_123 + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationResource' + x-oaiMeta: + name: Retrieve a conversation + group: conversations + returns: > + Returns a [Conversation](https://platform.openai.com/docs/api-reference/conversations/object) + object. + path: retrieve + examples: + - title: Retrieve a conversation + request: + curl: | + curl https://api.openai.com/v1/conversations/conv_123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const conversation = await client.conversations.retrieve("conv_123"); + console.log(conversation); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + conversation = client.conversations.retrieve( + "conv_123", + ) + print(conversation.id) + csharp: | + using System; + using OpenAI.Conversations; + + OpenAIConversationClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + Conversation conversation = client.GetConversation("conv_123"); + Console.WriteLine(conversation.Id); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const conversation = await client.conversations.retrieve('conv_123'); + + console.log(conversation.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + conversation, err := client.Conversations.Get(context.TODO(), "conv_123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", conversation.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.conversations.Conversation; + import com.openai.models.conversations.ConversationRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Conversation conversation = client.conversations().retrieve("conv_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + conversation = openai.conversations.retrieve("conv_123") + + puts(conversation) + response: | + { + "id": "conv_123", + "object": "conversation", + "created_at": 1741900000, + "metadata": {"topic": "demo"} + } + delete: + tags: + - Conversations + summary: Delete a conversation + description: Delete a conversation. Items in the conversation will not be deleted. + operationId: deleteConversation + parameters: + - name: conversation_id + in: path + description: The ID of the conversation to delete. + required: true + schema: + example: conv_123 + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/DeletedConversationResource' + x-oaiMeta: + name: Delete a conversation + group: conversations + returns: | + A success message. 
+ path: delete + examples: + - title: Delete a conversation + request: + curl: | + curl -X DELETE https://api.openai.com/v1/conversations/conv_123 \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from "openai"; + const client = new OpenAI(); + + const deleted = await client.conversations.delete("conv_123"); + console.log(deleted); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + conversation_deleted_resource = client.conversations.delete( + "conv_123", + ) + print(conversation_deleted_resource.id) + csharp: | + using System; + using OpenAI.Conversations; + + OpenAIConversationClient client = new( + apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY") + ); + + DeletedConversation deleted = client.DeleteConversation("conv_123"); + Console.WriteLine(deleted.Id); + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const conversationDeletedResource = await client.conversations.delete('conv_123'); + + console.log(conversationDeletedResource.id); + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + conversationDeletedResource, err := client.Conversations.Delete(context.TODO(), "conv_123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", conversationDeletedResource.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.conversations.ConversationDeleteParams; + import com.openai.models.conversations.ConversationDeletedResource; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ConversationDeletedResource conversationDeletedResource = client.conversations().delete("conv_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + conversation_deleted_resource = openai.conversations.delete("conv_123") + + puts(conversation_deleted_resource) + response: | + { + "id": "conv_123", + "object": "conversation.deleted", + "deleted": true + } + post: + tags: + - Conversations + summary: Update a conversation + description: Update a conversation + operationId: updateConversation + parameters: + - name: conversation_id + in: path + description: The ID of the conversation to update. + required: true + schema: + example: conv_123 + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateConversationBody' + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ConversationResource' + x-oaiMeta: + name: Update a conversation + group: conversations + returns: > + Returns the updated + [Conversation](https://platform.openai.com/docs/api-reference/conversations/object) object. 
+ path: update
+ examples:
+ - title: Update conversation metadata
+ request:
+ curl: |
+ curl https://api.openai.com/v1/conversations/conv_123 \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer $OPENAI_API_KEY" \
+ -d '{
+ "metadata": {"topic": "project-x"}
+ }'
+ javascript: |
+ import OpenAI from "openai";
+ const client = new OpenAI();
+
+ const updated = await client.conversations.update(
+ "conv_123",
+ { metadata: { topic: "project-x" } }
+ );
+ console.log(updated);
+ python: |-
+ from openai import OpenAI
+
+ client = OpenAI(
+ api_key="My API Key",
+ )
+ conversation = client.conversations.update(
+ conversation_id="conv_123",
+ metadata={
+ "foo": "string"
+ },
+ )
+ print(conversation.id)
+ csharp: |
+ using System;
+ using System.Collections.Generic;
+ using OpenAI.Conversations;
+
+ OpenAIConversationClient client = new(
+ apiKey: Environment.GetEnvironmentVariable("OPENAI_API_KEY")
+ );
+
+ Conversation updated = client.UpdateConversation(
+ conversationId: "conv_123",
+ new UpdateConversationOptions
+ {
+ Metadata = new Dictionary<string, string>
+ {
+ { "topic", "project-x" }
+ }
+ }
+ );
+ Console.WriteLine(updated.Id);
+ node.js: >-
+ import OpenAI from 'openai';
+
+
+ const client = new OpenAI({
+ apiKey: 'My API Key',
+ });
+
+
+ const conversation = await client.conversations.update('conv_123', { metadata: { foo: 'string'
+ } });
+
+
+ console.log(conversation.id);
+ go: |
+ package main
+
+ import (
+ "context"
+ "fmt"
+
+ "github.com/openai/openai-go"
+ "github.com/openai/openai-go/conversations"
+ "github.com/openai/openai-go/option"
+ "github.com/openai/openai-go/shared"
+ )
+
+ func main() {
+ client := openai.NewClient(
+ option.WithAPIKey("My API Key"),
+ )
+ conversation, err := client.Conversations.Update(
+ context.TODO(),
+ "conv_123",
+ conversations.ConversationUpdateParams{
+ Metadata: shared.Metadata{
+ "foo": "string",
+ },
+ },
+ )
+ if err != nil {
+ panic(err.Error())
+ }
+ fmt.Printf("%+v\n", conversation.ID)
+ }
+ java: |-
+ package com.openai.example;
+
+ import com.openai.client.OpenAIClient;
+ import com.openai.client.okhttp.OpenAIOkHttpClient;
+ import com.openai.core.JsonValue;
+ import com.openai.models.conversations.Conversation;
+ import com.openai.models.conversations.ConversationUpdateParams;
+
+ public final class Main {
+ private Main() {}
+
+ public static void main(String[] args) {
+ OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+ ConversationUpdateParams params = ConversationUpdateParams.builder()
+ .conversationId("conv_123")
+ .metadata(ConversationUpdateParams.Metadata.builder()
+ .putAdditionalProperty("foo", JsonValue.from("string"))
+ .build())
+ .build();
+ Conversation conversation = client.conversations().update(params);
+ }
+ }
+ ruby: |-
+ require "openai"
+
+ openai = OpenAI::Client.new(api_key: "My API Key")
+
+ conversation = openai.conversations.update("conv_123", metadata: {foo: "string"})
+
+ puts(conversation)
+ response: |
+ {
+ "id": "conv_123",
+ "object": "conversation",
+ "created_at": 1741900000,
+ "metadata": {"topic": "project-x"}
+ }
+ /videos:
+ post:
+ tags:
+ - Videos
+ summary: Create video
+ description: Create a video
+ operationId: createVideo
+ parameters: []
+ requestBody:
+ content:
+ multipart/form-data:
+ schema:
+ $ref: '#/components/schemas/CreateVideoBody'
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateVideoBody'
+ responses:
+ '200':
+ description: Success
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/VideoResource'
+ x-oaiMeta:
+ name:
Create video + group: videos + path: create + returns: Returns the newly created [video job](https://platform.openai.com/docs/api-reference/videos/object). + examples: + - title: Create a video render + request: + curl: | + curl https://api.openai.com/v1/videos \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -F "model=sora-2" \ + -F "prompt=A calico cat playing a piano on stage" + javascript: | + import OpenAI from 'openai'; + + const openai = new OpenAI(); + + const video = await openai.videos.create({ prompt: 'A calico cat playing a piano on stage' }); + + console.log(video.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + video = client.videos.create( + prompt="x", + ) + print(video.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + video, err := client.Videos.New(context.TODO(), openai.VideoNewParams{ + Prompt: "x", + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", video.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.videos.Video; + import com.openai.models.videos.VideoCreateParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + VideoCreateParams params = VideoCreateParams.builder() + .prompt("x") + .build(); + Video video = client.videos().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + video = openai.videos.create(prompt: "x") + + puts(video) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const video = await client.videos.create({ prompt: 'x' }); + + console.log(video.id); + response: | + { + "id": "video_123", + "object": "video", + "model": "sora-2", + "status": "queued", + "progress": 0, + "created_at": 1712697600, + "size": "1024x1808", + "seconds": "8", + "quality": "standard" + } + get: + tags: + - Videos + summary: List videos + description: List videos + operationId: ListVideos + parameters: + - name: limit + in: query + description: Number of items to retrieve + required: false + schema: + type: integer + minimum: 0 + maximum: 100 + - name: order + in: query + description: Sort order of results by timestamp. Use `asc` for ascending order or `desc` for descending order. + required: false + schema: + $ref: '#/components/schemas/OrderEnum' + - name: after + in: query + description: Identifier for the last item from the previous pagination request + required: false + schema: + description: Identifier for the last item from the previous pagination request + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/VideoListResource' + x-oaiMeta: + name: List videos + group: videos + path: list + returns: >- + Returns a paginated list of [video + jobs](https://platform.openai.com/docs/api-reference/videos/object) for the organization. + examples: + - title: List recent videos + request: + curl: | + curl https://api.openai.com/v1/videos \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from 'openai'; + + const openai = new OpenAI(); + + // Automatically fetches more pages as needed. 
+ for await (const video of openai.videos.list()) { + console.log(video.id); + } + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.videos.list() + page = page.data[0] + print(page.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Videos.List(context.TODO(), openai.VideoListParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.videos.VideoListPage; + import com.openai.models.videos.VideoListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + VideoListPage page = client.videos().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.videos.list + + puts(page) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const video of client.videos.list()) { + console.log(video.id); + } + response: | + { + "data": [ + { + "id": "video_123", + "object": "video", + "model": "sora-2", + "status": "completed" + } + ], + "object": "list" + } + /videos/{video_id}: + get: + tags: + - Videos + summary: Retrieve video + description: Retrieve a video + operationId: GetVideo + parameters: + - name: video_id + in: path + description: The identifier of the video to retrieve. + required: true + schema: + example: video_123 + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/VideoResource' + x-oaiMeta: + name: Retrieve video + group: videos + path: retrieve + returns: >- + Returns the [video job](https://platform.openai.com/docs/api-reference/videos/object) matching the + provided identifier. 
+ examples: + response: '' + request: + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const video = await client.videos.retrieve('video_123'); + + console.log(video.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + video = client.videos.retrieve( + "video_123", + ) + print(video.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + video, err := client.Videos.Get(context.TODO(), "video_123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", video.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.videos.Video; + import com.openai.models.videos.VideoRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + Video video = client.videos().retrieve("video_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + video = openai.videos.retrieve("video_123") + + puts(video) + delete: + tags: + - Videos + summary: Delete video + description: Delete a video + operationId: DeleteVideo + parameters: + - name: video_id + in: path + description: The identifier of the video to delete. + required: true + schema: + example: video_123 + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/DeletedVideoResource' + x-oaiMeta: + name: Delete video + group: videos + path: delete + returns: Returns the deleted video job metadata. + examples: + response: '' + request: + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const video = await client.videos.delete('video_123'); + + console.log(video.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + video = client.videos.delete( + "video_123", + ) + print(video.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + video, err := client.Videos.Delete(context.TODO(), "video_123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", video.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.videos.VideoDeleteParams; + import com.openai.models.videos.VideoDeleteResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + VideoDeleteResponse video = client.videos().delete("video_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + video = openai.videos.delete("video_123") + + puts(video) + /videos/{video_id}/content: + get: + tags: + - Videos + summary: Retrieve video content + description: Download video content + operationId: RetrieveVideoContent + parameters: + - name: video_id + in: path + description: The identifier of the video whose media to download. 
+ required: true + schema: + example: video_123 + type: string + - name: variant + in: query + description: Which downloadable asset to return. Defaults to the MP4 video. + required: false + schema: + $ref: '#/components/schemas/VideoContentVariant' + responses: + '200': + description: The video bytes or preview asset that matches the requested variant. + content: + video/mp4: + schema: + type: string + format: binary + image/webp: + schema: + type: string + format: binary + application/json: + schema: + type: string + x-oaiMeta: + name: Retrieve video content + group: videos + path: content + returns: Streams the rendered video content for the specified video job. + examples: + response: '' + request: + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.videos.downloadContent('video_123'); + + console.log(response); + + const content = await response.blob(); + console.log(content); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.videos.download_content( + video_id="video_123", + ) + print(response) + content = response.read() + print(content) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Videos.DownloadContent( + context.TODO(), + "video_123", + openai.VideoDownloadContentParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.core.http.HttpResponse; + import com.openai.models.videos.VideoDownloadContentParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + HttpResponse response = client.videos().downloadContent("video_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.videos.download_content("video_123") + + puts(response) + /videos/{video_id}/remix: + post: + tags: + - Videos + summary: Remix video + description: Create a video remix + operationId: CreateVideoRemix + parameters: + - name: video_id + in: path + description: The identifier of the completed video to remix. + required: true + schema: + example: video_123 + type: string + requestBody: + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateVideoRemixBody' + application/json: + schema: + $ref: '#/components/schemas/CreateVideoRemixBody' + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/VideoResource' + x-oaiMeta: + name: Remix video + group: videos + path: remix + returns: >- + Creates a remix of the specified [video + job](https://platform.openai.com/docs/api-reference/videos/object) using the provided prompt. 
+ examples:
+ - title: Remix a generated video
+ request:
+ curl: |
+ curl -X POST https://api.openai.com/v1/videos/video_123/remix \
+ -H "Authorization: Bearer $OPENAI_API_KEY" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "prompt": "Extend the scene with the cat taking a bow to the cheering audience"
+ }'
+ javascript: >
+ import OpenAI from 'openai';
+
+
+ const client = new OpenAI();
+
+
+ const video = await client.videos.remix('video_123', { prompt: 'Extend the scene with the cat
+ taking a bow to the cheering audience' });
+
+
+ console.log(video.id);
+ python: |-
+ from openai import OpenAI
+
+ client = OpenAI(
+ api_key="My API Key",
+ )
+ video = client.videos.remix(
+ video_id="video_123",
+ prompt="x",
+ )
+ print(video.id)
+ go: |
+ package main
+
+ import (
+ "context"
+ "fmt"
+
+ "github.com/openai/openai-go"
+ "github.com/openai/openai-go/option"
+ )
+
+ func main() {
+ client := openai.NewClient(
+ option.WithAPIKey("My API Key"),
+ )
+ video, err := client.Videos.Remix(
+ context.TODO(),
+ "video_123",
+ openai.VideoRemixParams{
+ Prompt: "x",
+ },
+ )
+ if err != nil {
+ panic(err.Error())
+ }
+ fmt.Printf("%+v\n", video.ID)
+ }
+ java: |-
+ package com.openai.example;
+
+ import com.openai.client.OpenAIClient;
+ import com.openai.client.okhttp.OpenAIOkHttpClient;
+ import com.openai.models.videos.Video;
+ import com.openai.models.videos.VideoRemixParams;
+
+ public final class Main {
+ private Main() {}
+
+ public static void main(String[] args) {
+ OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+ VideoRemixParams params = VideoRemixParams.builder()
+ .videoId("video_123")
+ .prompt("x")
+ .build();
+ Video video = client.videos().remix(params);
+ }
+ }
+ ruby: |-
+ require "openai"
+
+ openai = OpenAI::Client.new(api_key: "My API Key")
+
+ video = openai.videos.remix("video_123", prompt: "x")
+
+ puts(video)
+ node.js: |-
+ import OpenAI from 'openai';
+
+ const client = new OpenAI({
+ apiKey: 'My API Key',
+ });
+
+ const video = await client.videos.remix('video_123', { prompt: 'x' });
+
+ console.log(video.id);
+ response: |
+ {
+ "id": "video_456",
+ "object": "video",
+ "model": "sora-2",
+ "status": "queued",
+ "progress": 0,
+ "created_at": 1712698600,
+ "size": "720x1280",
+ "seconds": "8",
+ "remixed_from_video_id": "video_123"
+ }
+ /responses/input_tokens:
+ post:
+ summary: Get input token counts
+ description: Get input token counts
+ operationId: Getinputtokencounts
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/TokenCountsBody'
+ application/x-www-form-urlencoded:
+ schema:
+ $ref: '#/components/schemas/TokenCountsBody'
+ responses:
+ '200':
+ description: Success
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/TokenCountsResource'
+ x-oaiMeta:
+ name: Get input token counts
+ group: responses
+ returns: |
+ The input token counts.
+ ```json
+ {
+ "object": "response.input_tokens",
+ "input_tokens": 123
+ }
+ ```
+ examples:
+ response: |
+ {
+ "object": "response.input_tokens",
+ "input_tokens": 11
+ }
+ request:
+ curl: |
+ curl -X POST https://api.openai.com/v1/responses/input_tokens \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer $OPENAI_API_KEY" \
+ -d '{
+ "model": "gpt-5",
+ "input": "Tell me a joke."
+ }' + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const response = await client.responses.inputTokens.count(); + + console.log(response.input_tokens); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + response = client.responses.input_tokens.count() + print(response.input_tokens) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/responses" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + response, err := client.Responses.InputTokens.Count(context.TODO(), responses.InputTokenCountParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", response.InputTokens) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.responses.inputtokens.InputTokenCountParams; + import com.openai.models.responses.inputtokens.InputTokenCountResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + InputTokenCountResponse response = client.responses().inputTokens().count(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + response = openai.responses.input_tokens.count + + puts(response) + /chatkit/sessions/{session_id}/cancel: + post: + summary: Cancel chat session + description: Cancel a ChatKit session + operationId: CancelChatSessionMethod + parameters: + - name: session_id + in: path + description: Unique identifier for the ChatKit session to cancel. + required: true + schema: + example: cksess_123 + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ChatSessionResource' + x-oaiMeta: + name: Cancel chat session + group: chatkit + beta: true + path: cancel-session + returns: >- + Returns the chat session after it has been cancelled. Cancelling prevents new requests from using + the issued client secret. 
+ examples: + - title: Cancel a ChatKit session by ID + request: + curl: | + curl -X POST \ + https://api.openai.com/v1/chatkit/sessions/cksess_123/cancel \ + -H "OpenAI-Beta: chatkit_beta=v1" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from 'openai'; + + const client = new OpenAI(); + + const chatSession = await client.beta.chatkit.sessions.cancel('cksess_123'); + + console.log(chatSession.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + chat_session = client.beta.chatkit.sessions.cancel( + "cksess_123", + ) + print(chat_session.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + chatSession, err := client.Beta.ChatKit.Sessions.Cancel(context.TODO(), "cksess_123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatSession.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.chatkit.sessions.SessionCancelParams; + import com.openai.models.beta.chatkit.threads.ChatSession; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatSession chatSession = client.beta().chatkit().sessions().cancel("cksess_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + chat_session = openai.beta.chatkit.sessions.cancel("cksess_123") + + puts(chat_session) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const chatSession = await client.beta.chatkit.sessions.cancel('cksess_123'); + + console.log(chatSession.id); + response: | + { + "id": "cksess_123", + "object": "chatkit.session", + "workflow": { + "id": "workflow_alpha", + "version": "1" + }, + "scope": { + "customer_id": "cust_456" + }, + "max_requests_per_1_minute": 30, + "ttl_seconds": 900, + "status": "cancelled", + "cancelled_at": 1712345678 + } + /chatkit/sessions: + post: + summary: Create ChatKit session + description: Create a ChatKit session + operationId: CreateChatSessionMethod + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateChatSessionBody' + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ChatSessionResource' + x-oaiMeta: + name: Create ChatKit session + group: chatkit + beta: true + path: sessions/create + returns: >- + Returns a [ChatKit session](https://platform.openai.com/docs/api-reference/chatkit/sessions/object) + object. 
+ examples: + - title: Create a scoped session + request: + curl: | + curl https://api.openai.com/v1/chatkit/sessions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "OpenAI-Beta: chatkit_beta=v1" \ + -d '{ + "workflow": { + "id": "workflow_alpha", + "version": "2024-10-01" + }, + "scope": { + "project": "alpha", + "environment": "staging" + }, + "expires_after": 1800, + "max_requests_per_1_minute": 60, + "max_requests_per_session": 500 + }' + javascript: > + import OpenAI from 'openai'; + + + const client = new OpenAI(); + + + const chatSession = await client.beta.chatkit.sessions.create({ user: 'user', workflow: { id: + 'id' } }); + + + console.log(chatSession.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + chat_session = client.beta.chatkit.sessions.create( + user="x", + workflow={ + "id": "id" + }, + ) + print(chat_session.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + chatSession, err := client.Beta.ChatKit.Sessions.New(context.TODO(), openai.BetaChatKitSessionNewParams{ + User: "x", + Workflow: openai.ChatSessionWorkflowParam{ + ID: "id", + }, + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatSession.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.chatkit.sessions.SessionCreateParams; + import com.openai.models.beta.chatkit.threads.ChatSession; + import com.openai.models.beta.chatkit.threads.ChatSessionWorkflowParam; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + SessionCreateParams params = SessionCreateParams.builder() + .user("x") + .workflow(ChatSessionWorkflowParam.builder() + .id("id") + .build()) + .build(); + ChatSession chatSession = client.beta().chatkit().sessions().create(params); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + chat_session = openai.beta.chatkit.sessions.create(user: "x", workflow: {id: "id"}) + + puts(chat_session) + node.js: >- + import OpenAI from 'openai'; + + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + + const chatSession = await client.beta.chatkit.sessions.create({ user: 'x', workflow: { id: + 'id' } }); + + + console.log(chatSession.id); + response: | + { + "client_secret": "chatkit_token_123", + "expires_after": 1800, + "workflow": { + "id": "workflow_alpha", + "version": "2024-10-01" + }, + "scope": { + "project": "alpha", + "environment": "staging" + }, + "max_requests_per_1_minute": 60, + "max_requests_per_session": 500, + "status": "active" + } + /chatkit/threads/{thread_id}/items: + get: + summary: List ChatKit thread items + description: List ChatKit thread items + operationId: ListThreadItemsMethod + parameters: + - name: thread_id + in: path + description: Identifier of the ChatKit thread whose items are requested. + required: true + schema: + example: cthr_123 + type: string + - name: limit + in: query + description: Maximum number of thread items to return. Defaults to 20. + required: false + schema: + type: integer + minimum: 0 + maximum: 100 + - name: order + in: query + description: Sort order for results by creation time. Defaults to `desc`. 
+ required: false + schema: + $ref: '#/components/schemas/OrderEnum' + - name: after + in: query + description: List items created after this thread item ID. Defaults to null for the first page. + required: false + schema: + description: List items created after this thread item ID. Defaults to null for the first page. + type: string + - name: before + in: query + description: List items created before this thread item ID. Defaults to null for the newest results. + required: false + schema: + description: List items created before this thread item ID. Defaults to null for the newest results. + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ThreadItemListResource' + x-oaiMeta: + name: List ChatKit thread items + group: chatkit + beta: true + path: threads/list-items + returns: >- + Returns a [list of thread + items](https://platform.openai.com/docs/api-reference/chatkit/threads/item-list) for the specified + thread. + examples: + - title: Retrieve items for a thread + request: + curl: | + curl "https://api.openai.com/v1/chatkit/threads/cthr_abc123/items?limit=3" \ + -H "OpenAI-Beta: chatkit_beta=v1" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from 'openai'; + + const client = new OpenAI(); + + // Automatically fetches more pages as needed. + for await (const thread of client.beta.chatkit.threads.listItems('cthr_123')) { + console.log(thread); + } + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.beta.chatkit.threads.list_items( + thread_id="cthr_123", + ) + page = page.data[0] + print(page) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Beta.ChatKit.Threads.ListItems( + context.TODO(), + "cthr_123", + openai.BetaChatKitThreadListItemsParams{ + + }, + ) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.chatkit.threads.ThreadListItemsPage; + import com.openai.models.beta.chatkit.threads.ThreadListItemsParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ThreadListItemsPage page = client.beta().chatkit().threads().listItems("cthr_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.beta.chatkit.threads.list_items("cthr_123") + + puts(page) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const thread of client.beta.chatkit.threads.listItems('cthr_123')) { + console.log(thread); + } + response: | + { + "data": [ + { + "id": "cthi_user_001", + "object": "chatkit.thread_item", + "type": "user_message", + "content": [ + { + "type": "input_text", + "text": "I need help debugging an onboarding issue." + } + ], + "attachments": [] + }, + { + "id": "cthi_assistant_002", + "object": "chatkit.thread_item", + "type": "assistant_message", + "content": [ + { + "type": "output_text", + "text": "Let's start by confirming the workflow version you deployed." 
+ } + ] + } + ], + "has_more": false, + "object": "list" + } + /chatkit/threads/{thread_id}: + get: + summary: Retrieve ChatKit thread + description: Retrieve a ChatKit thread + operationId: GetThreadMethod + parameters: + - name: thread_id + in: path + description: Identifier of the ChatKit thread to retrieve. + required: true + schema: + example: cthr_123 + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ThreadResource' + x-oaiMeta: + name: Retrieve ChatKit thread + group: chatkit + beta: true + path: threads/retrieve + returns: Returns a [Thread](https://platform.openai.com/docs/api-reference/chatkit/threads/object) object. + examples: + - title: Retrieve a thread by ID + request: + curl: | + curl https://api.openai.com/v1/chatkit/threads/cthr_abc123 \ + -H "OpenAI-Beta: chatkit_beta=v1" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from 'openai'; + + const client = new OpenAI(); + + const chatkitThread = await client.beta.chatkit.threads.retrieve('cthr_123'); + + console.log(chatkitThread.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + chatkit_thread = client.beta.chatkit.threads.retrieve( + "cthr_123", + ) + print(chatkit_thread.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + chatkitThread, err := client.Beta.ChatKit.Threads.Get(context.TODO(), "cthr_123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", chatkitThread.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.chatkit.threads.ChatKitThread; + import com.openai.models.beta.chatkit.threads.ThreadRetrieveParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ChatKitThread chatkitThread = client.beta().chatkit().threads().retrieve("cthr_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + chatkit_thread = openai.beta.chatkit.threads.retrieve("cthr_123") + + puts(chatkit_thread) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const chatkitThread = await client.beta.chatkit.threads.retrieve('cthr_123'); + + console.log(chatkitThread.id); + response: | + { + "id": "cthr_abc123", + "object": "chatkit.thread", + "title": "Customer escalation", + "items": { + "data": [ + { + "id": "cthi_user_001", + "object": "chatkit.thread_item", + "type": "user_message", + "content": [ + { + "type": "input_text", + "text": "I need help debugging an onboarding issue." + } + ], + "attachments": [] + }, + { + "id": "cthi_assistant_002", + "object": "chatkit.thread_item", + "type": "assistant_message", + "content": [ + { + "type": "output_text", + "text": "Let's start by confirming the workflow version you deployed." + } + ] + } + ], + "has_more": false + } + } + delete: + summary: Delete ChatKit thread + description: Delete a ChatKit thread + operationId: DeleteThreadMethod + parameters: + - name: thread_id + in: path + description: Identifier of the ChatKit thread to delete. 
+ required: true + schema: + example: cthr_123 + type: string + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/DeletedThreadResource' + x-oaiMeta: + beta: true + examples: + response: '' + request: + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + const thread = await client.beta.chatkit.threads.delete('cthr_123'); + + console.log(thread.id); + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + thread = client.beta.chatkit.threads.delete( + "cthr_123", + ) + print(thread.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + thread, err := client.Beta.ChatKit.Threads.Delete(context.TODO(), "cthr_123") + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", thread.ID) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.chatkit.threads.ThreadDeleteParams; + import com.openai.models.beta.chatkit.threads.ThreadDeleteResponse; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ThreadDeleteResponse thread = client.beta().chatkit().threads().delete("cthr_123"); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + thread = openai.beta.chatkit.threads.delete("cthr_123") + + puts(thread) + name: Delete ChatKit thread + group: chatkit + path: threads/delete + returns: Returns a confirmation object for the deleted thread. + /chatkit/threads: + get: + summary: List ChatKit threads + description: List ChatKit threads + operationId: ListThreadsMethod + parameters: + - name: limit + in: query + description: Maximum number of thread items to return. Defaults to 20. + required: false + schema: + type: integer + minimum: 0 + maximum: 100 + - name: order + in: query + description: Sort order for results by creation time. Defaults to `desc`. + required: false + schema: + $ref: '#/components/schemas/OrderEnum' + - name: after + in: query + description: List items created after this thread item ID. Defaults to null for the first page. + required: false + schema: + description: List items created after this thread item ID. Defaults to null for the first page. + type: string + - name: before + in: query + description: List items created before this thread item ID. Defaults to null for the newest results. + required: false + schema: + description: List items created before this thread item ID. Defaults to null for the newest results. + type: string + - name: user + in: query + description: Filter threads that belong to this user identifier. Defaults to null to return all users. + required: false + schema: + description: Filter threads that belong to this user identifier. Defaults to null to return all users. + type: string + minLength: 1 + maxLength: 512 + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ThreadListResource' + x-oaiMeta: + name: List ChatKit threads + group: chatkit + beta: true + path: list-threads + returns: Returns a paginated list of ChatKit threads accessible to the request scope. 
+ examples: + - title: List recent threads + request: + curl: | + curl "https://api.openai.com/v1/chatkit/threads?limit=2&order=desc" \ + -H "OpenAI-Beta: chatkit_beta=v1" \ + -H "Authorization: Bearer $OPENAI_API_KEY" + javascript: | + import OpenAI from 'openai'; + + const client = new OpenAI(); + + // Automatically fetches more pages as needed. + for await (const chatkitThread of client.beta.chatkit.threads.list()) { + console.log(chatkitThread.id); + } + python: |- + from openai import OpenAI + + client = OpenAI( + api_key="My API Key", + ) + page = client.beta.chatkit.threads.list() + page = page.data[0] + print(page.id) + go: | + package main + + import ( + "context" + "fmt" + + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + ) + + func main() { + client := openai.NewClient( + option.WithAPIKey("My API Key"), + ) + page, err := client.Beta.ChatKit.Threads.List(context.TODO(), openai.BetaChatKitThreadListParams{ + + }) + if err != nil { + panic(err.Error()) + } + fmt.Printf("%+v\n", page) + } + java: |- + package com.openai.example; + + import com.openai.client.OpenAIClient; + import com.openai.client.okhttp.OpenAIOkHttpClient; + import com.openai.models.beta.chatkit.threads.ThreadListPage; + import com.openai.models.beta.chatkit.threads.ThreadListParams; + + public final class Main { + private Main() {} + + public static void main(String[] args) { + OpenAIClient client = OpenAIOkHttpClient.fromEnv(); + + ThreadListPage page = client.beta().chatkit().threads().list(); + } + } + ruby: |- + require "openai" + + openai = OpenAI::Client.new(api_key: "My API Key") + + page = openai.beta.chatkit.threads.list + + puts(page) + node.js: |- + import OpenAI from 'openai'; + + const client = new OpenAI({ + apiKey: 'My API Key', + }); + + // Automatically fetches more pages as needed. + for await (const chatkitThread of client.beta.chatkit.threads.list()) { + console.log(chatkitThread.id); + } + response: | + { + "data": [ + { + "id": "cthr_abc123", + "object": "chatkit.thread", + "title": "Customer escalation" + }, + { + "id": "cthr_def456", + "object": "chatkit.thread", + "title": "Demo feedback" + } + ], + "has_more": false, + "object": "list" + } +webhooks: + batch_cancelled: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookBatchCancelled' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + batch_completed: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookBatchCompleted' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + batch_expired: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookBatchExpired' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + batch_failed: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookBatchFailed' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. 
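+ # Illustrative receiver sketch (comment only, not part of the spec): every webhook in
+ # this section shares the same acknowledgement contract, so a single handler can serve
+ # them all. Flask, the route path, and handle_event() are assumptions made for this
+ # sketch; the spec only states that a 200 response acknowledges the event and that any
+ # non-200 status causes a retry.
+ #
+ #     from flask import Flask, request
+ #
+ #     app = Flask(__name__)
+ #
+ #     @app.post("/openai/webhooks")
+ #     def receive_event():
+ #         event = request.get_json()  # payload matches the referenced Webhook* schemas
+ #         handle_event(event)         # hypothetical application logic
+ #         return "", 200              # acknowledge; any non-200 status is retried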
+ eval_run_canceled: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookEvalRunCanceled' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + eval_run_failed: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookEvalRunFailed' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + eval_run_succeeded: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookEvalRunSucceeded' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + fine_tuning_job_cancelled: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookFineTuningJobCancelled' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + fine_tuning_job_failed: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookFineTuningJobFailed' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + fine_tuning_job_succeeded: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookFineTuningJobSucceeded' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + realtime_call_incoming: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookRealtimeCallIncoming' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + response_cancelled: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookResponseCancelled' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + response_completed: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookResponseCompleted' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + response_failed: + post: + requestBody: + description: The event payload sent by the API. + content: + application/json: + schema: + $ref: '#/components/schemas/WebhookResponseFailed' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. + response_incomplete: + post: + requestBody: + description: The event payload sent by the API. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/WebhookResponseIncomplete' + responses: + '200': + description: | + Return a 200 status code to acknowledge receipt of the event. Non-200 + status codes will be retried. +components: + schemas: + AddUploadPartRequest: + type: object + additionalProperties: false + properties: + data: + description: | + The chunk of bytes for this Part. + type: string + format: binary + required: + - data + AdminApiKey: + type: object + description: Represents an individual Admin API key in an org. + properties: + object: + type: string + example: organization.admin_api_key + description: The object type, which is always `organization.admin_api_key` + x-stainless-const: true + id: + type: string + example: key_abc + description: The identifier, which can be referenced in API endpoints + name: + type: string + example: Administration Key + description: The name of the API key + redacted_value: + type: string + example: sk-admin...def + description: The redacted value of the API key + value: + type: string + example: sk-admin-1234abcd + description: The value of the API key. Only shown on create. + created_at: + type: integer + format: int64 + example: 1711471533 + description: The Unix timestamp (in seconds) of when the API key was created + last_used_at: + anyOf: + - type: integer + format: int64 + example: 1711471534 + description: The Unix timestamp (in seconds) of when the API key was last used + - type: 'null' + owner: + type: object + properties: + type: + type: string + example: user + description: Always `user` + object: + type: string + example: organization.user + description: The object type, which is always organization.user + id: + type: string + example: sa_456 + description: The identifier, which can be referenced in API endpoints + name: + type: string + example: My Service Account + description: The name of the user + created_at: + type: integer + format: int64 + example: 1711471533 + description: The Unix timestamp (in seconds) of when the user was created + role: + type: string + example: owner + description: Always `owner` + required: + - object + - redacted_value + - name + - created_at + - last_used_at + - id + - owner + x-oaiMeta: + name: The admin API key object + example: | + { + "object": "organization.admin_api_key", + "id": "key_abc", + "name": "Main Admin Key", + "redacted_value": "sk-admin...xyz", + "created_at": 1711471533, + "last_used_at": 1711471534, + "owner": { + "type": "user", + "object": "organization.user", + "id": "user_123", + "name": "John Doe", + "created_at": 1711471533, + "role": "owner" + } + } + ApiKeyList: + type: object + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/AdminApiKey' + has_more: + type: boolean + example: false + first_id: + type: string + example: key_abc + last_id: + type: string + example: key_xyz + AssistantObject: + type: object + title: Assistant + description: Represents an `assistant` that can call the model and use tools. + properties: + id: + description: The identifier, which can be referenced in API endpoints. + type: string + object: + description: The object type, which is always `assistant`. + type: string + enum: + - assistant + x-stainless-const: true + created_at: + description: The Unix timestamp (in seconds) for when the assistant was created. + type: integer + name: + anyOf: + - description: | + The name of the assistant. The maximum length is 256 characters. 
+ type: string
+ maxLength: 256
+ - type: 'null'
+ description:
+ anyOf:
+ - description: |
+ The description of the assistant. The maximum length is 512 characters.
+ type: string
+ maxLength: 512
+ - type: 'null'
+ model:
+ description: >
+ ID of the model to use. You can use the [List
+ models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your
+ available models, or see our [Model overview](https://platform.openai.com/docs/models) for
+ descriptions of them.
+ type: string
+ instructions:
+ anyOf:
+ - description: |
+ The system instructions that the assistant uses. The maximum length is 256,000 characters.
+ type: string
+ maxLength: 256000
+ - type: 'null'
+ tools:
+ description: >
+ A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools
+ can be of types `code_interpreter`, `file_search`, or `function`.
+ default: []
+ type: array
+ maxItems: 128
+ items:
+ $ref: '#/components/schemas/AssistantTool'
+ tool_resources:
+ anyOf:
+ - type: object
+ description: >
+ A set of resources that are used by the assistant's tools. The resources are specific to the
+ type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the
+ `file_search` tool requires a list of vector store IDs.
+ properties:
+ code_interpreter:
+ type: object
+ properties:
+ file_ids:
+ type: array
+ description: >
+ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made
+ available to the `code_interpreter` tool. There can be a maximum of 20 files
+ associated with the tool.
+ default: []
+ maxItems: 20
+ items:
+ type: string
+ file_search:
+ type: object
+ properties:
+ vector_store_ids:
+ type: array
+ description: >
+ The ID of the [vector
+ store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached
+ to this assistant. There can be a maximum of 1 vector store attached to the assistant.
+ maxItems: 1
+ items:
+ type: string
+ - type: 'null'
+ metadata:
+ $ref: '#/components/schemas/Metadata'
+ temperature:
+ anyOf:
+ - description: >
+ What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output
+ more random, while lower values like 0.2 will make it more focused and deterministic.
+ type: number
+ minimum: 0
+ maximum: 2
+ default: 1
+ example: 1
+ - type: 'null'
+ top_p:
+ anyOf:
+ - type: number
+ minimum: 0
+ maximum: 1
+ default: 1
+ example: 1
+ description: >
+ An alternative to sampling with temperature, called nucleus sampling, where the model
+ considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens
+ comprising the top 10% probability mass are considered.
+
+
+ We generally recommend altering this or temperature but not both.
+ - type: 'null'
+ response_format:
+ anyOf:
+ - $ref: '#/components/schemas/AssistantsApiResponseFormatOption'
+ - type: 'null'
+ required:
+ - id
+ - object
+ - created_at
+ - name
+ - description
+ - model
+ - instructions
+ - tools
+ - metadata
+ x-oaiMeta:
+ name: The assistant object
+ beta: true
+ example: |
+ {
+ "id": "asst_abc123",
+ "object": "assistant",
+ "created_at": 1698984975,
+ "name": "Math Tutor",
+ "description": null,
+ "model": "gpt-4o",
+ "instructions": "You are a personal math tutor. 
When asked a question, write and run Python code to answer the question.", + "tools": [ + { + "type": "code_interpreter" + } + ], + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + } + AssistantStreamEvent: + description: > + Represents an event emitted when streaming a Run. + + + Each event in a server-sent events stream has an `event` and `data` property: + + + ``` + + event: thread.created + + data: {"id": "thread_123", "object": "thread", ...} + + ``` + + + We emit events whenever a new object is created, transitions to a new state, or is being + + streamed in parts (deltas). For example, we emit `thread.run.created` when a new run + + is created, `thread.run.completed` when a run completes, and so on. When an Assistant chooses + + to create a message during a run, we emit a `thread.message.created event`, a + + `thread.message.in_progress` event, many `thread.message.delta` events, and finally a + + `thread.message.completed` event. + + + We may add additional events over time, so we recommend handling unknown events gracefully + + in your code. See the [Assistants API + quickstart](https://platform.openai.com/docs/assistants/overview) to learn how to + + integrate the Assistants API with streaming. + x-oaiMeta: + name: Assistant stream events + beta: true + anyOf: + - $ref: '#/components/schemas/ThreadStreamEvent' + - $ref: '#/components/schemas/RunStreamEvent' + - $ref: '#/components/schemas/RunStepStreamEvent' + - $ref: '#/components/schemas/MessageStreamEvent' + - $ref: '#/components/schemas/ErrorEvent' + x-stainless-variantName: error_event + discriminator: + propertyName: event + AssistantSupportedModels: + type: string + enum: + - gpt-5 + - gpt-5-mini + - gpt-5-nano + - gpt-5-2025-08-07 + - gpt-5-mini-2025-08-07 + - gpt-5-nano-2025-08-07 + - gpt-4.1 + - gpt-4.1-mini + - gpt-4.1-nano + - gpt-4.1-2025-04-14 + - gpt-4.1-mini-2025-04-14 + - gpt-4.1-nano-2025-04-14 + - o3-mini + - o3-mini-2025-01-31 + - o1 + - o1-2024-12-17 + - gpt-4o + - gpt-4o-2024-11-20 + - gpt-4o-2024-08-06 + - gpt-4o-2024-05-13 + - gpt-4o-mini + - gpt-4o-mini-2024-07-18 + - gpt-4.5-preview + - gpt-4.5-preview-2025-02-27 + - gpt-4-turbo + - gpt-4-turbo-2024-04-09 + - gpt-4-0125-preview + - gpt-4-turbo-preview + - gpt-4-1106-preview + - gpt-4-vision-preview + - gpt-4 + - gpt-4-0314 + - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0314 + - gpt-4-32k-0613 + - gpt-3.5-turbo + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-1106 + - gpt-3.5-turbo-0125 + - gpt-3.5-turbo-16k-0613 + AssistantToolsCode: + type: object + title: Code interpreter tool + properties: + type: + type: string + description: 'The type of tool being defined: `code_interpreter`' + enum: + - code_interpreter + x-stainless-const: true + required: + - type + AssistantToolsFileSearch: + type: object + title: FileSearch tool + properties: + type: + type: string + description: 'The type of tool being defined: `file_search`' + enum: + - file_search + x-stainless-const: true + file_search: + type: object + description: Overrides for the file search tool. + properties: + max_num_results: + type: integer + minimum: 1 + maximum: 50 + description: > + The maximum number of results the file search tool should output. The default is 20 for + `gpt-4*` models and 5 for `gpt-3.5-turbo`. This number should be between 1 and 50 inclusive. + + + Note that the file search tool may output fewer than `max_num_results` results. 
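The `event`/`data` framing described above maps onto a very small SSE consumer. A minimal sketch in Python, assuming `lines` is an iterator of decoded text lines from the streaming HTTP response (a real client would also handle comments, retries, and multi-line `data` fields):

```python
import json

def iter_assistant_events(lines):
    """Yield (event, payload) pairs from an Assistants SSE stream."""
    event, data = None, []
    for line in lines:
        if line.startswith("event:"):
            event = line[len("event:"):].strip()
        elif line.startswith("data:"):
            data.append(line[len("data:"):].strip())
        elif line == "":  # a blank line terminates one event
            if event and data and data[0] != "[DONE]":
                yield event, json.loads("\n".join(data))
            event, data = None, []

for event, payload in iter_assistant_events(lines):
    if event == "thread.message.delta":
        ...  # append delta content to the in-progress message
    else:
        pass  # per the note above, ignore event types you do not know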
See the [file + search tool + documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + ranking_options: + $ref: '#/components/schemas/FileSearchRankingOptions' + required: + - type + AssistantToolsFileSearchTypeOnly: + type: object + title: AssistantToolsFileSearchTypeOnly + properties: + type: + type: string + description: 'The type of tool being defined: `file_search`' + enum: + - file_search + x-stainless-const: true + required: + - type + AssistantToolsFunction: + type: object + title: Function tool + properties: + type: + type: string + description: 'The type of tool being defined: `function`' + enum: + - function + x-stainless-const: true + function: + $ref: '#/components/schemas/FunctionObject' + required: + - type + - function + AssistantsApiResponseFormatOption: + description: > + Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), [GPT-4 + Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models + since `gpt-3.5-turbo-1106`. + + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures + the model will match your supplied JSON schema. Learn more in the [Structured Outputs + guide](https://platform.openai.com/docs/guides/structured-outputs). + + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model + generates is valid JSON. + + + **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via + a system or user message. Without this, the model may generate an unending stream of whitespace until + the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. + Also note that the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length. + anyOf: + - type: string + description: | + `auto` is the default value + enum: + - auto + x-stainless-const: true + - $ref: '#/components/schemas/ResponseFormatText' + - $ref: '#/components/schemas/ResponseFormatJsonObject' + - $ref: '#/components/schemas/ResponseFormatJsonSchema' + AssistantsApiToolChoiceOption: + description: > + Controls which (if any) tool is called by the model. + + `none` means the model will not call any tools and instead generates a message. + + `auto` is the default value and means the model can pick between generating a message or calling one + or more tools. + + `required` means the model must call one or more tools before responding to the user. + + Specifying a particular tool like `{"type": "file_search"}` or `{"type": "function", "function": + {"name": "my_function"}}` forces the model to call that tool. + anyOf: + - type: string + description: > + `none` means the model will not call any tools and instead generates a message. `auto` means the + model can pick between generating a message or calling one or more tools. `required` means the + model must call one or more tools before responding to the user. + enum: + - none + - auto + - required + title: Auto + - $ref: '#/components/schemas/AssistantsNamedToolChoice' + AssistantsNamedToolChoice: + type: object + description: Specifies a tool the model should use. Use to force the model to call a specific tool. 
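Since JSON mode silently misbehaves without an explicit instruction, a minimal request-body sketch (a plain Chat Completions payload; the model name is only an example) may help:

```python
# Per the AssistantsApiResponseFormatOption notes above, JSON mode requires
# that a system or user message itself asks for JSON; otherwise the model
# may stream whitespace until it hits the token limit.
payload = {
    "model": "gpt-4o",
    "response_format": {"type": "json_object"},
    "messages": [
        {"role": "system", "content": "Reply with a single JSON object."},
        {"role": "user", "content": "List three primary colors."},
    ],
}
```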
+ properties: + type: + type: string + enum: + - function + - code_interpreter + - file_search + description: The type of the tool. If type is `function`, the function name must be set + function: + type: object + properties: + name: + type: string + description: The name of the function to call. + required: + - name + required: + - type + AudioResponseFormat: + description: > + The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`, `vtt`, or + `diarized_json`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only supported format is + `json`. For `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and + `diarized_json`, with `diarized_json` required to receive speaker annotations. + type: string + enum: + - json + - text + - srt + - verbose_json + - vtt + - diarized_json + default: json + AudioTranscription: + type: object + properties: + model: + type: string + description: > + The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, + `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you + need diarization with speaker labels. + enum: + - whisper-1 + - gpt-4o-mini-transcribe + - gpt-4o-transcribe + - gpt-4o-transcribe-diarize + language: + type: string + description: | + The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format + will improve accuracy and latency. + prompt: + type: string + description: > + An optional text to guide the model's style or continue a previous audio + + segment. + + For `whisper-1`, the [prompt is a list of + keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). + + For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text + string, for example "expect words related to technology". + AuditLog: + type: object + description: A log of a user action or configuration change within this organization. + properties: + id: + type: string + description: The ID of this log. + type: + $ref: '#/components/schemas/AuditLogEventType' + effective_at: + type: integer + description: The Unix timestamp (in seconds) of the event. + project: + type: object + description: >- + The project that the action was scoped to. Absent for actions not scoped to projects. Note that + any admin actions taken via Admin API keys are associated with the default project. + properties: + id: + type: string + description: The project ID. + name: + type: string + description: The project title. + actor: + $ref: '#/components/schemas/AuditLogActor' + api_key.created: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The tracking ID of the API key. + data: + type: object + description: The payload used to create the API key. + properties: + scopes: + type: array + items: + type: string + description: A list of scopes allowed for the API key, e.g. `["api.model.request"]` + api_key.updated: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The tracking ID of the API key. + changes_requested: + type: object + description: The payload used to update the API key. + properties: + scopes: + type: array + items: + type: string + description: A list of scopes allowed for the API key, e.g. 
+                      `["api.model.request"]`
+        api_key.deleted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The tracking ID of the API key.
+        checkpoint.permission.created:
+          type: object
+          description: The project and fine-tuned model checkpoint that the checkpoint permission was created for.
+          properties:
+            id:
+              type: string
+              description: The ID of the checkpoint permission.
+            data:
+              type: object
+              description: The payload used to create the checkpoint permission.
+              properties:
+                project_id:
+                  type: string
+                  description: The ID of the project that the checkpoint permission was created for.
+                fine_tuned_model_checkpoint:
+                  type: string
+                  description: The ID of the fine-tuned model checkpoint.
+        checkpoint.permission.deleted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the checkpoint permission.
+        external_key.registered:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the external key configuration.
+            data:
+              type: object
+              description: The configuration for the external key.
+        external_key.removed:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the external key configuration.
+        group.created:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the group.
+            data:
+              type: object
+              description: Information about the created group.
+              properties:
+                group_name:
+                  type: string
+                  description: The group name.
+        group.updated:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the group.
+            changes_requested:
+              type: object
+              description: The payload used to update the group.
+              properties:
+                group_name:
+                  type: string
+                  description: The updated group name.
+        group.deleted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the group.
+        scim.enabled:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the resource that SCIM was enabled for.
+        scim.disabled:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the resource that SCIM was disabled for.
+        invite.sent:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the invite.
+            data:
+              type: object
+              description: The payload used to create the invite.
+              properties:
+                email:
+                  type: string
+                  description: The email invited to the organization.
+                role:
+                  type: string
+                  description: The role the email was invited to be. Is either `owner` or `member`.
+        invite.accepted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the invite.
+        invite.deleted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the invite.
+        ip_allowlist.created:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The ID of the IP allowlist configuration.
+ name: + type: string + description: The name of the IP allowlist configuration. + allowed_ips: + type: array + description: The IP addresses or CIDR ranges included in the configuration. + items: + type: string + ip_allowlist.updated: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The ID of the IP allowlist configuration. + allowed_ips: + type: array + description: The updated set of IP addresses or CIDR ranges in the configuration. + items: + type: string + ip_allowlist.deleted: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The ID of the IP allowlist configuration. + name: + type: string + description: The name of the IP allowlist configuration. + allowed_ips: + type: array + description: The IP addresses or CIDR ranges that were in the configuration. + items: + type: string + ip_allowlist.config.activated: + type: object + description: The details for events with this `type`. + properties: + configs: + type: array + description: The configurations that were activated. + items: + type: object + properties: + id: + type: string + description: The ID of the IP allowlist configuration. + name: + type: string + description: The name of the IP allowlist configuration. + ip_allowlist.config.deactivated: + type: object + description: The details for events with this `type`. + properties: + configs: + type: array + description: The configurations that were deactivated. + items: + type: object + properties: + id: + type: string + description: The ID of the IP allowlist configuration. + name: + type: string + description: The name of the IP allowlist configuration. + login.succeeded: + type: object + description: This event has no additional fields beyond the standard audit log attributes. + login.failed: + type: object + description: The details for events with this `type`. + properties: + error_code: + type: string + description: The error code of the failure. + error_message: + type: string + description: The error message of the failure. + logout.succeeded: + type: object + description: This event has no additional fields beyond the standard audit log attributes. + logout.failed: + type: object + description: The details for events with this `type`. + properties: + error_code: + type: string + description: The error code of the failure. + error_message: + type: string + description: The error message of the failure. + organization.updated: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The organization ID. + changes_requested: + type: object + description: The payload used to update the organization settings. + properties: + title: + type: string + description: The organization title. + description: + type: string + description: The organization description. + name: + type: string + description: The organization name. + threads_ui_visibility: + type: string + description: >- + Visibility of the threads page which shows messages created with the Assistants API and + Playground. One of `ANY_ROLE`, `OWNERS`, or `NONE`. + usage_dashboard_visibility: + type: string + description: >- + Visibility of the usage dashboard which shows activity and costs for your organization. + One of `ANY_ROLE` or `OWNERS`. + api_call_logging: + type: string + description: >- + How your organization logs data from supported API calls. 
One of `disabled`, + `enabled_per_call`, `enabled_for_all_projects`, or `enabled_for_selected_projects` + api_call_logging_project_ids: + type: string + description: The list of project ids if api_call_logging is set to `enabled_for_selected_projects` + project.created: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The project ID. + data: + type: object + description: The payload used to create the project. + properties: + name: + type: string + description: The project name. + title: + type: string + description: The title of the project as seen on the dashboard. + project.updated: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The project ID. + changes_requested: + type: object + description: The payload used to update the project. + properties: + title: + type: string + description: The title of the project as seen on the dashboard. + project.archived: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The project ID. + project.deleted: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The project ID. + rate_limit.updated: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The rate limit ID + changes_requested: + type: object + description: The payload used to update the rate limits. + properties: + max_requests_per_1_minute: + type: integer + description: The maximum requests per minute. + max_tokens_per_1_minute: + type: integer + description: The maximum tokens per minute. + max_images_per_1_minute: + type: integer + description: The maximum images per minute. Only relevant for certain models. + max_audio_megabytes_per_1_minute: + type: integer + description: The maximum audio megabytes per minute. Only relevant for certain models. + max_requests_per_1_day: + type: integer + description: The maximum requests per day. Only relevant for certain models. + batch_1_day_max_input_tokens: + type: integer + description: The maximum batch input tokens per day. Only relevant for certain models. + rate_limit.deleted: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The rate limit ID + role.created: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The role ID. + role_name: + type: string + description: The name of the role. + permissions: + type: array + items: + type: string + description: The permissions granted by the role. + resource_type: + type: string + description: The type of resource the role belongs to. + resource_id: + type: string + description: The resource the role is scoped to. + role.updated: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The role ID. + changes_requested: + type: object + description: The payload used to update the role. + properties: + role_name: + type: string + description: The updated role name, when provided. + resource_id: + type: string + description: The resource the role is scoped to. + resource_type: + type: string + description: The type of resource the role belongs to. + permissions_added: + type: array + items: + type: string + description: The permissions added to the role. 
+                permissions_removed:
+                  type: array
+                  items:
+                    type: string
+                  description: The permissions removed from the role.
+                description:
+                  type: string
+                  description: The updated role description, when provided.
+                metadata:
+                  type: object
+                  description: Additional metadata stored on the role.
+        role.deleted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The role ID.
+        role.assignment.created:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The identifier of the role assignment.
+            principal_id:
+              type: string
+              description: The principal (user or group) that received the role.
+            principal_type:
+              type: string
+              description: The type of principal (user or group) that received the role.
+            resource_id:
+              type: string
+              description: The resource the role assignment is scoped to.
+            resource_type:
+              type: string
+              description: The type of resource the role assignment is scoped to.
+        role.assignment.deleted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The identifier of the role assignment.
+            principal_id:
+              type: string
+              description: The principal (user or group) that had the role removed.
+            principal_type:
+              type: string
+              description: The type of principal (user or group) that had the role removed.
+            resource_id:
+              type: string
+              description: The resource the role assignment was scoped to.
+            resource_type:
+              type: string
+              description: The type of resource the role assignment was scoped to.
+        service_account.created:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The service account ID.
+            data:
+              type: object
+              description: The payload used to create the service account.
+              properties:
+                role:
+                  type: string
+                  description: The role of the service account. Is either `owner` or `member`.
+        service_account.updated:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The service account ID.
+            changes_requested:
+              type: object
+              description: The payload used to update the service account.
+              properties:
+                role:
+                  type: string
+                  description: The role of the service account. Is either `owner` or `member`.
+        service_account.deleted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The service account ID.
+        user.added:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The user ID.
+            data:
+              type: object
+              description: The payload used to add the user to the project.
+              properties:
+                role:
+                  type: string
+                  description: The role of the user. Is either `owner` or `member`.
+        user.updated:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The user ID.
+            changes_requested:
+              type: object
+              description: The payload used to update the user.
+              properties:
+                role:
+                  type: string
+                  description: The role of the user. Is either `owner` or `member`.
+        user.deleted:
+          type: object
+          description: The details for events with this `type`.
+          properties:
+            id:
+              type: string
+              description: The user ID.
+        certificate.created:
+          type: object
+          description: The details for events with this `type`.
+ properties: + id: + type: string + description: The certificate ID. + name: + type: string + description: The name of the certificate. + certificate.updated: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The certificate ID. + name: + type: string + description: The name of the certificate. + certificate.deleted: + type: object + description: The details for events with this `type`. + properties: + id: + type: string + description: The certificate ID. + name: + type: string + description: The name of the certificate. + certificate: + type: string + description: The certificate content in PEM format. + certificates.activated: + type: object + description: The details for events with this `type`. + properties: + certificates: + type: array + items: + type: object + properties: + id: + type: string + description: The certificate ID. + name: + type: string + description: The name of the certificate. + certificates.deactivated: + type: object + description: The details for events with this `type`. + properties: + certificates: + type: array + items: + type: object + properties: + id: + type: string + description: The certificate ID. + name: + type: string + description: The name of the certificate. + required: + - id + - type + - effective_at + - actor + x-oaiMeta: + name: The audit log object + example: | + { + "id": "req_xxx_20240101", + "type": "api_key.created", + "effective_at": 1720804090, + "actor": { + "type": "session", + "session": { + "user": { + "id": "user-xxx", + "email": "user@example.com" + }, + "ip_address": "127.0.0.1", + "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" + } + }, + "api_key.created": { + "id": "key_xxxx", + "data": { + "scopes": ["resource.operation"] + } + } + } + AuditLogActor: + type: object + description: The actor who performed the audit logged action. + properties: + type: + type: string + description: The type of actor. Is either `session` or `api_key`. + enum: + - session + - api_key + session: + $ref: '#/components/schemas/AuditLogActorSession' + api_key: + $ref: '#/components/schemas/AuditLogActorApiKey' + AuditLogActorApiKey: + type: object + description: The API Key used to perform the audit logged action. + properties: + id: + type: string + description: The tracking id of the API key. + type: + type: string + description: The type of API key. Can be either `user` or `service_account`. + enum: + - user + - service_account + user: + $ref: '#/components/schemas/AuditLogActorUser' + service_account: + $ref: '#/components/schemas/AuditLogActorServiceAccount' + AuditLogActorServiceAccount: + type: object + description: The service account that performed the audit logged action. + properties: + id: + type: string + description: The service account id. + AuditLogActorSession: + type: object + description: The session in which the audit logged action was performed. + properties: + user: + $ref: '#/components/schemas/AuditLogActorUser' + ip_address: + type: string + description: The IP address from which the action was performed. + AuditLogActorUser: + type: object + description: The user who performed the audit logged action. + properties: + id: + type: string + description: The user id. + email: + type: string + description: The user email. + AuditLogEventType: + type: string + description: The event type. 
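Each audit log entry carries its event-specific details under a property named after its `type`, so a consumer can stay generic. A sketch, assuming `entries` is the `data` array of an audit-log list response:

```python
from collections import Counter

def summarize_audit_logs(entries):
    # Count events by type and pull the per-type detail payload, e.g.
    # entry["api_key.created"] for an entry whose type is "api_key.created".
    counts = Counter(e["type"] for e in entries)
    details = [(e["id"], e["type"], e.get(e["type"], {})) for e in entries]
    return counts, details
```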
+ enum: + - api_key.created + - api_key.updated + - api_key.deleted + - certificate.created + - certificate.updated + - certificate.deleted + - certificates.activated + - certificates.deactivated + - checkpoint.permission.created + - checkpoint.permission.deleted + - external_key.registered + - external_key.removed + - group.created + - group.updated + - group.deleted + - invite.sent + - invite.accepted + - invite.deleted + - ip_allowlist.created + - ip_allowlist.updated + - ip_allowlist.deleted + - ip_allowlist.config.activated + - ip_allowlist.config.deactivated + - login.succeeded + - login.failed + - logout.succeeded + - logout.failed + - organization.updated + - project.created + - project.updated + - project.archived + - project.deleted + - rate_limit.updated + - rate_limit.deleted + - resource.deleted + - role.created + - role.updated + - role.deleted + - role.assignment.created + - role.assignment.deleted + - scim.enabled + - scim.disabled + - service_account.created + - service_account.updated + - service_account.deleted + - user.added + - user.updated + - user.deleted + AutoChunkingStrategyRequestParam: + type: object + title: Auto Chunking Strategy + description: >- + The default strategy. This strategy currently uses a `max_chunk_size_tokens` of `800` and + `chunk_overlap_tokens` of `400`. + additionalProperties: false + properties: + type: + type: string + description: Always `auto`. + enum: + - auto + x-stainless-const: true + required: + - type + Batch: + type: object + properties: + id: + type: string + object: + type: string + enum: + - batch + description: The object type, which is always `batch`. + x-stainless-const: true + endpoint: + type: string + description: The OpenAI API endpoint used by the batch. + model: + type: string + description: | + Model ID used to process the batch, like `gpt-5-2025-08-07`. OpenAI + offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the [model + guide](https://platform.openai.com/docs/models) to browse and compare available models. + errors: + type: object + properties: + object: + type: string + description: The object type, which is always `list`. + data: + type: array + items: + $ref: '#/components/schemas/BatchError' + input_file_id: + type: string + description: The ID of the input file for the batch. + completion_window: + type: string + description: The time frame within which the batch should be processed. + status: + type: string + description: The current status of the batch. + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + output_file_id: + type: string + description: The ID of the file containing the outputs of successfully executed requests. + error_file_id: + type: string + description: The ID of the file containing the outputs of requests with errors. + created_at: + type: integer + description: The Unix timestamp (in seconds) for when the batch was created. + in_progress_at: + type: integer + description: The Unix timestamp (in seconds) for when the batch started processing. + expires_at: + type: integer + description: The Unix timestamp (in seconds) for when the batch will expire. + finalizing_at: + type: integer + description: The Unix timestamp (in seconds) for when the batch started finalizing. + completed_at: + type: integer + description: The Unix timestamp (in seconds) for when the batch was completed. 
+        failed_at:
+          type: integer
+          description: The Unix timestamp (in seconds) for when the batch failed.
+        expired_at:
+          type: integer
+          description: The Unix timestamp (in seconds) for when the batch expired.
+        cancelling_at:
+          type: integer
+          description: The Unix timestamp (in seconds) for when the batch started cancelling.
+        cancelled_at:
+          type: integer
+          description: The Unix timestamp (in seconds) for when the batch was cancelled.
+        request_counts:
+          $ref: '#/components/schemas/BatchRequestCounts'
+        usage:
+          type: object
+          description: |
+            Represents token usage details including input tokens, output tokens, a
+            breakdown of output tokens, and the total tokens used. Only populated on
+            batches created after September 7, 2025.
+          properties:
+            input_tokens:
+              type: integer
+              description: The number of input tokens.
+            input_tokens_details:
+              type: object
+              description: A detailed breakdown of the input tokens.
+              properties:
+                cached_tokens:
+                  type: integer
+                  description: |
+                    The number of tokens that were retrieved from the cache. [More on
+                    prompt caching](https://platform.openai.com/docs/guides/prompt-caching).
+              required:
+                - cached_tokens
+            output_tokens:
+              type: integer
+              description: The number of output tokens.
+            output_tokens_details:
+              type: object
+              description: A detailed breakdown of the output tokens.
+              properties:
+                reasoning_tokens:
+                  type: integer
+                  description: The number of reasoning tokens.
+              required:
+                - reasoning_tokens
+            total_tokens:
+              type: integer
+              description: The total number of tokens used.
+          required:
+            - input_tokens
+            - input_tokens_details
+            - output_tokens
+            - output_tokens_details
+            - total_tokens
+        metadata:
+          $ref: '#/components/schemas/Metadata'
+      required:
+        - id
+        - object
+        - endpoint
+        - input_file_id
+        - completion_window
+        - status
+        - created_at
+      x-oaiMeta:
+        name: The batch object
+        example: |
+          {
+            "id": "batch_abc123",
+            "object": "batch",
+            "endpoint": "/v1/completions",
+            "model": "gpt-5-2025-08-07",
+            "errors": null,
+            "input_file_id": "file-abc123",
+            "completion_window": "24h",
+            "status": "completed",
+            "output_file_id": "file-cvaTdG",
+            "error_file_id": "file-HOWS94",
+            "created_at": 1711471533,
+            "in_progress_at": 1711471538,
+            "expires_at": 1711557933,
+            "finalizing_at": 1711493133,
+            "completed_at": 1711493163,
+            "failed_at": null,
+            "expired_at": null,
+            "cancelling_at": null,
+            "cancelled_at": null,
+            "request_counts": {
+              "total": 100,
+              "completed": 95,
+              "failed": 5
+            },
+            "usage": {
+              "input_tokens": 1500,
+              "input_tokens_details": {
+                "cached_tokens": 1024
+              },
+              "output_tokens": 500,
+              "output_tokens_details": {
+                "reasoning_tokens": 300
+              },
+              "total_tokens": 2000
+            },
+            "metadata": {
+              "customer_id": "user_123456789",
+              "batch_description": "Nightly eval job"
+            }
+          }
+    BatchFileExpirationAfter:
+      type: object
+      title: File expiration policy
+      description: The expiration policy for the output and/or error file that are generated for a batch.
+      properties:
+        anchor:
+          description: >-
+            Anchor timestamp after which the expiration policy applies. Supported anchors: `created_at`. Note
+            that the anchor is the file creation time, not the time the batch is created.
+          type: string
+          enum:
+            - created_at
+          x-stainless-const: true
+        seconds:
+          description: >-
+            The number of seconds after the anchor time that the file will expire. Must be between 3600 (1
+            hour) and 2592000 (30 days).
+          type: integer
+          minimum: 3600
+          maximum: 2592000
+      required:
+        - anchor
+        - seconds
+    BatchRequestInput:
+      type: object
+      description: The per-line object of the batch input file
+      properties:
+        custom_id:
+          type: string
+          description: >-
+            A developer-provided per-request id that will be used to match outputs to inputs. Must be unique
+            for each request in a batch.
+        method:
+          type: string
+          enum:
+            - POST
+          description: The HTTP method to be used for the request. Currently only `POST` is supported.
+          x-stainless-const: true
+        url:
+          type: string
+          description: >-
+            The OpenAI API relative URL to be used for the request. Currently `/v1/responses`,
+            `/v1/chat/completions`, `/v1/embeddings`, `/v1/completions`, and `/v1/moderations` are supported.
+      x-oaiMeta:
+        name: The request input object
+        example: >
+          {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model":
+          "gpt-4o-mini", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role":
+          "user", "content": "What is 2+2?"}]}}
+    BatchRequestOutput:
+      type: object
+      description: The per-line object of the batch output and error files
+      properties:
+        id:
+          type: string
+        custom_id:
+          type: string
+          description: A developer-provided per-request id that will be used to match outputs to inputs.
+        response:
+          anyOf:
+            - type: object
+              properties:
+                status_code:
+                  type: integer
+                  description: The HTTP status code of the response
+                request_id:
+                  type: string
+                  description: >-
+                    A unique identifier for the OpenAI API request. Please include this request ID when
+                    contacting support.
+                body:
+                  type: object
+                  x-oaiTypeLabel: map
+                  description: The JSON body of the response
+            - type: 'null'
+        error:
+          anyOf:
+            - type: object
+              description: >-
+                For requests that failed with a non-HTTP error, this will contain more information on the
+                cause of the failure.
+              properties:
+                code:
+                  type: string
+                  description: |
+                    A machine-readable error code.
+
+                    Possible values:
+                    - `batch_expired`: The request could not be executed before the
+                    completion window ended.
+                    - `batch_cancelled`: The batch was cancelled before this request
+                    executed.
+                    - `request_timeout`: The underlying call to the model timed out.
+                message:
+                  type: string
+                  description: A human-readable error message.
+            - type: 'null'
+      x-oaiMeta:
+        name: The request output object
+        example: >
+          {"id": "batch_req_wnaDys", "custom_id": "request-2", "response": {"status_code": 200, "request_id":
+          "req_c187b3", "body": {"id": "chatcmpl-9758Iw", "object": "chat.completion", "created": 1711475054,
+          "model": "gpt-4o-mini", "choices": [{"index": 0, "message": {"role": "assistant", "content": "2 + 2
+          equals 4."}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 24, "completion_tokens": 15,
+          "total_tokens": 39}, "system_fingerprint": null}}, "error": null}
+    Certificate:
+      type: object
+      description: Represents an individual `certificate` uploaded to the organization.
+      properties:
+        object:
+          type: string
+          enum:
+            - certificate
+            - organization.certificate
+            - organization.project.certificate
+          description: >
+            The object type.
+
+
+            - If creating, updating, or getting a specific certificate, the object type is `certificate`.
+
+            - If listing, activating, or deactivating certificates for the organization, the object type is
+            `organization.certificate`.
+
+            - If listing, activating, or deactivating certificates for a project, the object type is
+            `organization.project.certificate`.
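The `BatchRequestInput` object above is one JSON object per line of the input file (JSONL). A sketch of writing such a file; the model and questions are placeholders, and `custom_id` must be unique so outputs can be matched back to inputs:

```python
import json

requests = [
    {
        "custom_id": f"request-{i}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4o-mini",
            "messages": [{"role": "user", "content": question}],
        },
    }
    for i, question in enumerate(["What is 2+2?", "What is 3+3?"], start=1)
]

with open("batch_input.jsonl", "w") as f:
    for request in requests:
        f.write(json.dumps(request) + "\n")
```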
+ x-stainless-const: true + id: + type: string + description: The identifier, which can be referenced in API endpoints + name: + type: string + description: The name of the certificate. + created_at: + type: integer + description: The Unix timestamp (in seconds) of when the certificate was uploaded. + certificate_details: + type: object + properties: + valid_at: + type: integer + description: The Unix timestamp (in seconds) of when the certificate becomes valid. + expires_at: + type: integer + description: The Unix timestamp (in seconds) of when the certificate expires. + content: + type: string + description: The content of the certificate in PEM format. + active: + type: boolean + description: >- + Whether the certificate is currently active at the specified scope. Not returned when getting + details for a specific certificate. + required: + - object + - id + - name + - created_at + - certificate_details + x-oaiMeta: + name: The certificate object + example: | + { + "object": "certificate", + "id": "cert_abc", + "name": "My Certificate", + "created_at": 1234567, + "certificate_details": { + "valid_at": 1234567, + "expires_at": 12345678, + "content": "-----BEGIN CERTIFICATE----- MIIGAjCCA...6znFlOW+ -----END CERTIFICATE-----" + } + } + ChatCompletionAllowedTools: + type: object + title: Allowed tools + description: | + Constrains the tools available to the model to a pre-defined set. + properties: + mode: + type: string + enum: + - auto + - required + description: | + Constrains the tools available to the model to a pre-defined set. + + `auto` allows the model to pick from among the allowed tools and generate a + message. + + `required` requires the model to call one or more of the allowed tools. + tools: + type: array + description: | + A list of tool definitions that the model should be allowed to call. + + For the Chat Completions API, the list of tool definitions might look like: + ```json + [ + { "type": "function", "function": { "name": "get_weather" } }, + { "type": "function", "function": { "name": "get_time" } } + ] + ``` + items: + type: object + x-oaiExpandable: false + description: | + A tool definition that the model should be allowed to call. + additionalProperties: true + required: + - mode + - tools + ChatCompletionAllowedToolsChoice: + type: object + title: Allowed tools + description: | + Constrains the tools available to the model to a pre-defined set. + properties: + type: + type: string + enum: + - allowed_tools + description: Allowed tool configuration type. Always `allowed_tools`. + x-stainless-const: true + allowed_tools: + $ref: '#/components/schemas/ChatCompletionAllowedTools' + required: + - type + - allowed_tools + ChatCompletionDeleted: + type: object + properties: + object: + type: string + description: The type of object being deleted. + enum: + - chat.completion.deleted + x-stainless-const: true + id: + type: string + description: The ID of the chat completion that was deleted. + deleted: + type: boolean + description: Whether the chat completion was deleted. + required: + - object + - id + - deleted + ChatCompletionFunctionCallOption: + type: object + description: | + Specifying a particular function via `{"name": "my_function"}` forces the model to call that function. + properties: + name: + type: string + description: The name of the function to call. 
+ required: + - name + x-stainless-variantName: function_call_option + ChatCompletionFunctions: + type: object + deprecated: true + properties: + description: + type: string + description: >- + A description of what the function does, used by the model to choose when and how to call the + function. + name: + type: string + description: >- + The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, + with a maximum length of 64. + parameters: + $ref: '#/components/schemas/FunctionParameters' + required: + - name + ChatCompletionList: + type: object + title: ChatCompletionList + description: | + An object representing a list of Chat Completions. + properties: + object: + type: string + enum: + - list + default: list + description: | + The type of this object. It is always set to "list". + x-stainless-const: true + data: + type: array + description: | + An array of chat completion objects. + items: + $ref: '#/components/schemas/CreateChatCompletionResponse' + first_id: + type: string + description: The identifier of the first chat completion in the data array. + last_id: + type: string + description: The identifier of the last chat completion in the data array. + has_more: + type: boolean + description: Indicates whether there are more Chat Completions available. + required: + - object + - data + - first_id + - last_id + - has_more + x-oaiMeta: + name: The chat completion list object + group: chat + example: | + { + "object": "list", + "data": [ + { + "object": "chat.completion", + "id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2", + "model": "gpt-4o-2024-08-06", + "created": 1738960610, + "request_id": "req_ded8ab984ec4bf840f37566c1011c417", + "tool_choice": null, + "usage": { + "total_tokens": 31, + "completion_tokens": 18, + "prompt_tokens": 13 + }, + "seed": 4944116822809979520, + "top_p": 1.0, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "system_fingerprint": "fp_50cad350e4", + "input_user": null, + "service_tier": "default", + "tools": null, + "metadata": {}, + "choices": [ + { + "index": 0, + "message": { + "content": "Mind of circuits hum, \nLearning patterns in silence— \nFuture's quiet spark.", + "role": "assistant", + "tool_calls": null, + "function_call": null + }, + "finish_reason": "stop", + "logprobs": null + } + ], + "response_format": null + } + ], + "first_id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2", + "last_id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2", + "has_more": false + } + ChatCompletionMessageCustomToolCall: + type: object + title: Custom tool call + description: | + A call to a custom tool created by the model. + properties: + id: + type: string + description: The ID of the tool call. + type: + type: string + enum: + - custom + description: The type of the tool. Always `custom`. + x-stainless-const: true + custom: + type: object + description: The custom tool that the model called. + properties: + name: + type: string + description: The name of the custom tool to call. + input: + type: string + description: The input for the custom tool call generated by the model. + required: + - name + - input + required: + - id + - type + - custom + ChatCompletionMessageList: + type: object + title: ChatCompletionMessageList + description: | + An object representing a list of chat completion messages. + properties: + object: + type: string + enum: + - list + default: list + description: | + The type of this object. It is always set to "list". 
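`ChatCompletionList` exposes cursor-style pagination through `last_id` and `has_more`. A sketch of walking every page, where `fetch_page(after)` is a hypothetical callable returning one parsed list object:

```python
def iter_all_completions(fetch_page):
    after = None
    while True:
        page = fetch_page(after)  # one parsed ChatCompletionList object
        yield from page["data"]
        if not page["has_more"]:
            return
        after = page["last_id"]   # cursor for the next request
```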
+ x-stainless-const: true + data: + type: array + description: | + An array of chat completion message objects. + items: + allOf: + - $ref: '#/components/schemas/ChatCompletionResponseMessage' + - type: object + required: + - id + properties: + id: + type: string + description: The identifier of the chat message. + content_parts: + anyOf: + - type: array + description: > + If a content parts array was provided, this is an array of `text` and `image_url` + parts. + + Otherwise, null. + items: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartImage' + - type: 'null' + first_id: + type: string + description: The identifier of the first chat message in the data array. + last_id: + type: string + description: The identifier of the last chat message in the data array. + has_more: + type: boolean + description: Indicates whether there are more chat messages available. + required: + - object + - data + - first_id + - last_id + - has_more + x-oaiMeta: + name: The chat completion message list object + group: chat + example: | + { + "object": "list", + "data": [ + { + "id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2-0", + "role": "user", + "content": "write a haiku about ai", + "name": null, + "content_parts": null + } + ], + "first_id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2-0", + "last_id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2-0", + "has_more": false + } + ChatCompletionMessageToolCall: + type: object + title: Function tool call + description: | + A call to a function tool created by the model. + properties: + id: + type: string + description: The ID of the tool call. + type: + type: string + enum: + - function + description: The type of the tool. Currently, only `function` is supported. + x-stainless-const: true + function: + type: object + description: The function that the model called. + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: >- + The arguments to call the function with, as generated by the model in JSON format. Note that + the model does not always generate valid JSON, and may hallucinate parameters not defined by + your function schema. Validate the arguments in your code before calling your function. + required: + - name + - arguments + required: + - id + - type + - function + ChatCompletionMessageToolCallChunk: + type: object + properties: + index: + type: integer + id: + type: string + description: The ID of the tool call. + type: + type: string + enum: + - function + description: The type of the tool. Currently, only `function` is supported. + x-stainless-const: true + function: + type: object + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: >- + The arguments to call the function with, as generated by the model in JSON format. Note that + the model does not always generate valid JSON, and may hallucinate parameters not defined by + your function schema. Validate the arguments in your code before calling your function. + required: + - index + ChatCompletionMessageToolCalls: + type: array + description: The tool calls generated by the model, such as function calls. 
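Because `arguments` is model-generated JSON that may be invalid or include parameters you never declared, it pays to parse defensively before dispatching. A sketch:

```python
import json

def parse_tool_arguments(tool_call: dict):
    """Return validated arguments for a function tool call, or None."""
    try:
        args = json.loads(tool_call["function"]["arguments"])
    except (KeyError, json.JSONDecodeError):
        return None  # report the failure back to the model instead
    return args if isinstance(args, dict) else None
```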
+ items: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/ChatCompletionMessageToolCall' + - $ref: '#/components/schemas/ChatCompletionMessageCustomToolCall' + x-stainless-naming: + python: + model_name: chat_completion_message_tool_call_union + param_model_name: chat_completion_message_tool_call_union_param + x-stainless-go-variant-constructor: skip + ChatCompletionModalities: + anyOf: + - type: array + description: > + Output types that you would like the model to generate for this request. + + Most models are capable of generating text, which is the default: + + + `["text"]` + + + The `gpt-4o-audio-preview` model can also be used to [generate + audio](https://platform.openai.com/docs/guides/audio). To + + request that this model generate both text and audio responses, you can + + use: + + + `["text", "audio"]` + items: + type: string + enum: + - text + - audio + - type: 'null' + ChatCompletionNamedToolChoice: + type: object + title: Function tool choice + description: Specifies a tool the model should use. Use to force the model to call a specific function. + properties: + type: + type: string + enum: + - function + description: For function calling, the type is always `function`. + x-stainless-const: true + function: + type: object + properties: + name: + type: string + description: The name of the function to call. + required: + - name + required: + - type + - function + ChatCompletionNamedToolChoiceCustom: + type: object + title: Custom tool choice + description: Specifies a tool the model should use. Use to force the model to call a specific custom tool. + properties: + type: + type: string + enum: + - custom + description: For custom tool calling, the type is always `custom`. + x-stainless-const: true + custom: + type: object + properties: + name: + type: string + description: The name of the custom tool to call. + required: + - name + required: + - type + - custom + ChatCompletionRequestAssistantMessage: + type: object + title: Assistant message + description: | + Messages sent by the model in response to user messages. + properties: + content: + anyOf: + - description: > + The contents of the assistant message. Required unless `tool_calls` or `function_call` is + specified. + anyOf: + - type: string + description: The contents of the assistant message. + title: Text content + - type: array + description: >- + An array of content parts with a defined type. Can be one or more of type `text`, or + exactly one of type `refusal`. + title: Array of content parts + items: + $ref: '#/components/schemas/ChatCompletionRequestAssistantMessageContentPart' + minItems: 1 + - type: 'null' + refusal: + anyOf: + - type: string + description: The refusal message by the assistant. + - type: 'null' + role: + type: string + enum: + - assistant + description: The role of the messages author, in this case `assistant`. + x-stainless-const: true + name: + type: string + description: >- + An optional name for the participant. Provides the model information to differentiate between + participants of the same role. + audio: + anyOf: + - type: object + description: | + Data about a previous audio response from the model. + [Learn more](https://platform.openai.com/docs/guides/audio). + required: + - id + properties: + id: + type: string + description: | + Unique identifier for a previous audio response from the model. 
+ - type: 'null' + tool_calls: + $ref: '#/components/schemas/ChatCompletionMessageToolCalls' + function_call: + anyOf: + - type: object + deprecated: true + description: >- + Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be + called, as generated by the model. + properties: + arguments: + type: string + description: >- + The arguments to call the function with, as generated by the model in JSON format. Note + that the model does not always generate valid JSON, and may hallucinate parameters not + defined by your function schema. Validate the arguments in your code before calling your + function. + name: + type: string + description: The name of the function to call. + required: + - arguments + - name + - type: 'null' + required: + - role + x-stainless-soft-required: + - content + ChatCompletionRequestAssistantMessageContentPart: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartRefusal' + ChatCompletionRequestDeveloperMessage: + type: object + title: Developer message + description: | + Developer-provided instructions that the model should follow, regardless of + messages sent by the user. With o1 models and newer, `developer` messages + replace the previous `system` messages. + properties: + content: + description: The contents of the developer message. + anyOf: + - type: string + description: The contents of the developer message. + title: Text content + - type: array + description: >- + An array of content parts with a defined type. For developer messages, only type `text` is + supported. + title: Array of content parts + items: + $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' + minItems: 1 + role: + type: string + enum: + - developer + description: The role of the messages author, in this case `developer`. + x-stainless-const: true + name: + type: string + description: >- + An optional name for the participant. Provides the model information to differentiate between + participants of the same role. + required: + - content + - role + x-stainless-naming: + go: + variant_constructor: DeveloperMessage + ChatCompletionRequestFunctionMessage: + type: object + title: Function message + deprecated: true + properties: + role: + type: string + enum: + - function + description: The role of the messages author, in this case `function`. + x-stainless-const: true + content: + anyOf: + - type: string + description: The contents of the function message. + - type: 'null' + name: + type: string + description: The name of the function to call. + required: + - role + - content + - name + ChatCompletionRequestMessage: + discriminator: + propertyName: role + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestDeveloperMessage' + - $ref: '#/components/schemas/ChatCompletionRequestSystemMessage' + - $ref: '#/components/schemas/ChatCompletionRequestUserMessage' + - $ref: '#/components/schemas/ChatCompletionRequestAssistantMessage' + - $ref: '#/components/schemas/ChatCompletionRequestToolMessage' + - $ref: '#/components/schemas/ChatCompletionRequestFunctionMessage' + ChatCompletionRequestMessageContentPartAudio: + type: object + title: Audio content part + description: | + Learn about [audio inputs](https://platform.openai.com/docs/guides/audio). + properties: + type: + type: string + enum: + - input_audio + description: The type of the content part. Always `input_audio`. 
+ x-stainless-const: true + input_audio: + type: object + properties: + data: + type: string + description: Base64 encoded audio data. + format: + type: string + enum: + - wav + - mp3 + description: | + The format of the encoded audio data. Currently supports "wav" and "mp3". + required: + - data + - format + required: + - type + - input_audio + x-stainless-naming: + go: + variant_constructor: InputAudioContentPart + ChatCompletionRequestMessageContentPartFile: + type: object + title: File content part + description: | + Learn about [file inputs](https://platform.openai.com/docs/guides/text) for text generation. + properties: + type: + type: string + enum: + - file + description: The type of the content part. Always `file`. + x-stainless-const: true + file: + type: object + properties: + filename: + type: string + description: | + The name of the file, used when passing the file to the model as a + string. + file_data: + type: string + description: | + The base64 encoded file data, used when passing the file to the model + as a string. + file_id: + type: string + description: | + The ID of an uploaded file to use as input. + x-stainless-naming: + java: + type_name: FileObject + kotlin: + type_name: FileObject + required: + - type + - file + x-stainless-naming: + go: + variant_constructor: FileContentPart + ChatCompletionRequestMessageContentPartImage: + type: object + title: Image content part + description: | + Learn about [image inputs](https://platform.openai.com/docs/guides/vision). + properties: + type: + type: string + enum: + - image_url + description: The type of the content part. + x-stainless-const: true + image_url: + type: object + properties: + url: + type: string + description: Either a URL of the image or the base64 encoded image data. + format: uri + detail: + type: string + description: >- + Specifies the detail level of the image. Learn more in the [Vision + guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding). + enum: + - auto + - low + - high + default: auto + required: + - url + required: + - type + - image_url + x-stainless-naming: + go: + variant_constructor: ImageContentPart + ChatCompletionRequestMessageContentPartRefusal: + type: object + title: Refusal content part + properties: + type: + type: string + enum: + - refusal + description: The type of the content part. + x-stainless-const: true + refusal: + type: string + description: The refusal message generated by the model. + required: + - type + - refusal + ChatCompletionRequestMessageContentPartText: + type: object + title: Text content part + description: | + Learn about [text inputs](https://platform.openai.com/docs/guides/text-generation). + properties: + type: + type: string + enum: + - text + description: The type of the content part. + x-stainless-const: true + text: + type: string + description: The text content. + required: + - type + - text + x-stainless-naming: + go: + variant_constructor: TextContentPart + ChatCompletionRequestSystemMessage: + type: object + title: System message + description: | + Developer-provided instructions that the model should follow, regardless of + messages sent by the user. With o1 models and newer, use `developer` messages + for this purpose instead. + properties: + content: + description: The contents of the system message. + anyOf: + - type: string + description: The contents of the system message. + title: Text content + - type: array + description: >- + An array of content parts with a defined type. 
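The content-part schemas above compose into a single multimodal user message. A sketch, where the image URL and audio file are placeholders and audio must be base64-encoded `wav` or `mp3`:

```python
import base64

with open("clip.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("ascii")

user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe the image and transcribe the audio."},
        {"type": "image_url",
         "image_url": {"url": "https://example.com/photo.png", "detail": "low"}},
        {"type": "input_audio",
         "input_audio": {"data": audio_b64, "format": "wav"}},
    ],
}
```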
For system messages, only type `text` is + supported. + title: Array of content parts + items: + $ref: '#/components/schemas/ChatCompletionRequestSystemMessageContentPart' + minItems: 1 + role: + type: string + enum: + - system + description: The role of the messages author, in this case `system`. + x-stainless-const: true + name: + type: string + description: >- + An optional name for the participant. Provides the model information to differentiate between + participants of the same role. + required: + - content + - role + x-stainless-naming: + go: + variant_constructor: SystemMessage + ChatCompletionRequestSystemMessageContentPart: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' + ChatCompletionRequestToolMessage: + type: object + title: Tool message + properties: + role: + type: string + enum: + - tool + description: The role of the messages author, in this case `tool`. + x-stainless-const: true + content: + description: The contents of the tool message. + anyOf: + - type: string + description: The contents of the tool message. + title: Text content + - type: array + description: >- + An array of content parts with a defined type. For tool messages, only type `text` is + supported. + title: Array of content parts + items: + $ref: '#/components/schemas/ChatCompletionRequestToolMessageContentPart' + minItems: 1 + tool_call_id: + type: string + description: Tool call that this message is responding to. + required: + - role + - content + - tool_call_id + x-stainless-naming: + go: + variant_constructor: ToolMessage + ChatCompletionRequestToolMessageContentPart: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' + ChatCompletionRequestUserMessage: + type: object + title: User message + description: | + Messages sent by an end user, containing prompts or additional context + information. + properties: + content: + description: | + The contents of the user message. + anyOf: + - type: string + description: The text contents of the message. + title: Text content + - type: array + description: >- + An array of content parts with a defined type. Supported options differ based on the + [model](https://platform.openai.com/docs/models) being used to generate the response. Can + contain text, image, or audio inputs. + title: Array of content parts + items: + $ref: '#/components/schemas/ChatCompletionRequestUserMessageContentPart' + minItems: 1 + role: + type: string + enum: + - user + description: The role of the messages author, in this case `user`. + x-stainless-const: true + name: + type: string + description: >- + An optional name for the participant. Provides the model information to differentiate between + participants of the same role. + required: + - content + - role + x-stainless-naming: + go: + variant_constructor: UserMessage + ChatCompletionRequestUserMessageContentPart: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartImage' + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartAudio' + - $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartFile' + discriminator: + propertyName: type + ChatCompletionResponseMessage: + type: object + description: A chat completion message generated by the model. + properties: + content: + anyOf: + - type: string + description: The contents of the message. 
+ - type: 'null' + refusal: + anyOf: + - type: string + description: The refusal message generated by the model. + - type: 'null' + tool_calls: + $ref: '#/components/schemas/ChatCompletionMessageToolCalls' + annotations: + type: array + description: | + Annotations for the message, when applicable, as when using the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + items: + type: object + description: | + A URL citation when using web search. + required: + - type + - url_citation + properties: + type: + type: string + description: The type of the URL citation. Always `url_citation`. + enum: + - url_citation + x-stainless-const: true + url_citation: + type: object + description: A URL citation when using web search. + required: + - end_index + - start_index + - url + - title + properties: + end_index: + type: integer + description: The index of the last character of the URL citation in the message. + start_index: + type: integer + description: The index of the first character of the URL citation in the message. + url: + type: string + description: The URL of the web resource. + title: + type: string + description: The title of the web resource. + role: + type: string + enum: + - assistant + description: The role of the author of this message. + x-stainless-const: true + function_call: + type: object + deprecated: true + description: >- + Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be + called, as generated by the model. + properties: + arguments: + type: string + description: >- + The arguments to call the function with, as generated by the model in JSON format. Note that + the model does not always generate valid JSON, and may hallucinate parameters not defined by + your function schema. Validate the arguments in your code before calling your function. + name: + type: string + description: The name of the function to call. + required: + - name + - arguments + audio: + anyOf: + - type: object + description: > + If the audio output modality is requested, this object contains data + + about the audio response from the model. [Learn + more](https://platform.openai.com/docs/guides/audio). + required: + - id + - expires_at + - data + - transcript + properties: + id: + type: string + description: Unique identifier for this audio response. + expires_at: + type: integer + description: | + The Unix timestamp (in seconds) for when this audio response will + no longer be accessible on the server for use in multi-turn + conversations. + data: + type: string + description: | + Base64 encoded audio bytes generated by the model, in the format + specified in the request. + transcript: + type: string + description: Transcript of the audio generated by the model. + - type: 'null' + required: + - role + - content + - refusal + ChatCompletionRole: + type: string + description: The role of the author of a message + enum: + - developer + - system + - user + - assistant + - tool + - function + ChatCompletionStreamOptions: + anyOf: + - description: | + Options for streaming response. Only set this when you set `stream: true`. + type: object + properties: + include_usage: + type: boolean + description: | + If set, an additional chunk will be streamed before the `data: [DONE]` + message. The `usage` field on this chunk shows the token usage statistics + for the entire request, and the `choices` field will always be an empty + array. + + All other chunks will also include a `usage` field, but with a null + value. 
**NOTE:** If the stream is interrupted, you may not receive the + final usage chunk which contains the total token usage for the request. + include_obfuscation: + type: boolean + description: | + When true, stream obfuscation will be enabled. Stream obfuscation adds + random characters to an `obfuscation` field on streaming delta events to + normalize payload sizes as a mitigation to certain side-channel attacks. + These obfuscation fields are included by default, but add a small amount + of overhead to the data stream. You can set `include_obfuscation` to + false to optimize for bandwidth if you trust the network links between + your application and the OpenAI API. + - type: 'null' + ChatCompletionStreamResponseDelta: + type: object + description: A chat completion delta generated by streamed model responses. + properties: + content: + anyOf: + - type: string + description: The contents of the chunk message. + - type: 'null' + function_call: + deprecated: true + type: object + description: >- + Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be + called, as generated by the model. + properties: + arguments: + type: string + description: >- + The arguments to call the function with, as generated by the model in JSON format. Note that + the model does not always generate valid JSON, and may hallucinate parameters not defined by + your function schema. Validate the arguments in your code before calling your function. + name: + type: string + description: The name of the function to call. + tool_calls: + type: array + items: + $ref: '#/components/schemas/ChatCompletionMessageToolCallChunk' + role: + type: string + enum: + - developer + - system + - user + - assistant + - tool + description: The role of the author of this message. + refusal: + anyOf: + - type: string + description: The refusal message generated by the model. + - type: 'null' + ChatCompletionTokenLogprob: + type: object + properties: + token: + description: The token. + type: string + logprob: + description: >- + The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the + value `-9999.0` is used to signify that the token is very unlikely. + type: number + bytes: + anyOf: + - description: >- + A list of integers representing the UTF-8 bytes representation of the token. Useful in + instances where characters are represented by multiple tokens and their byte representations + must be combined to generate the correct text representation. Can be `null` if there is no + bytes representation for the token. + type: array + items: + type: integer + - type: 'null' + top_logprobs: + description: >- + List of the most likely tokens and their log probability, at this token position. In rare cases, + there may be fewer than the number of requested `top_logprobs` returned. + type: array + items: + type: object + properties: + token: + description: The token. + type: string + logprob: + description: >- + The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, + the value `-9999.0` is used to signify that the token is very unlikely. + type: number + bytes: + anyOf: + - description: >- + A list of integers representing the UTF-8 bytes representation of the token. Useful in + instances where characters are represented by multiple tokens and their byte + representations must be combined to generate the correct text representation. Can be + `null` if there is no bytes representation for the token. 
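+                    # Illustrative only -- not part of the upstream schema. A single logprob
+                    # entry (hypothetical values) that validates against this shape:
+                    #   {"token": "Hello", "logprob": -0.31, "bytes": [72, 101, 108, 108, 111],
+                    #    "top_logprobs": [{"token": "Hello", "logprob": -0.31, "bytes": [72, 101, 108, 108, 111]}]}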
+ type: array + items: + type: integer + - type: 'null' + required: + - token + - logprob + - bytes + required: + - token + - logprob + - bytes + - top_logprobs + ChatCompletionTool: + type: object + title: Function tool + description: | + A function tool that can be used to generate a response. + properties: + type: + type: string + enum: + - function + description: The type of the tool. Currently, only `function` is supported. + x-stainless-const: true + function: + $ref: '#/components/schemas/FunctionObject' + required: + - type + - function + ChatCompletionToolChoiceOption: + description: > + Controls which (if any) tool is called by the model. + + `none` means the model will not call any tool and instead generates a message. + + `auto` means the model can pick between generating a message or calling one or more tools. + + `required` means the model must call one or more tools. + + Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces + the model to call that tool. + + + `none` is the default when no tools are present. `auto` is the default if tools are present. + anyOf: + - type: string + title: Auto + description: > + `none` means the model will not call any tool and instead generates a message. `auto` means the + model can pick between generating a message or calling one or more tools. `required` means the + model must call one or more tools. + enum: + - none + - auto + - required + - $ref: '#/components/schemas/ChatCompletionAllowedToolsChoice' + - $ref: '#/components/schemas/ChatCompletionNamedToolChoice' + - $ref: '#/components/schemas/ChatCompletionNamedToolChoiceCustom' + x-stainless-go-variant-constructor: + naming: tool_choice_option_{variant} + ChunkingStrategyRequestParam: + type: object + description: >- + The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. Only + applicable if `file_ids` is non-empty. + anyOf: + - $ref: '#/components/schemas/AutoChunkingStrategyRequestParam' + - $ref: '#/components/schemas/StaticChunkingStrategyRequestParam' + discriminator: + propertyName: type + CodeInterpreterFileOutput: + type: object + title: Code interpreter file output + description: | + The output of a code interpreter tool call that is a file. + properties: + type: + type: string + enum: + - files + description: | + The type of the code interpreter file output. Always `files`. + x-stainless-const: true + files: + type: array + items: + type: object + properties: + mime_type: + type: string + description: | + The MIME type of the file. + file_id: + type: string + description: | + The ID of the file. + required: + - mime_type + - file_id + required: + - type + - files + CodeInterpreterTextOutput: + type: object + title: Code interpreter text output + description: | + The output of a code interpreter tool call that is text. + properties: + type: + type: string + enum: + - logs + description: | + The type of the code interpreter text output. Always `logs`. + x-stainless-const: true + logs: + type: string + description: | + The logs of the code interpreter tool call. + required: + - type + - logs + CodeInterpreterTool: + type: object + title: Code interpreter + description: | + A tool that runs Python code to help generate a response to a prompt. + properties: + type: + type: string + enum: + - code_interpreter + description: | + The type of the code interpreter tool. Always `code_interpreter`. + x-stainless-const: true + container: + description: | + The code interpreter container. 
+            Can be a container ID or an object that
+            specifies uploaded file IDs to make available to your code.
+          anyOf:
+            - type: string
+              description: The container ID.
+            - $ref: '#/components/schemas/CodeInterpreterContainerAuto'
+      required:
+        - type
+        - container
+    CodeInterpreterToolCall:
+      type: object
+      title: Code interpreter tool call
+      description: |
+        A tool call to run code.
+      properties:
+        type:
+          type: string
+          enum:
+            - code_interpreter_call
+          default: code_interpreter_call
+          x-stainless-const: true
+          description: |
+            The type of the code interpreter tool call. Always `code_interpreter_call`.
+        id:
+          type: string
+          description: |
+            The unique ID of the code interpreter tool call.
+        status:
+          type: string
+          enum:
+            - in_progress
+            - completed
+            - incomplete
+            - interpreting
+            - failed
+          description: >
+            The status of the code interpreter tool call. Valid values are `in_progress`, `completed`,
+            `incomplete`, `interpreting`, and `failed`.
+        container_id:
+          type: string
+          description: |
+            The ID of the container used to run the code.
+        code:
+          anyOf:
+            - type: string
+              description: |
+                The code to run, or null if not available.
+            - type: 'null'
+        outputs:
+          anyOf:
+            - type: array
+              items:
+                discriminator:
+                  propertyName: type
+                anyOf:
+                  - $ref: '#/components/schemas/CodeInterpreterOutputLogs'
+                  - $ref: '#/components/schemas/CodeInterpreterOutputImage'
+              discriminator:
+                propertyName: type
+              description: |
+                The outputs generated by the code interpreter, such as logs or images.
+                Can be null if no outputs are available.
+            - type: 'null'
+      required:
+        - type
+        - id
+        - status
+        - container_id
+        - code
+        - outputs
+    ComparisonFilter:
+      type: object
+      additionalProperties: false
+      title: Comparison Filter
+      description: >
+        A filter used to compare a specified attribute key to a given value using a defined comparison
+        operation.
+      properties:
+        type:
+          type: string
+          default: eq
+          enum:
+            - eq
+            - ne
+            - gt
+            - gte
+            - lt
+            - lte
+            - in
+            - nin
+          description: |
+            Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, `nin`.
+            - `eq`: equals
+            - `ne`: not equal
+            - `gt`: greater than
+            - `gte`: greater than or equal
+            - `lt`: less than
+            - `lte`: less than or equal
+            - `in`: in
+            - `nin`: not in
+        key:
+          type: string
+          description: The key to compare against the value.
+        value:
+          description: >-
+            The value to compare against the attribute key; supports string, number, boolean, or array
+            (for `in`/`nin`) types.
+          anyOf:
+            - type: string
+            - type: number
+            - type: boolean
+            - type: array
+              items:
+                $ref: '#/components/schemas/ComparisonFilterValueItems'
+      required:
+        - type
+        - key
+        - value
+      x-oaiMeta:
+        name: ComparisonFilter
+    CompleteUploadRequest:
+      type: object
+      additionalProperties: false
+      properties:
+        part_ids:
+          type: array
+          description: |
+            The ordered list of Part IDs.
+          items:
+            type: string
+        md5:
+          description: >
+            The optional md5 checksum for the file contents to verify if the bytes uploaded match what you
+            expect.
+          type: string
+      required:
+        - part_ids
+    CompletionUsage:
+      type: object
+      description: Usage statistics for the completion request.
+      properties:
+        completion_tokens:
+          type: integer
+          default: 0
+          description: Number of tokens in the generated completion.
+        prompt_tokens:
+          type: integer
+          default: 0
+          description: Number of tokens in the prompt.
+        total_tokens:
+          type: integer
+          default: 0
+          description: Total number of tokens used in the request (prompt + completion).
+        completion_tokens_details:
+          type: object
+          description: Breakdown of tokens used in a completion.
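+          # Illustrative only -- not part of the upstream schema. A full CompletionUsage
+          # payload (hypothetical counts), including the details objects below:
+          #   {"prompt_tokens": 19, "completion_tokens": 10, "total_tokens": 29,
+          #    "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
+          #    "completion_tokens_details": {"reasoning_tokens": 0, "audio_tokens": 0,
+          #     "accepted_prediction_tokens": 0, "rejected_prediction_tokens": 0}}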
+ properties: + accepted_prediction_tokens: + type: integer + default: 0 + description: | + When using Predicted Outputs, the number of tokens in the + prediction that appeared in the completion. + audio_tokens: + type: integer + default: 0 + description: Audio input tokens generated by the model. + reasoning_tokens: + type: integer + default: 0 + description: Tokens generated by the model for reasoning. + rejected_prediction_tokens: + type: integer + default: 0 + description: | + When using Predicted Outputs, the number of tokens in the + prediction that did not appear in the completion. However, like + reasoning tokens, these tokens are still counted in the total + completion tokens for purposes of billing, output, and context window + limits. + prompt_tokens_details: + type: object + description: Breakdown of tokens used in the prompt. + properties: + audio_tokens: + type: integer + default: 0 + description: Audio input tokens present in the prompt. + cached_tokens: + type: integer + default: 0 + description: Cached tokens present in the prompt. + required: + - prompt_tokens + - completion_tokens + - total_tokens + CompoundFilter: + $recursiveAnchor: true + type: object + additionalProperties: false + title: Compound Filter + description: Combine multiple filters using `and` or `or`. + properties: + type: + type: string + description: 'Type of operation: `and` or `or`.' + enum: + - and + - or + filters: + type: array + description: Array of filters to combine. Items can be `ComparisonFilter` or `CompoundFilter`. + items: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/ComparisonFilter' + - $recursiveRef: '#' + required: + - type + - filters + x-oaiMeta: + name: CompoundFilter + ComputerAction: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/ClickParam' + - $ref: '#/components/schemas/DoubleClickAction' + - $ref: '#/components/schemas/Drag' + - $ref: '#/components/schemas/KeyPressAction' + - $ref: '#/components/schemas/Move' + - $ref: '#/components/schemas/Screenshot' + - $ref: '#/components/schemas/Scroll' + - $ref: '#/components/schemas/Type' + - $ref: '#/components/schemas/Wait' + ComputerScreenshotImage: + type: object + description: | + A computer screenshot image used with the computer use tool. + properties: + type: + type: string + enum: + - computer_screenshot + default: computer_screenshot + description: | + Specifies the event type. For a computer screenshot, this property is + always set to `computer_screenshot`. + x-stainless-const: true + image_url: + type: string + description: The URL of the screenshot image. + file_id: + type: string + description: The identifier of an uploaded file that contains the screenshot. + required: + - type + ComputerToolCall: + type: object + title: Computer tool call + description: | + A tool call to a computer use tool. See the + [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) for more information. + properties: + type: + type: string + description: The type of the computer call. Always `computer_call`. + enum: + - computer_call + default: computer_call + id: + type: string + description: The unique ID of the computer call. + call_id: + type: string + description: | + An identifier used when responding to the tool call with output. 
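+        # Illustrative only -- not part of the upstream schema. A hypothetical `action`
+        # value for this call, using the click variant of ComputerAction:
+        #   {"type": "click", "button": "left", "x": 312, "y": 120}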
+        action:
+          $ref: '#/components/schemas/ComputerAction'
+        pending_safety_checks:
+          type: array
+          items:
+            $ref: '#/components/schemas/ComputerCallSafetyCheckParam'
+          description: |
+            The pending safety checks for the computer call.
+        status:
+          type: string
+          description: |
+            The status of the item. One of `in_progress`, `completed`, or
+            `incomplete`. Populated when items are returned via API.
+          enum:
+            - in_progress
+            - completed
+            - incomplete
+      required:
+        - type
+        - id
+        - action
+        - call_id
+        - pending_safety_checks
+        - status
+    ComputerToolCallOutput:
+      type: object
+      title: Computer tool call output
+      description: |
+        The output of a computer tool call.
+      properties:
+        type:
+          type: string
+          description: |
+            The type of the computer tool call output. Always `computer_call_output`.
+          enum:
+            - computer_call_output
+          default: computer_call_output
+          x-stainless-const: true
+        id:
+          type: string
+          description: |
+            The ID of the computer tool call output.
+        call_id:
+          type: string
+          description: |
+            The ID of the computer tool call that produced the output.
+        acknowledged_safety_checks:
+          type: array
+          description: |
+            The safety checks reported by the API that have been acknowledged by the
+            developer.
+          items:
+            $ref: '#/components/schemas/ComputerCallSafetyCheckParam'
+        output:
+          $ref: '#/components/schemas/ComputerScreenshotImage'
+        status:
+          type: string
+          description: |
+            The status of the computer tool call output. One of `in_progress`, `completed`, or
+            `incomplete`. Populated when input items are returned via API.
+          enum:
+            - in_progress
+            - completed
+            - incomplete
+      required:
+        - type
+        - call_id
+        - output
+    ComputerToolCallOutputResource:
+      allOf:
+        - $ref: '#/components/schemas/ComputerToolCallOutput'
+        - type: object
+          properties:
+            id:
+              type: string
+              description: |
+                The unique ID of the computer call tool output.
+          required:
+            - id
+    ContainerFileListResource:
+      type: object
+      properties:
+        object:
+          description: The type of object returned, must be 'list'.
+          const: list
+        data:
+          type: array
+          description: A list of container files.
+          items:
+            $ref: '#/components/schemas/ContainerFileResource'
+        first_id:
+          type: string
+          description: The ID of the first file in the list.
+        last_id:
+          type: string
+          description: The ID of the last file in the list.
+        has_more:
+          type: boolean
+          description: Whether there are more files available.
+      required:
+        - object
+        - data
+        - first_id
+        - last_id
+        - has_more
+    ContainerFileResource:
+      type: object
+      title: The container file object
+      properties:
+        id:
+          type: string
+          description: Unique identifier for the file.
+        object:
+          type: string
+          description: The type of this object (`container.file`).
+          const: container.file
+        container_id:
+          type: string
+          description: The container this file belongs to.
+        created_at:
+          type: integer
+          description: Unix timestamp (in seconds) when the file was created.
+        bytes:
+          type: integer
+          description: Size of the file in bytes.
+        path:
+          type: string
+          description: Path of the file in the container.
+        source:
+          type: string
+          description: Source of the file (e.g., `user`, `assistant`).
+      required:
+        - id
+        - object
+        - created_at
+        - bytes
+        - container_id
+        - path
+        - source
+      x-oaiMeta:
+        name: The container file object
+        example: |
+          {
+              "id": "cfile_682e0e8a43c88191a7978f477a09bdf5",
+              "object": "container.file",
+              "created_at": 1747848842,
+              "bytes": 880,
+              "container_id": "cntr_682e0e7318108198aa783fd921ff305e08e78805b9fdbb04",
+              "path": "/mnt/data/88e12fa445d32636f190a0b33daed6cb-tsconfig.json",
+              "source": "user"
+          }
+    ContainerListResource:
+      type: object
+      properties:
+        object:
+          description: The type of object returned, must be 'list'.
+          const: list
+        data:
+          type: array
+          description: A list of containers.
+          items:
+            $ref: '#/components/schemas/ContainerResource'
+        first_id:
+          type: string
+          description: The ID of the first container in the list.
+        last_id:
+          type: string
+          description: The ID of the last container in the list.
+        has_more:
+          type: boolean
+          description: Whether there are more containers available.
+      required:
+        - object
+        - data
+        - first_id
+        - last_id
+        - has_more
+    ContainerResource:
+      type: object
+      title: The container object
+      properties:
+        id:
+          type: string
+          description: Unique identifier for the container.
+        object:
+          type: string
+          description: The type of this object.
+        name:
+          type: string
+          description: Name of the container.
+        created_at:
+          type: integer
+          description: Unix timestamp (in seconds) when the container was created.
+        status:
+          type: string
+          description: Status of the container (e.g., active, deleted).
+        expires_after:
+          type: object
+          description: |
+            The container will expire after this time period.
+            The anchor is the reference point for the expiration.
+            The minutes is the number of minutes after the anchor before the container expires.
+          properties:
+            anchor:
+              type: string
+              description: The reference point for the expiration.
+              enum:
+                - last_active_at
+            minutes:
+              type: integer
+              description: The number of minutes after the anchor before the container expires.
+      required:
+        - id
+        - object
+        - name
+        - created_at
+        - status
+      x-oaiMeta:
+        name: The container object
+        example: |
+          {
+            "id": "cntr_682dfebaacac8198bbfe9c2474fb6f4a085685cbe3cb5863",
+            "object": "container",
+            "created_at": 1747844794,
+            "status": "running",
+            "expires_after": {
+              "anchor": "last_active_at",
+              "minutes": 20
+            },
+            "last_active_at": 1747844794,
+            "name": "My Container"
+          }
+    Content:
+      description: |
+        Multi-modal input and output contents.
+      anyOf:
+        - title: Input content types
+          $ref: '#/components/schemas/InputContent'
+        - title: Output content types
+          $ref: '#/components/schemas/OutputContent'
+    Conversation:
+      title: The conversation object
+      allOf:
+        - $ref: '#/components/schemas/ConversationResource'
+      x-oaiMeta:
+        name: The conversation object
+        group: conversations
+    ConversationItem:
+      title: Conversation item
+      description: >-
+        A single item within a conversation. The set of possible types are the same as the `output` type of a
+        [Response
+        object](https://platform.openai.com/docs/api-reference/responses/object#responses/object-output).
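+      # Illustrative only -- not part of the upstream schema. A hypothetical `message`
+      # item that would match the Message variant below:
+      #   {"type": "message", "id": "msg_abc123", "role": "user", "status": "completed",
+      #    "content": [{"type": "input_text", "text": "Hello!"}]}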
+ discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/Message' + - $ref: '#/components/schemas/FunctionToolCallResource' + - $ref: '#/components/schemas/FunctionToolCallOutputResource' + - $ref: '#/components/schemas/FileSearchToolCall' + - $ref: '#/components/schemas/WebSearchToolCall' + - $ref: '#/components/schemas/ImageGenToolCall' + - $ref: '#/components/schemas/ComputerToolCall' + - $ref: '#/components/schemas/ComputerToolCallOutputResource' + - $ref: '#/components/schemas/ReasoningItem' + - $ref: '#/components/schemas/CodeInterpreterToolCall' + - $ref: '#/components/schemas/LocalShellToolCall' + - $ref: '#/components/schemas/LocalShellToolCallOutput' + - $ref: '#/components/schemas/FunctionShellCall' + - $ref: '#/components/schemas/FunctionShellCallOutput' + - $ref: '#/components/schemas/ApplyPatchToolCall' + - $ref: '#/components/schemas/ApplyPatchToolCallOutput' + - $ref: '#/components/schemas/MCPListTools' + - $ref: '#/components/schemas/MCPApprovalRequest' + - $ref: '#/components/schemas/MCPApprovalResponseResource' + - $ref: '#/components/schemas/MCPToolCall' + - $ref: '#/components/schemas/CustomToolCall' + - $ref: '#/components/schemas/CustomToolCallOutput' + ConversationItemList: + type: object + title: The conversation item list + description: A list of Conversation items. + properties: + object: + description: The type of object returned, must be `list`. + x-stainless-const: true + const: list + data: + type: array + description: A list of conversation items. + items: + $ref: '#/components/schemas/ConversationItem' + has_more: + type: boolean + description: Whether there are more items available. + first_id: + type: string + description: The ID of the first item in the list. + last_id: + type: string + description: The ID of the last item in the list. + required: + - object + - data + - has_more + - first_id + - last_id + x-oaiMeta: + name: The item list + group: conversations + ConversationParam: + description: > + The conversation that this response belongs to. Items from this conversation are prepended to + `input_items` for this response request. + + Input items and output items from this response are automatically added to this conversation after + this response completes. + anyOf: + - type: string + title: Conversation ID + description: | + The unique ID of the conversation. + - $ref: '#/components/schemas/ConversationParam-2' + CostsResult: + type: object + description: The aggregated costs details of the specific time bucket. + properties: + object: + type: string + enum: + - organization.costs.result + x-stainless-const: true + amount: + type: object + description: The monetary value in its associated currency. + properties: + value: + type: number + description: The numeric value of the cost. + currency: + type: string + description: Lowercase ISO-4217 currency e.g. "usd" + line_item: + anyOf: + - type: string + description: When `group_by=line_item`, this field provides the line item of the grouped costs result. + - type: 'null' + project_id: + anyOf: + - type: string + description: When `group_by=project_id`, this field provides the project ID of the grouped costs result. 
+            - type: 'null'
+      required:
+        - object
+      x-oaiMeta:
+        name: Costs object
+        example: |
+          {
+              "object": "organization.costs.result",
+              "amount": {
+                "value": 0.06,
+                "currency": "usd"
+              },
+              "line_item": "Image models",
+              "project_id": "proj_abc"
+          }
+    CreateAssistantRequest:
+      type: object
+      additionalProperties: false
+      properties:
+        model:
+          description: >
+            ID of the model to use. You can use the [List
+            models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your
+            available models, or see our [Model overview](https://platform.openai.com/docs/models) for
+            descriptions of them.
+          example: gpt-4o
+          anyOf:
+            - type: string
+            - $ref: '#/components/schemas/AssistantSupportedModels'
+          x-oaiTypeLabel: string
+        name:
+          anyOf:
+            - description: |
+                The name of the assistant. The maximum length is 256 characters.
+              type: string
+              maxLength: 256
+            - type: 'null'
+        description:
+          anyOf:
+            - description: |
+                The description of the assistant. The maximum length is 512 characters.
+              type: string
+              maxLength: 512
+            - type: 'null'
+        instructions:
+          anyOf:
+            - description: |
+                The system instructions that the assistant uses. The maximum length is 256,000 characters.
+              type: string
+              maxLength: 256000
+            - type: 'null'
+        reasoning_effort:
+          $ref: '#/components/schemas/ReasoningEffort'
+        tools:
+          description: >
+            A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools
+            can be of types `code_interpreter`, `file_search`, or `function`.
+          default: []
+          type: array
+          maxItems: 128
+          items:
+            $ref: '#/components/schemas/AssistantTool'
+        tool_resources:
+          anyOf:
+            - type: object
+              description: >
+                A set of resources that are used by the assistant's tools. The resources are specific to the
+                type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the
+                `file_search` tool requires a list of vector store IDs.
+              properties:
+                code_interpreter:
+                  type: object
+                  properties:
+                    file_ids:
+                      type: array
+                      description: >
+                        A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made
+                        available to the `code_interpreter` tool. There can be a maximum of 20 files
+                        associated with the tool.
+                      default: []
+                      maxItems: 20
+                      items:
+                        type: string
+                file_search:
+                  type: object
+                  properties:
+                    vector_store_ids:
+                      type: array
+                      description: >
+                        The [vector
+                        store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached
+                        to this assistant. There can be a maximum of 1 vector store attached to the assistant.
+                      maxItems: 1
+                      items:
+                        type: string
+                    vector_stores:
+                      type: array
+                      description: >
+                        A helper to create a [vector
+                        store](https://platform.openai.com/docs/api-reference/vector-stores/object) with
+                        file_ids and attach it to this assistant. There can be a maximum of 1 vector store
+                        attached to the assistant.
+                      maxItems: 1
+                      items:
+                        type: object
+                        properties:
+                          file_ids:
+                            type: array
+                            description: >
+                              A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to
+                              add to the vector store. There can be a maximum of 10000 files in a vector
+                              store.
+                            maxItems: 10000
+                            items:
+                              type: string
+                          chunking_strategy:
+                            type: object
+                            description: >-
+                              The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+                              strategy.
+                            anyOf:
+                              - type: object
+                                title: Auto Chunking Strategy
+                                description: >-
+                                  The default strategy. This strategy currently uses a `max_chunk_size_tokens`
+                                  of `800` and `chunk_overlap_tokens` of `400`.
+ additionalProperties: false + properties: + type: + type: string + description: Always `auto`. + enum: + - auto + x-stainless-const: true + required: + - type + - type: object + title: Static Chunking Strategy + additionalProperties: false + properties: + type: + type: string + description: Always `static`. + enum: + - static + x-stainless-const: true + static: + type: object + additionalProperties: false + properties: + max_chunk_size_tokens: + type: integer + minimum: 100 + maximum: 4096 + description: >- + The maximum number of tokens in each chunk. The default value is + `800`. The minimum value is `100` and the maximum value is `4096`. + chunk_overlap_tokens: + type: integer + description: > + The number of tokens that overlap between chunks. The default value + is `400`. + + + Note that the overlap must not exceed half of + `max_chunk_size_tokens`. + required: + - max_chunk_size_tokens + - chunk_overlap_tokens + required: + - type + - static + discriminator: + propertyName: type + metadata: + $ref: '#/components/schemas/Metadata' + anyOf: + - required: + - vector_store_ids + - required: + - vector_stores + - type: 'null' + metadata: + $ref: '#/components/schemas/Metadata' + temperature: + anyOf: + - description: > + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + - type: 'null' + top_p: + anyOf: + - type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + description: > + An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + + + We generally recommend altering this or temperature but not both. + - type: 'null' + response_format: + anyOf: + - $ref: '#/components/schemas/AssistantsApiResponseFormatOption' + - type: 'null' + required: + - model + CreateChatCompletionRequest: + allOf: + - $ref: '#/components/schemas/CreateModelResponseProperties' + - type: object + properties: + messages: + description: > + A list of messages comprising the conversation so far. Depending on the + + [model](https://platform.openai.com/docs/models) you use, different message types (modalities) + are + + supported, like [text](https://platform.openai.com/docs/guides/text-generation), + + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + type: array + minItems: 1 + items: + $ref: '#/components/schemas/ChatCompletionRequestMessage' + model: + description: > + Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI + + offers a wide range of models with different capabilities, performance + + characteristics, and price points. Refer to the [model + guide](https://platform.openai.com/docs/models) + + to browse and compare available models. + $ref: '#/components/schemas/ModelIdsShared' + modalities: + $ref: '#/components/schemas/ResponseModalities' + verbosity: + $ref: '#/components/schemas/Verbosity' + reasoning_effort: + $ref: '#/components/schemas/ReasoningEffort' + max_completion_tokens: + description: > + An upper bound for the number of tokens that can be generated for a completion, including + visible output tokens and [reasoning + tokens](https://platform.openai.com/docs/guides/reasoning). 
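+              # Illustrative only: e.g. `"max_completion_tokens": 1024` caps visible output
+              # and reasoning tokens combined for the completion.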
+              type: integer
+              nullable: true
+            frequency_penalty:
+              type: number
+              default: 0
+              minimum: -2
+              maximum: 2
+              nullable: true
+              description: |
+                Number between -2.0 and 2.0. Positive values penalize new tokens based on
+                their existing frequency in the text so far, decreasing the model's
+                likelihood to repeat the same line verbatim.
+            presence_penalty:
+              type: number
+              default: 0
+              minimum: -2
+              maximum: 2
+              nullable: true
+              description: |
+                Number between -2.0 and 2.0. Positive values penalize new tokens based on
+                whether they appear in the text so far, increasing the model's likelihood
+                to talk about new topics.
+            web_search_options:
+              type: object
+              title: Web search
+              description: >
+                This tool searches the web for relevant results to use in a response.
+
+                Learn more about the [web search
+                tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
+              properties:
+                user_location:
+                  type: object
+                  nullable: true
+                  required:
+                    - type
+                    - approximate
+                  description: |
+                    Approximate location parameters for the search.
+                  properties:
+                    type:
+                      type: string
+                      description: |
+                        The type of location approximation. Always `approximate`.
+                      enum:
+                        - approximate
+                      x-stainless-const: true
+                    approximate:
+                      $ref: '#/components/schemas/WebSearchLocation'
+                search_context_size:
+                  $ref: '#/components/schemas/WebSearchContextSize'
+            top_logprobs:
+              description: |
+                An integer between 0 and 20 specifying the number of most likely tokens to
+                return at each token position, each with an associated log probability.
+                `logprobs` must be set to `true` if this parameter is used.
+              type: integer
+              minimum: 0
+              maximum: 20
+              nullable: true
+            response_format:
+              description: |
+                An object specifying the format that the model must output.
+
+                Setting to `{ "type": "json_schema", "json_schema": {...} }` enables
+                Structured Outputs which ensures the model will match your supplied JSON
+                schema. Learn more in the [Structured Outputs
+                guide](https://platform.openai.com/docs/guides/structured-outputs).
+
+                Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+                ensures the message the model generates is valid JSON. Using `json_schema`
+                is preferred for models that support it.
+              discriminator:
+                propertyName: type
+              anyOf:
+                - $ref: '#/components/schemas/ResponseFormatText'
+                - $ref: '#/components/schemas/ResponseFormatJsonSchema'
+                - $ref: '#/components/schemas/ResponseFormatJsonObject'
+            audio:
+              type: object
+              nullable: true
+              description: |
+                Parameters for audio output. Required when audio output is requested with
+                `modalities: ["audio"]`. [Learn more](https://platform.openai.com/docs/guides/audio).
+              required:
+                - voice
+                - format
+              properties:
+                voice:
+                  $ref: '#/components/schemas/VoiceIdsShared'
+                  description: |
+                    The voice the model uses to respond. Supported voices are
+                    `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, and `shimmer`.
+                format:
+                  type: string
+                  enum:
+                    - wav
+                    - aac
+                    - mp3
+                    - flac
+                    - opus
+                    - pcm16
+                  description: |
+                    Specifies the output audio format. Must be one of `wav`, `aac`, `mp3`,
+                    `flac`, `opus`, or `pcm16`.
+            store:
+              type: boolean
+              default: false
+              nullable: true
+              description: |
+                Whether or not to store the output of this chat completion request for
+                use in our [model distillation](https://platform.openai.com/docs/guides/distillation) or
+                [evals](https://platform.openai.com/docs/guides/evals) products.
+
+                Supports text and image inputs. Note: image inputs over 8MB will be dropped.
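+            # Illustrative only -- not part of the upstream schema. A minimal request body
+            # (hypothetical values) combining the fields above:
+            #   {"model": "gpt-4o", "store": true,
+            #    "messages": [{"role": "user", "content": "Say hello"}]}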
+ stream: + description: > + If set to true, the model response data will be streamed to the client + + as it is generated using [server-sent + events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + + See the [Streaming section + below](https://platform.openai.com/docs/api-reference/chat/streaming) + + for more information, along with the [streaming + responses](https://platform.openai.com/docs/guides/streaming-responses) + + guide for more information on how to handle the streaming events. + type: boolean + nullable: true + default: false + stop: + $ref: '#/components/schemas/StopConfiguration' + logit_bias: + type: object + x-oaiTypeLabel: map + default: null + nullable: true + additionalProperties: + type: integer + description: | + Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, + the bias is added to the logits generated by the model prior to sampling. + The exact effect will vary per model, but values between -1 and 1 should + decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + logprobs: + description: | + Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the + `content` of `message`. + type: boolean + default: false + nullable: true + max_tokens: + description: | + The maximum number of [tokens](/tokenizer) that can be generated in the + chat completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is + not compatible with [o-series models](https://platform.openai.com/docs/guides/reasoning). + type: integer + nullable: true + deprecated: true + 'n': + type: integer + minimum: 1 + maximum: 128 + default: 1 + example: 1 + nullable: true + description: >- + How many chat completion choices to generate for each input message. Note that you will be + charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to + minimize costs. + prediction: + nullable: true + description: > + Configuration for a [Predicted + Output](https://platform.openai.com/docs/guides/predicted-outputs), + + which can greatly improve response times when large parts of the model + + response are known ahead of time. This is most common when you are + + regenerating a file with only minor changes to most of the content. + anyOf: + - $ref: '#/components/schemas/PredictionContent' + discriminator: + propertyName: type + seed: + type: integer + minimum: -9223372036854776000 + maximum: 9223372036854776000 + nullable: true + deprecated: true + description: > + This feature is in Beta. + + If specified, our system will make a best effort to sample deterministically, such that + repeated requests with the same `seed` and parameters should return the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` response + parameter to monitor changes in the backend. + x-oaiMeta: + beta: true + stream_options: + $ref: '#/components/schemas/ChatCompletionStreamOptions' + tools: + type: array + description: | + A list of tools the model may call. 
+                You can provide either
+                [custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) or
+                [function tools](https://platform.openai.com/docs/guides/function-calling).
+              items:
+                anyOf:
+                  - $ref: '#/components/schemas/ChatCompletionTool'
+                  - $ref: '#/components/schemas/CustomToolChatCompletions'
+                x-stainless-naming:
+                  python:
+                    model_name: chat_completion_tool_union
+                    param_model_name: chat_completion_tool_union_param
+                discriminator:
+                  propertyName: type
+                x-stainless-go-variant-constructor:
+                  naming: chat_completion_{variant}_tool
+            tool_choice:
+              $ref: '#/components/schemas/ChatCompletionToolChoiceOption'
+            parallel_tool_calls:
+              $ref: '#/components/schemas/ParallelToolCalls'
+            function_call:
+              deprecated: true
+              description: |
+                Deprecated in favor of `tool_choice`.
+
+                Controls which (if any) function is called by the model.
+
+                `none` means the model will not call a function and instead generates a
+                message.
+
+                `auto` means the model can pick between generating a message or calling a
+                function.
+
+                Specifying a particular function via `{"name": "my_function"}` forces the
+                model to call that function.
+
+                `none` is the default when no functions are present. `auto` is the default
+                if functions are present.
+              anyOf:
+                - type: string
+                  description: >
+                    `none` means the model will not call a function and instead generates a message. `auto`
+                    means the model can pick between generating a message or calling a function.
+                  enum:
+                    - none
+                    - auto
+                  title: function call mode
+                - $ref: '#/components/schemas/ChatCompletionFunctionCallOption'
+            functions:
+              deprecated: true
+              description: |
+                Deprecated in favor of `tools`.
+
+                A list of functions the model may generate JSON inputs for.
+              type: array
+              minItems: 1
+              maxItems: 128
+              items:
+                $ref: '#/components/schemas/ChatCompletionFunctions'
+          required:
+            - model
+            - messages
+    CreateChatCompletionResponse:
+      type: object
+      description: Represents a chat completion response returned by the model, based on the provided input.
+      properties:
+        id:
+          type: string
+          description: A unique identifier for the chat completion.
+        choices:
+          type: array
+          description: A list of chat completion choices. Can be more than one if `n` is greater than 1.
+          items:
+            type: object
+            required:
+              - finish_reason
+              - index
+              - message
+              - logprobs
+            properties:
+              finish_reason:
+                type: string
+                description: >
+                  The reason the model stopped generating tokens. This will be `stop` if the model hit a
+                  natural stop point or a provided stop sequence,
+
+                  `length` if the maximum number of tokens specified in the request was reached,
+
+                  `content_filter` if content was omitted due to a flag from our content filters,
+
+                  `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called
+                  a function.
+                enum:
+                  - stop
+                  - length
+                  - tool_calls
+                  - content_filter
+                  - function_call
+              index:
+                type: integer
+                description: The index of the choice in the list of choices.
+              message:
+                $ref: '#/components/schemas/ChatCompletionResponseMessage'
+              logprobs:
+                anyOf:
+                  - description: Log probability information for the choice.
+                    type: object
+                    properties:
+                      content:
+                        anyOf:
+                          - description: A list of message content tokens with log probability information.
+                            type: array
+                            items:
+                              $ref: '#/components/schemas/ChatCompletionTokenLogprob'
+                          - type: 'null'
+                      refusal:
+                        anyOf:
+                          - description: A list of message refusal tokens with log probability information.
+                            type: array
+                            items:
+                              $ref: '#/components/schemas/ChatCompletionTokenLogprob'
+                          - type: 'null'
+                    required:
+                      - content
+                      - refusal
+                  - type: 'null'
+        created:
+          type: integer
+          description: The Unix timestamp (in seconds) of when the chat completion was created.
+        model:
+          type: string
+          description: The model used for the chat completion.
+        service_tier:
+          $ref: '#/components/schemas/ServiceTier'
+        system_fingerprint:
+          type: string
+          deprecated: true
+          description: >
+            This fingerprint represents the backend configuration that the model runs with.
+
+
+            Can be used in conjunction with the `seed` request parameter to understand when backend changes
+            have been made that might impact determinism.
+        object:
+          type: string
+          description: The object type, which is always `chat.completion`.
+          enum:
+            - chat.completion
+          x-stainless-const: true
+        usage:
+          $ref: '#/components/schemas/CompletionUsage'
+      required:
+        - choices
+        - created
+        - id
+        - model
+        - object
+      x-oaiMeta:
+        name: The chat completion object
+        group: chat
+        example: |
+          {
+            "id": "chatcmpl-B9MHDbslfkBeAs8l4bebGdFOJ6PeG",
+            "object": "chat.completion",
+            "created": 1741570283,
+            "model": "gpt-4o-2024-08-06",
+            "choices": [
+              {
+                "index": 0,
+                "message": {
+                  "role": "assistant",
+                  "content": "The image shows a wooden boardwalk path running through a lush green field or meadow. The sky is bright blue with some scattered clouds, giving the scene a serene and peaceful atmosphere. Trees and shrubs are visible in the background.",
+                  "refusal": null,
+                  "annotations": []
+                },
+                "logprobs": null,
+                "finish_reason": "stop"
+              }
+            ],
+            "usage": {
+              "prompt_tokens": 1117,
+              "completion_tokens": 46,
+              "total_tokens": 1163,
+              "prompt_tokens_details": {
+                "cached_tokens": 0,
+                "audio_tokens": 0
+              },
+              "completion_tokens_details": {
+                "reasoning_tokens": 0,
+                "audio_tokens": 0,
+                "accepted_prediction_tokens": 0,
+                "rejected_prediction_tokens": 0
+              }
+            },
+            "service_tier": "default",
+            "system_fingerprint": "fp_fc9f1d7035"
+          }
+    CreateChatCompletionStreamResponse:
+      type: object
+      description: |
+        Represents a streamed chunk of a chat completion response returned
+        by the model, based on the provided input.
+        [Learn more](https://platform.openai.com/docs/guides/streaming-responses).
+      properties:
+        id:
+          type: string
+          description: A unique identifier for the chat completion. Each chunk has the same ID.
+        choices:
+          type: array
+          description: >
+            A list of chat completion choices. Can contain more than one element if `n` is greater than 1.
+            Can also be empty for the
+
+            last chunk if you set `stream_options: {"include_usage": true}`.
+          items:
+            type: object
+            required:
+              - delta
+              - finish_reason
+              - index
+            properties:
+              delta:
+                $ref: '#/components/schemas/ChatCompletionStreamResponseDelta'
+              logprobs:
+                description: Log probability information for the choice.
+                type: object
+                nullable: true
+                properties:
+                  content:
+                    description: A list of message content tokens with log probability information.
+                    type: array
+                    items:
+                      $ref: '#/components/schemas/ChatCompletionTokenLogprob'
+                    nullable: true
+                  refusal:
+                    description: A list of message refusal tokens with log probability information.
+                    type: array
+                    items:
+                      $ref: '#/components/schemas/ChatCompletionTokenLogprob'
+                    nullable: true
+                required:
+                  - content
+                  - refusal
+              finish_reason:
+                type: string
+                description: >
+                  The reason the model stopped generating tokens.
+                  This will be `stop` if the model hit a
+                  natural stop point or a provided stop sequence,
+
+                  `length` if the maximum number of tokens specified in the request was reached,
+
+                  `content_filter` if content was omitted due to a flag from our content filters,
+
+                  `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called
+                  a function.
+                enum:
+                  - stop
+                  - length
+                  - tool_calls
+                  - content_filter
+                  - function_call
+                nullable: true
+              index:
+                type: integer
+                description: The index of the choice in the list of choices.
+        created:
+          type: integer
+          description: >-
+            The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same
+            timestamp.
+        model:
+          type: string
+          description: The model used to generate the completion.
+        service_tier:
+          $ref: '#/components/schemas/ServiceTier'
+        system_fingerprint:
+          type: string
+          deprecated: true
+          description: >
+            This fingerprint represents the backend configuration that the model runs with.
+
+            Can be used in conjunction with the `seed` request parameter to understand when backend changes
+            have been made that might impact determinism.
+        object:
+          type: string
+          description: The object type, which is always `chat.completion.chunk`.
+          enum:
+            - chat.completion.chunk
+          x-stainless-const: true
+        usage:
+          $ref: '#/components/schemas/CompletionUsage'
+          nullable: true
+          description: |
+            An optional field that will only be present when you set
+            `stream_options: {"include_usage": true}` in your request. When present, it
+            contains a null value **except for the last chunk** which contains the
+            token usage statistics for the entire request.
+
+            **NOTE:** If the stream is interrupted or cancelled, you may not
+            receive the final usage chunk which contains the total token usage for
+            the request.
+      required:
+        - choices
+        - created
+        - id
+        - model
+        - object
+      x-oaiMeta:
+        name: The chat completion chunk object
+        group: chat
+        example: >
+          {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini",
+          "system_fingerprint": "fp_44709d6fcb",
+          "choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
+
+
+          {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini",
+          "system_fingerprint": "fp_44709d6fcb",
+          "choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]}
+
+
+          ....
+
+
+          {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini",
+          "system_fingerprint": "fp_44709d6fcb",
+          "choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
+    CreateCompletionRequest:
+      type: object
+      properties:
+        model:
+          description: >
+            ID of the model to use. You can use the [List
+            models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your
+            available models, or see our [Model overview](https://platform.openai.com/docs/models) for
+            descriptions of them.
+          anyOf:
+            - type: string
+            - type: string
+              enum:
+                - gpt-3.5-turbo-instruct
+                - davinci-002
+                - babbage-002
+              title: Preset
+          x-oaiTypeLabel: string
+        prompt:
+          description: >
+            The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens,
+            or array of token arrays.
+
+
+            Note that <|endoftext|> is the document separator that the model sees during training, so if a
+            prompt is not specified the model will generate as if from the beginning of a new document.
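+          # Illustrative only -- not part of the upstream schema. A minimal legacy
+          # completions request (hypothetical values):
+          #   {"model": "gpt-3.5-turbo-instruct", "prompt": "Say this is a test", "max_tokens": 7}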
+          nullable: true
+          anyOf:
+            - type: string
+              default: ''
+              example: This is a test.
+            - type: array
+              items:
+                type: string
+                default: ''
+                example: This is a test.
+              title: Array of strings
+            - type: array
+              minItems: 1
+              items:
+                type: integer
+              title: Array of tokens
+            - type: array
+              minItems: 1
+              items:
+                type: array
+                minItems: 1
+                items:
+                  type: integer
+              title: Array of token arrays
+        best_of:
+          type: integer
+          default: 1
+          minimum: 0
+          maximum: 20
+          nullable: true
+          description: >
+            Generates `best_of` completions server-side and returns the "best" (the one with the highest log
+            probability per token). Results cannot be streamed.
+
+
+            When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how
+            many to return – `best_of` must be greater than `n`.
+
+
+            **Note:** Because this parameter generates many completions, it can quickly consume your token
+            quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
+        echo:
+          type: boolean
+          default: false
+          nullable: true
+          description: |
+            Echo back the prompt in addition to the completion
+        frequency_penalty:
+          type: number
+          default: 0
+          minimum: -2
+          maximum: 2
+          nullable: true
+          description: >
+            Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency
+            in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+
+
+            [See more information about frequency and presence
+            penalties.](https://platform.openai.com/docs/guides/text-generation)
+        logit_bias:
+          type: object
+          x-oaiTypeLabel: map
+          default: null
+          nullable: true
+          additionalProperties:
+            type: integer
+          description: >
+            Modify the likelihood of specified tokens appearing in the completion.
+
+
+            Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an
+            associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) to
+            convert text to token IDs. Mathematically, the bias is added to the logits generated by the model
+            prior to sampling. The exact effect will vary per model, but values between -1 and 1 should
+            decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or
+            exclusive selection of the relevant token.
+
+
+            As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being
+            generated.
+        logprobs:
+          type: integer
+          minimum: 0
+          maximum: 5
+          default: null
+          nullable: true
+          description: >
+            Include the log probabilities on the `logprobs` most likely output tokens, as well as the chosen
+            tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens.
+            The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1`
+            elements in the response.
+
+
+            The maximum value for `logprobs` is 5.
+        max_tokens:
+          type: integer
+          minimum: 0
+          default: 16
+          example: 16
+          nullable: true
+          description: >
+            The maximum number of [tokens](/tokenizer) that can be generated in the completion.
+
+
+            The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
+            [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for
+            counting tokens.
+        'n':
+          type: integer
+          minimum: 1
+          maximum: 128
+          default: 1
+          example: 1
+          nullable: true
+          description: >
+            How many completions to generate for each prompt.
+ + + **Note:** Because this parameter generates many completions, it can quickly consume your token + quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + presence_penalty: + type: number + default: 0 + minimum: -2 + maximum: 2 + nullable: true + description: > + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in + the text so far, increasing the model's likelihood to talk about new topics. + + + [See more information about frequency and presence + penalties.](https://platform.openai.com/docs/guides/text-generation) + seed: + type: integer + format: int64 + nullable: true + description: > + If specified, our system will make a best effort to sample deterministically, such that repeated + requests with the same `seed` and parameters should return the same result. + + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter + to monitor changes in the backend. + stop: + $ref: '#/components/schemas/StopConfiguration' + stream: + description: > + Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent + events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python + code](https://cookbook.openai.com/examples/how_to_stream_completions). + type: boolean + nullable: true + default: false + stream_options: + $ref: '#/components/schemas/ChatCompletionStreamOptions' + suffix: + description: | + The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + default: null + nullable: true + type: string + example: test. + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: > + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + + + We generally recommend altering this or `top_p` but not both. + top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: > + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the + top 10% probability mass are considered. + + + We generally recommend altering this or `temperature` but not both. + user: + type: string + example: user-1234 + description: > + A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + required: + - model + - prompt + CreateCompletionResponse: + type: object + description: > + Represents a completion response from the API. Note: both the streamed and non-streamed response + objects share the same shape (unlike the chat endpoint). + properties: + id: + type: string + description: A unique identifier for the completion. + choices: + type: array + description: The list of completion choices the model generated for the input prompt. + items: + type: object + required: + - finish_reason + - index + - logprobs + - text + properties: + finish_reason: + type: string + description: > + The reason the model stopped generating tokens. 
This will be `stop` if the model hit a + natural stop point or a provided stop sequence, + + `length` if the maximum number of tokens specified in the request was reached, + + or `content_filter` if content was omitted due to a flag from our content filters. + enum: + - stop + - length + - content_filter + index: + type: integer + logprobs: + anyOf: + - type: object + properties: + text_offset: + type: array + items: + type: integer + token_logprobs: + type: array + items: + type: number + tokens: + type: array + items: + type: string + top_logprobs: + type: array + items: + type: object + additionalProperties: + type: number + - type: 'null' + text: + type: string + created: + type: integer + description: The Unix timestamp (in seconds) of when the completion was created. + model: + type: string + description: The model used for completion. + system_fingerprint: + type: string + description: > + This fingerprint represents the backend configuration that the model runs with. + + + Can be used in conjunction with the `seed` request parameter to understand when backend changes + have been made that might impact determinism. + object: + type: string + description: The object type, which is always "text_completion" + enum: + - text_completion + x-stainless-const: true + usage: + $ref: '#/components/schemas/CompletionUsage' + required: + - id + - object + - created + - model + - choices + x-oaiMeta: + name: The completion object + legacy: true + example: | + { + "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7", + "object": "text_completion", + "created": 1589478378, + "model": "gpt-4-turbo", + "choices": [ + { + "text": "\n\nThis is indeed a test", + "index": 0, + "logprobs": null, + "finish_reason": "length" + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 7, + "total_tokens": 12 + } + } + CreateContainerBody: + type: object + properties: + name: + type: string + description: Name of the container to create. + file_ids: + type: array + description: IDs of files to copy to the container. + items: + type: string + expires_after: + type: object + description: Container expiration time in minutes relative to the 'anchor' time. + properties: + anchor: + type: string + enum: + - last_active_at + description: Time anchor for the expiration time. Currently only 'last_active_at' is supported. + minutes: + type: integer + required: + - anchor + - minutes + required: + - name + CreateContainerFileBody: + type: object + properties: + file_id: + type: string + description: The ID of an already-uploaded file to add to the container. + file: + description: | + The File object (not file name) to be uploaded. + type: string + format: binary + required: [] + CreateEmbeddingRequest: + type: object + additionalProperties: false + properties: + input: + description: > + Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single + request, pass an array of strings or array of token arrays. The input must not exceed the max + input tokens for the model (8192 tokens for all embedding models), cannot be an empty string, and + any array must be 2048 entries or fewer. [Example Python + code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. + In addition to the per-input token limit, all embedding models enforce a maximum of 300,000 + tokens summed across all inputs in a single request. + example: The quick brown fox jumped over the lazy dog + anyOf: + - type: string + title: string + description: The string that will be turned into an embedding.
+ default: '' + example: This is a test. + - type: array + title: Array of strings + description: The array of strings that will be turned into an embedding. + minItems: 1 + maxItems: 2048 + items: + type: string + default: '' + example: '[''This is a test.'']' + - type: array + title: Array of tokens + description: The array of integers that will be turned into an embedding. + minItems: 1 + maxItems: 2048 + items: + type: integer + - type: array + title: Array of token arrays + description: The array of arrays containing integers that will be turned into an embedding. + minItems: 1 + maxItems: 2048 + items: + type: array + minItems: 1 + items: + type: integer + model: + description: > + ID of the model to use. You can use the [List + models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your + available models, or see our [Model overview](https://platform.openai.com/docs/models) for + descriptions of them. + example: text-embedding-3-small + anyOf: + - type: string + - type: string + enum: + - text-embedding-ada-002 + - text-embedding-3-small + - text-embedding-3-large + x-stainless-nominal: false + x-oaiTypeLabel: string + encoding_format: + description: >- + The format to return the embeddings in. Can be either `float` or + [`base64`](https://pypi.org/project/pybase64/). + example: float + default: float + type: string + enum: + - float + - base64 + dimensions: + description: > + The number of dimensions the resulting output embeddings should have. Only supported in + `text-embedding-3` and later models. + type: integer + minimum: 1 + user: + type: string + example: user-1234 + description: > + A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + required: + - model + - input + CreateEmbeddingResponse: + type: object + properties: + data: + type: array + description: The list of embeddings generated by the model. + items: + $ref: '#/components/schemas/Embedding' + model: + type: string + description: The name of the model used to generate the embedding. + object: + type: string + description: The object type, which is always "list". + enum: + - list + x-stainless-const: true + usage: + type: object + description: The usage information for the request. + properties: + prompt_tokens: + type: integer + description: The number of tokens used by the prompt. + total_tokens: + type: integer + description: The total number of tokens used by the request. + required: + - prompt_tokens + - total_tokens + required: + - object + - model + - data + - usage + CreateEvalCompletionsRunDataSource: + type: object + title: CompletionsRunDataSource + description: | + A CompletionsRunDataSource object describing a model sampling configuration. + properties: + type: + type: string + enum: + - completions + default: completions + description: The type of run data source. Always `completions`. + input_messages: + description: >- + Used when sampling from a model. Dictates the structure of the messages passed into the model. Can + either be a reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template with + variable references to the `item` namespace. + anyOf: + - type: object + title: TemplateInputMessages + properties: + type: + type: string + enum: + - template + description: The type of input messages. Always `template`. 
+ template: + type: array + description: >- + A list of chat messages forming the prompt or context. May include variable references to + the `item` namespace, ie {{item.name}}. + items: + anyOf: + - $ref: '#/components/schemas/EasyInputMessage' + - $ref: '#/components/schemas/EvalItem' + required: + - type + - template + - type: object + title: ItemReferenceInputMessages + properties: + type: + type: string + enum: + - item_reference + description: The type of input messages. Always `item_reference`. + item_reference: + type: string + description: A reference to a variable in the `item` namespace. Ie, "item.input_trajectory" + required: + - type + - item_reference + discriminator: + propertyName: type + sampling_params: + type: object + properties: + reasoning_effort: + $ref: '#/components/schemas/ReasoningEffort' + temperature: + type: number + description: A higher temperature increases randomness in the outputs. + default: 1 + max_completion_tokens: + type: integer + description: The maximum number of tokens in the generated output. + top_p: + type: number + description: An alternative to temperature for nucleus sampling; 1.0 includes all tokens. + default: 1 + seed: + type: integer + description: A seed value to initialize the randomness, during sampling. + default: 42 + response_format: + description: | + An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables + Structured Outputs which ensures the model will match your supplied JSON + schema. Learn more in the [Structured Outputs + guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` + is preferred for models that support it. + anyOf: + - $ref: '#/components/schemas/ResponseFormatText' + - $ref: '#/components/schemas/ResponseFormatJsonSchema' + - $ref: '#/components/schemas/ResponseFormatJsonObject' + tools: + type: array + description: > + A list of tools the model may call. Currently, only functions are supported as a tool. Use + this to provide a list of functions the model may generate JSON inputs for. A max of 128 + functions are supported. + items: + $ref: '#/components/schemas/ChatCompletionTool' + model: + type: string + description: The name of the model to use for generating completions (e.g. "o3-mini"). + source: + description: Determines what populates the `item` namespace in this run's data source. + anyOf: + - $ref: '#/components/schemas/EvalJsonlFileContentSource' + - $ref: '#/components/schemas/EvalJsonlFileIdSource' + - $ref: '#/components/schemas/EvalStoredCompletionsSource' + discriminator: + propertyName: type + required: + - type + - source + x-oaiMeta: + name: The completions data source object used to configure an individual run + group: eval runs + example: | + { + "name": "gpt-4o-mini-2024-07-18", + "data_source": { + "type": "completions", + "input_messages": { + "type": "item_reference", + "item_reference": "item.input" + }, + "model": "gpt-4o-mini-2024-07-18", + "source": { + "type": "stored_completions", + "model": "gpt-4o-mini-2024-07-18" + } + } + } + CreateEvalCustomDataSourceConfig: + type: object + title: CustomDataSourceConfig + description: > + A CustomDataSourceConfig object that defines the schema for the data source used for the evaluation + runs. 
+ + This schema is used to define the shape of the data that will be: + + - Used to define your testing criteria and + + - What data is required when creating a run + properties: + type: + type: string + enum: + - custom + default: custom + description: The type of data source. Always `custom`. + x-stainless-const: true + item_schema: + type: object + description: The json schema for each row in the data source. + additionalProperties: true + include_sample_schema: + type: boolean + default: false + description: >- + Whether the eval should expect you to populate the sample namespace (ie, by generating responses + off of your data source) + required: + - item_schema + - type + x-oaiMeta: + name: The eval file data source config object + group: evals + example: | + { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"} + }, + "required": ["name", "age"] + }, + "include_sample_schema": true + } + CreateEvalItem: + title: CreateEvalItem + description: >- + A chat message that makes up the prompt or context. May include variable references to the `item` + namespace, ie {{item.name}}. + type: object + x-oaiMeta: + name: The chat message object used to configure an individual run + anyOf: + - type: object + title: SimpleInputMessage + properties: + role: + type: string + description: The role of the message (e.g. "system", "assistant", "user"). + content: + type: string + description: The content of the message. + required: + - role + - content + - $ref: '#/components/schemas/EvalItem' + CreateEvalJsonlRunDataSource: + type: object + title: JsonlRunDataSource + description: | + A JsonlRunDataSource object that specifies a JSONL file that matches the eval + properties: + type: + type: string + enum: + - jsonl + default: jsonl + description: The type of data source. Always `jsonl`. + x-stainless-const: true + source: + description: Determines what populates the `item` namespace in the data source. + anyOf: + - $ref: '#/components/schemas/EvalJsonlFileContentSource' + - $ref: '#/components/schemas/EvalJsonlFileIdSource' + discriminator: + propertyName: type + required: + - type + - source + x-oaiMeta: + name: The file data source object for the eval run configuration + group: evals + example: | + { + "type": "jsonl", + "source": { + "type": "file_id", + "id": "file-9GYS6xbkWgWhmE7VoLUWFg" + } + } + CreateEvalLabelModelGrader: + type: object + title: LabelModelGrader + description: | + A LabelModelGrader object which uses a model to assign labels to each item + in the evaluation. + properties: + type: + description: The object type, which is always `label_model`. + type: string + enum: + - label_model + x-stainless-const: true + name: + type: string + description: The name of the grader. + model: + type: string + description: The model to use for the evaluation. Must support structured outputs. + input: + type: array + description: >- + A list of chat messages forming the prompt or context. May include variable references to the + `item` namespace, ie {{item.name}}. + items: + $ref: '#/components/schemas/CreateEvalItem' + labels: + type: array + items: + type: string + description: The labels to assign to each item in the evaluation. + passing_labels: + type: array + items: + type: string + description: The labels that indicate a passing result. Must be a subset of labels.
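To make the eval schemas above concrete, here is a minimal sketch of how a custom data source config and a label model grader compose into a single create call, assuming the official `openai` Python client exposes an `evals` resource; the eval name, item schema fields, and labels are illustrative, not part of the spec.

```python
from openai import OpenAI

client = OpenAI()

# Sketch: an eval whose rows carry a single "response" string (per the custom
# item_schema) and are graded by a label model, mirroring the
# CreateEvalCustomDataSourceConfig and CreateEvalLabelModelGrader schemas above.
eval_obj = client.evals.create(
    name="sentiment-eval",
    data_source_config={
        "type": "custom",
        "item_schema": {
            "type": "object",
            "properties": {"response": {"type": "string"}},
            "required": ["response"],
        },
    },
    testing_criteria=[
        {
            "type": "label_model",
            "name": "Sentiment label grader",
            "model": "gpt-4o-2024-08-06",
            "input": [
                {"role": "system", "content": "Classify the sentiment as 'positive', 'neutral', or 'negative'."},
                {"role": "user", "content": "Statement: {{item.response}}"},
            ],
            "labels": ["positive", "neutral", "negative"],
            "passing_labels": ["positive"],
        }
    ],
)
print(eval_obj.id)
```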
+ required: + - type + - model + - input + - passing_labels + - labels + - name + x-oaiMeta: + name: The eval label model grader object + group: evals + example: | + { + "type": "label_model", + "model": "gpt-4o-2024-08-06", + "input": [ + { + "role": "system", + "content": "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'" + }, + { + "role": "user", + "content": "Statement: {{item.response}}" + } + ], + "passing_labels": ["positive"], + "labels": ["positive", "neutral", "negative"], + "name": "Sentiment label grader" + } + CreateEvalLogsDataSourceConfig: + type: object + title: LogsDataSourceConfig + description: | + A data source config which specifies the metadata property of your logs query. + This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. + properties: + type: + type: string + enum: + - logs + default: logs + description: The type of data source. Always `logs`. + x-stainless-const: true + metadata: + type: object + description: Metadata filters for the logs data source. + additionalProperties: true + required: + - type + x-oaiMeta: + name: The logs data source object for evals + group: evals + example: | + { + "type": "logs", + "metadata": { + "use_case": "customer_support_agent" + } + } + CreateEvalRequest: + type: object + title: CreateEvalRequest + properties: + name: + type: string + description: The name of the evaluation. + metadata: + $ref: '#/components/schemas/Metadata' + data_source_config: + type: object + description: >- + The configuration for the data source used for the evaluation runs. Dictates the schema of the + data used in the evaluation. + anyOf: + - $ref: '#/components/schemas/CreateEvalCustomDataSourceConfig' + - $ref: '#/components/schemas/CreateEvalLogsDataSourceConfig' + - $ref: '#/components/schemas/CreateEvalStoredCompletionsDataSourceConfig' + discriminator: + propertyName: type + testing_criteria: + type: array + description: >- + A list of graders for all eval runs in this group. Graders can reference variables in the data + source using double curly braces notation, like `{{item.variable_name}}`. To reference the model's + output, use the `sample` namespace (ie, `{{sample.output_text}}`). + items: + anyOf: + - $ref: '#/components/schemas/CreateEvalLabelModelGrader' + - $ref: '#/components/schemas/EvalGraderStringCheck' + - $ref: '#/components/schemas/EvalGraderTextSimilarity' + - $ref: '#/components/schemas/EvalGraderPython' + - $ref: '#/components/schemas/EvalGraderScoreModel' + discriminator: + propertyName: type + required: + - data_source_config + - testing_criteria + CreateEvalResponsesRunDataSource: + type: object + title: CreateEvalResponsesRunDataSource + description: | + A ResponsesRunDataSource object describing a model sampling configuration. + properties: + type: + type: string + enum: + - responses + default: responses + description: The type of run data source. Always `responses`. + input_messages: + description: >- + Used when sampling from a model. Dictates the structure of the messages passed into the model. Can + either be a reference to a prebuilt trajectory (ie, `item.input_trajectory`), or a template with + variable references to the `item` namespace. + anyOf: + - type: object + title: InputMessagesTemplate + properties: + type: + type: string + enum: + - template + description: The type of input messages. Always `template`. + template: + type: array + description: >- + A list of chat messages forming the prompt or context. 
May include variable references to + the `item` namespace, ie {{item.name}}. + items: + anyOf: + - type: object + title: ChatMessage + properties: + role: + type: string + description: The role of the message (e.g. "system", "assistant", "user"). + content: + type: string + description: The content of the message. + required: + - role + - content + - $ref: '#/components/schemas/EvalItem' + required: + - type + - template + - type: object + title: InputMessagesItemReference + properties: + type: + type: string + enum: + - item_reference + description: The type of input messages. Always `item_reference`. + item_reference: + type: string + description: A reference to a variable in the `item` namespace. Ie, "item.name" + required: + - type + - item_reference + discriminator: + propertyName: type + sampling_params: + type: object + properties: + reasoning_effort: + $ref: '#/components/schemas/ReasoningEffort' + temperature: + type: number + description: A higher temperature increases randomness in the outputs. + default: 1 + max_completion_tokens: + type: integer + description: The maximum number of tokens in the generated output. + top_p: + type: number + description: An alternative to temperature for nucleus sampling; 1.0 includes all tokens. + default: 1 + seed: + type: integer + description: A seed value to initialize the randomness, during sampling. + default: 42 + tools: + type: array + description: | + An array of tools the model may call while generating a response. You + can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the + model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search) + or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, + enabling the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + items: + $ref: '#/components/schemas/Tool' + text: + type: object + description: | + Configuration options for a text response from the model. Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + properties: + format: + $ref: '#/components/schemas/TextResponseFormatConfiguration' + model: + type: string + description: The name of the model to use for generating completions (e.g. "o3-mini"). + source: + description: Determines what populates the `item` namespace in this run's data source. 
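As a usage sketch for the responses run data source, the call below starts a run whose `input_messages` is an `item_reference`, assuming an eval created beforehand and that the Python client takes the eval ID as the first argument to `client.evals.runs.create`; `eval_abc123` and `item.input` are placeholders.

```python
from openai import OpenAI

client = OpenAI()

# Sketch: run an eval by sampling gpt-4o-mini through the Responses API;
# "item.input" must exist in the eval's item schema (placeholder here).
run = client.evals.runs.create(
    "eval_abc123",  # placeholder eval ID
    name="gpt-4o-mini-responses-run",
    data_source={
        "type": "responses",
        "input_messages": {"type": "item_reference", "item_reference": "item.input"},
        "model": "gpt-4o-mini",
        "source": {"type": "responses", "model": "gpt-4o-mini"},
    },
)
print(run.status)
```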
+ anyOf: + - $ref: '#/components/schemas/EvalJsonlFileContentSource' + - $ref: '#/components/schemas/EvalJsonlFileIdSource' + - $ref: '#/components/schemas/EvalResponsesSource' + discriminator: + propertyName: type + required: + - type + - source + x-oaiMeta: + name: The completions data source object used to configure an individual run + group: eval runs + example: | + { + "name": "gpt-4o-mini-2024-07-18", + "data_source": { + "type": "responses", + "input_messages": { + "type": "item_reference", + "item_reference": "item.input" + }, + "model": "gpt-4o-mini-2024-07-18", + "source": { + "type": "responses", + "model": "gpt-4o-mini-2024-07-18" + } + } + } + CreateEvalRunRequest: + type: object + title: CreateEvalRunRequest + properties: + name: + type: string + description: The name of the run. + metadata: + $ref: '#/components/schemas/Metadata' + data_source: + type: object + description: Details about the run's data source. + anyOf: + - $ref: '#/components/schemas/CreateEvalJsonlRunDataSource' + - $ref: '#/components/schemas/CreateEvalCompletionsRunDataSource' + - $ref: '#/components/schemas/CreateEvalResponsesRunDataSource' + required: + - data_source + CreateEvalStoredCompletionsDataSourceConfig: + type: object + title: StoredCompletionsDataSourceConfig + description: | + Deprecated in favor of LogsDataSourceConfig. + properties: + type: + type: string + enum: + - stored_completions + default: stored_completions + description: The type of data source. Always `stored_completions`. + x-stainless-const: true + metadata: + type: object + description: Metadata filters for the stored completions data source. + additionalProperties: true + required: + - type + deprecated: true + x-oaiMeta: + name: The stored completions data source object for evals + group: evals + example: | + { + "type": "stored_completions", + "metadata": { + "use_case": "customer_support_agent" + } + } + CreateFileRequest: + type: object + additionalProperties: false + properties: + file: + description: | + The File object (not file name) to be uploaded. + type: string + format: binary + x-oaiMeta: + exampleFilePath: fine-tune.jsonl + purpose: + $ref: '#/components/schemas/FilePurpose' + expires_after: + $ref: '#/components/schemas/FileExpirationAfter' + required: + - file + - purpose + CreateFineTuningCheckpointPermissionRequest: + type: object + additionalProperties: false + properties: + project_ids: + type: array + description: The project identifiers to grant access to. + items: + type: string + required: + - project_ids + CreateFineTuningJobRequest: + type: object + properties: + model: + description: > + The name of the model to fine-tune. You can select one of the + + [supported + models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). + example: gpt-4o-mini + anyOf: + - type: string + - type: string + enum: + - babbage-002 + - davinci-002 + - gpt-3.5-turbo + - gpt-4o-mini + title: Preset + x-oaiTypeLabel: string + training_file: + description: > + The ID of an uploaded file that contains training data. + + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a + file. + + + Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the + purpose `fine-tune`. 
+ + + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. + + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more + details. + type: string + example: file-abc123 + hyperparameters: + type: object + description: > + The hyperparameters used for the fine-tuning job. + + This value is now deprecated in favor of `method`, and should be passed in under the `method` + parameter. + properties: + batch_size: + description: | + Number of examples in each batch. A larger batch size means that model parameters + are updated less frequently, but with lower variance. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + title: Auto + - type: integer + minimum: 1 + maximum: 256 + learning_rate_multiplier: + description: | + Scaling factor for the learning rate. A smaller learning rate may be useful to avoid + overfitting. + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + title: Auto + - type: number + minimum: 0 + exclusiveMinimum: true + n_epochs: + description: | + The number of epochs to train the model for. An epoch refers to one full cycle + through the training dataset. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + title: Auto + - type: integer + minimum: 1 + maximum: 50 + deprecated: true + suffix: + description: > + A string of up to 64 characters that will be added to your fine-tuned model name. + + + For example, a `suffix` of "custom-model-name" would produce a model name like + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. + type: string + minLength: 1 + maxLength: 64 + default: null + nullable: true + validation_file: + description: > + The ID of an uploaded file that contains validation data. + + + If you provide this file, the data is used to generate validation + + metrics periodically during fine-tuning. These metrics can be viewed in + + the fine-tuning results file. + + The same data should not be present in both train and validation files. + + + Your dataset must be formatted as a JSONL file. You must upload your file with the purpose + `fine-tune`. + + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/model-optimization) for more + details. + type: string + nullable: true + example: file-abc123 + integrations: + type: array + description: A list of integrations to enable for your fine-tuning job. + nullable: true + items: + type: object + required: + - type + - wandb + properties: + type: + description: > + The type of integration to enable. Currently, only "wandb" (Weights and Biases) is + supported. + anyOf: + - type: string + enum: + - wandb + x-stainless-const: true + wandb: + type: object + description: > + The settings for your integration with Weights and Biases. This payload specifies the + project that + + metrics will be sent to. Optionally, you can set an explicit display name for your run, add + tags + + to your run, and set a default entity (team, username, etc) to be associated with your run. + required: + - project + properties: + project: + description: | + The name of the project that the new run will be created under. 
+ type: string + example: my-wandb-project + name: + description: | + A display name to set for the run. If not set, we will use the Job ID as the name. + nullable: true + type: string + entity: + description: > + The entity to use for the run. This allows you to set the team or username of the WandB + user that you would + + like associated with the run. If not set, the default entity for the registered WandB + API key is used. + nullable: true + type: string + tags: + description: > + A list of tags to be attached to the newly created run. These tags are passed through + directly to WandB. Some + + default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", + "openai/{ftjob-abcdef}". + type: array + items: + type: string + example: custom-tag + seed: + description: > + The seed controls the reproducibility of the job. Passing in the same seed and job parameters + should produce the same results, but may differ in rare cases. + + If a seed is not specified, one will be generated for you. + type: integer + nullable: true + minimum: 0 + maximum: 2147483647 + example: 42 + method: + $ref: '#/components/schemas/FineTuneMethod' + metadata: + $ref: '#/components/schemas/Metadata' + required: + - model + - training_file + CreateImageEditRequest: + type: object + properties: + image: + anyOf: + - type: string + format: binary + - type: array + maxItems: 16 + items: + type: string + format: binary + description: | + The image(s) to edit. Must be a supported image file or an array of images. + + For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less + than 50MB. You can provide up to 16 images. + + For `dall-e-2`, you can only provide one image, and it should be a square + `png` file less than 4MB. + x-oaiMeta: + exampleFilePath: otter.png + prompt: + description: >- + A text description of the desired image(s). The maximum length is 1000 characters for `dall-e-2`, + and 32000 characters for `gpt-image-1`. + type: string + example: A cute baby sea otter wearing a beret + mask: + description: >- + An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where + `image` should be edited. If there are multiple images provided, the mask will be applied on the + first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`. + type: string + format: binary + x-oaiMeta: + exampleFilePath: mask.png + background: + type: string + enum: + - transparent + - opaque + - auto + default: auto + example: transparent + nullable: true + description: | + Allows you to set transparency for the background of the generated image(s). + This parameter is only supported for `gpt-image-1`. Must be one of + `transparent`, `opaque` or `auto` (default value). When `auto` is used, the + model will automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it + should be set to either `png` (default value) or `webp`. + model: + anyOf: + - type: string + - type: string + enum: + - dall-e-2 + - gpt-image-1 + - gpt-image-1-mini + x-stainless-const: true + x-oaiTypeLabel: string + nullable: true + description: >- + The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are supported. Defaults + to `dall-e-2` unless a parameter specific to `gpt-image-1` is used. + 'n': + type: integer + minimum: 1 + maximum: 10 + default: 1 + example: 1 + nullable: true + description: The number of images to generate. Must be between 1 and 10.
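A short sketch of CreateImageEditRequest in practice, assuming the `openai` Python client; the file names and prompt are placeholders, and `gpt-image-1` returns base64-encoded image data rather than URLs.

```python
import base64

from openai import OpenAI

client = OpenAI()

# Sketch: edit a local image with gpt-image-1 (file names are placeholders).
with open("otter.png", "rb") as image:
    result = client.images.edit(
        model="gpt-image-1",
        image=image,
        prompt="Add a red beret to the otter",
        n=1,
        size="1024x1024",
    )

with open("otter-edited.png", "wb") as out:
    out.write(base64.b64decode(result.data[0].b64_json))
```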
+ size: + type: string + enum: + - 256x256 + - 512x512 + - 1024x1024 + - 1536x1024 + - 1024x1536 + - auto + default: 1024x1024 + example: 1024x1024 + nullable: true + description: >- + The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` + (portrait), or `auto` (default value) for `gpt-image-1`, and one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`. + response_format: + type: string + enum: + - url + - b64_json + default: url + example: url + nullable: true + description: >- + The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs + are only valid for 60 minutes after the image has been generated. This parameter is only supported + for `dall-e-2`, as `gpt-image-1` will always return base64-encoded images. + output_format: + type: string + enum: + - png + - jpeg + - webp + default: png + example: png + nullable: true + description: | + The format in which the generated images are returned. This parameter is + only supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + The default value is `png`. + output_compression: + type: integer + default: 100 + example: 100 + nullable: true + description: | + The compression level (0-100%) for the generated images. This parameter + is only supported for `gpt-image-1` with the `webp` or `jpeg` output + formats, and defaults to 100. + user: + type: string + example: user-1234 + description: > + A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + input_fidelity: + anyOf: + - $ref: '#/components/schemas/InputFidelity' + - type: 'null' + stream: + type: boolean + default: false + example: false + nullable: true + description: > + Edit the image in streaming mode. Defaults to `false`. See the + + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more + information. + partial_images: + $ref: '#/components/schemas/PartialImages' + quality: + type: string + enum: + - standard + - low + - medium + - high + - auto + default: auto + example: high + nullable: true + description: > + The quality of the image that will be generated. `high`, `medium` and `low` are only supported for + `gpt-image-1`. `dall-e-2` only supports `standard` quality. Defaults to `auto`. + required: + - prompt + - image + CreateImageRequest: + type: object + properties: + prompt: + description: >- + A text description of the desired image(s). The maximum length is 32000 characters for + `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters for `dall-e-3`. + type: string + example: A cute baby sea otter + model: + anyOf: + - type: string + - type: string + enum: + - dall-e-2 + - dall-e-3 + - gpt-image-1 + - gpt-image-1-mini + x-stainless-nominal: false + x-oaiTypeLabel: string + nullable: true + description: >- + The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or `gpt-image-1`. Defaults + to `dall-e-2` unless a parameter specific to `gpt-image-1` is used. + 'n': + type: integer + minimum: 1 + maximum: 10 + default: 1 + example: 1 + nullable: true + description: >- + The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is + supported. + quality: + type: string + enum: + - standard + - hd + - low + - medium + - high + - auto + default: auto + example: medium + nullable: true + description: | + The quality of the image that will be generated. 
+ + - `auto` (default value) will automatically select the best quality for the given model. + - `high`, `medium` and `low` are supported for `gpt-image-1`. + - `hd` and `standard` are supported for `dall-e-3`. + - `standard` is the only option for `dall-e-2`. + response_format: + type: string + enum: + - url + - b64_json + default: url + example: url + nullable: true + description: >- + The format in which generated images with `dall-e-2` and `dall-e-3` are returned. Must be one of + `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. This + parameter isn't supported for `gpt-image-1` which will always return base64-encoded images. + output_format: + type: string + enum: + - png + - jpeg + - webp + default: png + example: png + nullable: true + description: >- + The format in which the generated images are returned. This parameter is only supported for + `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`. + output_compression: + type: integer + default: 100 + example: 100 + nullable: true + description: >- + The compression level (0-100%) for the generated images. This parameter is only supported for + `gpt-image-1` with the `webp` or `jpeg` output formats, and defaults to 100. + stream: + type: boolean + default: false + example: false + nullable: true + description: > + Generate the image in streaming mode. Defaults to `false`. See the + + [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more + information. + + This parameter is only supported for `gpt-image-1`. + partial_images: + $ref: '#/components/schemas/PartialImages' + size: + type: string + enum: + - auto + - 1024x1024 + - 1536x1024 + - 1024x1536 + - 256x256 + - 512x512 + - 1792x1024 + - 1024x1792 + default: auto + example: 1024x1024 + nullable: true + description: >- + The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` + (portrait), or `auto` (default value) for `gpt-image-1`, one of `256x256`, `512x512`, or + `1024x1024` for `dall-e-2`, and one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`. + moderation: + type: string + enum: + - low + - auto + default: auto + example: low + nullable: true + description: >- + Control the content-moderation level for images generated by `gpt-image-1`. Must be either `low` + for less restrictive filtering or `auto` (default value). + background: + type: string + enum: + - transparent + - opaque + - auto + default: auto + example: transparent + nullable: true + description: | + Allows you to set transparency for the background of the generated image(s). + This parameter is only supported for `gpt-image-1`. Must be one of + `transparent`, `opaque` or `auto` (default value). When `auto` is used, the + model will automatically determine the best background for the image. + + If `transparent`, the output format needs to support transparency, so it + should be set to either `png` (default value) or `webp`. + style: + type: string + enum: + - vivid + - natural + default: vivid + example: vivid + nullable: true + description: >- + The style of the generated images. This parameter is only supported for `dall-e-3`. Must be one of + `vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic + images. Natural causes the model to produce more natural, less hyper-real looking images.
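And the generation counterpart, a sketch of CreateImageRequest assuming a client version that exposes the `gpt-image-1` parameters (`background`, `output_format`, `quality`); prompt and output path are placeholders.

```python
import base64

from openai import OpenAI

client = OpenAI()

# Sketch: generate a transparent-background PNG with gpt-image-1.
result = client.images.generate(
    model="gpt-image-1",
    prompt="A cute baby sea otter, sticker style",
    size="1024x1024",
    quality="high",
    background="transparent",
    output_format="png",
)

with open("otter-sticker.png", "wb") as f:
    f.write(base64.b64decode(result.data[0].b64_json))
```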
+ user: + type: string + example: user-1234 + description: > + A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + required: + - prompt + CreateImageVariationRequest: + type: object + properties: + image: + description: >- + The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and + square. + type: string + format: binary + x-oaiMeta: + exampleFilePath: otter.png + model: + anyOf: + - type: string + - type: string + enum: + - dall-e-2 + x-stainless-const: true + x-oaiTypeLabel: string + nullable: true + description: The model to use for image generation. Only `dall-e-2` is supported at this time. + 'n': + type: integer + minimum: 1 + maximum: 10 + default: 1 + example: 1 + nullable: true + description: The number of images to generate. Must be between 1 and 10. + response_format: + type: string + enum: + - url + - b64_json + default: url + example: url + nullable: true + description: >- + The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs + are only valid for 60 minutes after the image has been generated. + size: + type: string + enum: + - 256x256 + - 512x512 + - 1024x1024 + default: 1024x1024 + example: 1024x1024 + nullable: true + description: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. + user: + type: string + example: user-1234 + description: > + A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + required: + - image + CreateMessageRequest: + type: object + additionalProperties: false + required: + - role + - content + properties: + role: + type: string + enum: + - user + - assistant + description: > + The role of the entity that is creating the message. Allowed values include: + + - `user`: Indicates the message is sent by an actual user and should be used in most cases to + represent user-generated messages. + + - `assistant`: Indicates the message is generated by the assistant. Use this value to insert + messages from the assistant into the conversation. + content: + anyOf: + - type: string + description: The text contents of the message. + title: Text content + - type: array + description: >- + An array of content parts with a defined type, each can be of type `text` or images can be + passed with `image_url` or `image_file`. Image types are only supported on [Vision-compatible + models](https://platform.openai.com/docs/models). + title: Array of content parts + items: + anyOf: + - $ref: '#/components/schemas/MessageContentImageFileObject' + - $ref: '#/components/schemas/MessageContentImageUrlObject' + - $ref: '#/components/schemas/MessageRequestContentTextObject' + discriminator: + propertyName: type + minItems: 1 + attachments: + anyOf: + - type: array + items: + type: object + properties: + file_id: + type: string + description: The ID of the file to attach to the message. + tools: + description: The tools to add this file to. + type: array + items: + anyOf: + - $ref: '#/components/schemas/AssistantToolsCode' + - $ref: '#/components/schemas/AssistantToolsFileSearchTypeOnly' + discriminator: + propertyName: type + description: A list of files attached to the message, and the tools they should be added to. 
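For CreateMessageRequest, here is a sketch of attaching an uploaded file to a user message so a file-search-capable assistant can read it, assuming the beta Assistants surface of the `openai` Python client; all IDs are placeholders.

```python
from openai import OpenAI

client = OpenAI()

# Sketch: each attachment pairs a file ID with the tools that may read it,
# matching the attachments schema above (IDs are placeholders).
message = client.beta.threads.messages.create(
    thread_id="thread_abc123",
    role="user",
    content="Summarize the attached report.",
    attachments=[
        {"file_id": "file-abc123", "tools": [{"type": "file_search"}]}
    ],
)
print(message.id)
```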
+ required: + - file_id + - tools + - type: 'null' + metadata: + $ref: '#/components/schemas/Metadata' + CreateModelResponseProperties: + allOf: + - $ref: '#/components/schemas/ModelResponseProperties' + - type: object + properties: + top_logprobs: + description: | + An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + type: integer + minimum: 0 + maximum: 20 + CreateModerationRequest: + type: object + properties: + input: + description: | + Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. + anyOf: + - type: string + description: A string of text to classify for moderation. + default: '' + example: I want to kill them. + - type: array + description: An array of strings to classify for moderation. + items: + type: string + default: '' + example: I want to kill them. + - type: array + description: An array of multi-modal inputs to the moderation model. + items: + anyOf: + - $ref: '#/components/schemas/ModerationImageURLInput' + - $ref: '#/components/schemas/ModerationTextInput' + discriminator: + propertyName: type + title: Moderation Multi Modal Array + model: + description: | + The content moderation model you would like to use. Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and learn about + available models [here](https://platform.openai.com/docs/models#moderation). + nullable: false + anyOf: + - type: string + - type: string + enum: + - omni-moderation-latest + - omni-moderation-2024-09-26 + - text-moderation-latest + - text-moderation-stable + x-stainless-nominal: false + x-oaiTypeLabel: string + required: + - input + CreateModerationResponse: + type: object + description: Represents if a given text input is potentially harmful. + properties: + id: + type: string + description: The unique identifier for the moderation request. + model: + type: string + description: The model used to generate the moderation results. + results: + type: array + description: A list of moderation objects. + items: + type: object + properties: + flagged: + type: boolean + description: Whether any of the below categories are flagged. + categories: + type: object + description: A list of the categories, and whether they are flagged or not. + properties: + hate: + type: boolean + description: >- + Content that expresses, incites, or promotes hate based on race, gender, ethnicity, + religion, nationality, sexual orientation, disability status, or caste. Hateful content + aimed at non-protected groups (e.g., chess players) is harassment. + hate/threatening: + type: boolean + description: >- + Hateful content that also includes violence or serious harm towards the targeted group + based on race, gender, ethnicity, religion, nationality, sexual orientation, disability + status, or caste. + harassment: + type: boolean + description: Content that expresses, incites, or promotes harassing language towards any target. + harassment/threatening: + type: boolean + description: Harassment content that also includes violence or serious harm towards any target. + illicit: + anyOf: + - type: boolean + description: >- + Content that includes instructions or advice that facilitate the planning or + execution of wrongdoing, or that gives advice or instruction on how to commit + illicit acts. For example, "how to shoplift" would fit this category. 
+ - type: 'null' + illicit/violent: + anyOf: + - type: boolean + description: >- + Content that includes instructions or advice that facilitate the planning or + execution of wrongdoing that also includes violence, or that gives advice or + instruction on the procurement of any weapon. + - type: 'null' + self-harm: + type: boolean + description: >- + Content that promotes, encourages, or depicts acts of self-harm, such as suicide, + cutting, and eating disorders. + self-harm/intent: + type: boolean + description: >- + Content where the speaker expresses that they are engaging or intend to engage in acts + of self-harm, such as suicide, cutting, and eating disorders. + self-harm/instructions: + type: boolean + description: >- + Content that encourages performing acts of self-harm, such as suicide, cutting, and + eating disorders, or that gives instructions or advice on how to commit such acts. + sexual: + type: boolean + description: >- + Content meant to arouse sexual excitement, such as the description of sexual activity, + or that promotes sexual services (excluding sex education and wellness). + sexual/minors: + type: boolean + description: Sexual content that includes an individual who is under 18 years old. + violence: + type: boolean + description: Content that depicts death, violence, or physical injury. + violence/graphic: + type: boolean + description: Content that depicts death, violence, or physical injury in graphic detail. + required: + - hate + - hate/threatening + - harassment + - harassment/threatening + - illicit + - illicit/violent + - self-harm + - self-harm/intent + - self-harm/instructions + - sexual + - sexual/minors + - violence + - violence/graphic + category_scores: + type: object + description: A list of the categories along with their scores as predicted by the model. + properties: + hate: + type: number + description: The score for the category 'hate'. + hate/threatening: + type: number + description: The score for the category 'hate/threatening'. + harassment: + type: number + description: The score for the category 'harassment'. + harassment/threatening: + type: number + description: The score for the category 'harassment/threatening'. + illicit: + type: number + description: The score for the category 'illicit'. + illicit/violent: + type: number + description: The score for the category 'illicit/violent'. + self-harm: + type: number + description: The score for the category 'self-harm'. + self-harm/intent: + type: number + description: The score for the category 'self-harm/intent'. + self-harm/instructions: + type: number + description: The score for the category 'self-harm/instructions'. + sexual: + type: number + description: The score for the category 'sexual'. + sexual/minors: + type: number + description: The score for the category 'sexual/minors'. + violence: + type: number + description: The score for the category 'violence'. + violence/graphic: + type: number + description: The score for the category 'violence/graphic'. + required: + - hate + - hate/threatening + - harassment + - harassment/threatening + - illicit + - illicit/violent + - self-harm + - self-harm/intent + - self-harm/instructions + - sexual + - sexual/minors + - violence + - violence/graphic + category_applied_input_types: + type: object + description: A list of the categories along with the input type(s) that the score applies to. + properties: + hate: + type: array + description: The applied input type(s) for the category 'hate'.
+ items: + type: string + enum: + - text + x-stainless-const: true + hate/threatening: + type: array + description: The applied input type(s) for the category 'hate/threatening'. + items: + type: string + enum: + - text + x-stainless-const: true + harassment: + type: array + description: The applied input type(s) for the category 'harassment'. + items: + type: string + enum: + - text + x-stainless-const: true + harassment/threatening: + type: array + description: The applied input type(s) for the category 'harassment/threatening'. + items: + type: string + enum: + - text + x-stainless-const: true + illicit: + type: array + description: The applied input type(s) for the category 'illicit'. + items: + type: string + enum: + - text + x-stainless-const: true + illicit/violent: + type: array + description: The applied input type(s) for the category 'illicit/violent'. + items: + type: string + enum: + - text + x-stainless-const: true + self-harm: + type: array + description: The applied input type(s) for the category 'self-harm'. + items: + type: string + enum: + - text + - image + self-harm/intent: + type: array + description: The applied input type(s) for the category 'self-harm/intent'. + items: + type: string + enum: + - text + - image + self-harm/instructions: + type: array + description: The applied input type(s) for the category 'self-harm/instructions'. + items: + type: string + enum: + - text + - image + sexual: + type: array + description: The applied input type(s) for the category 'sexual'. + items: + type: string + enum: + - text + - image + sexual/minors: + type: array + description: The applied input type(s) for the category 'sexual/minors'. + items: + type: string + enum: + - text + x-stainless-const: true + violence: + type: array + description: The applied input type(s) for the category 'violence'. + items: + type: string + enum: + - text + - image + violence/graphic: + type: array + description: The applied input type(s) for the category 'violence/graphic'. 
+ items: + type: string + enum: + - text + - image + required: + - hate + - hate/threatening + - harassment + - harassment/threatening + - illicit + - illicit/violent + - self-harm + - self-harm/intent + - self-harm/instructions + - sexual + - sexual/minors + - violence + - violence/graphic + required: + - flagged + - categories + - category_scores + - category_applied_input_types + required: + - id + - model + - results + x-oaiMeta: + name: The moderation object + example: | + { + "id": "modr-0d9740456c391e43c445bf0f010940c7", + "model": "omni-moderation-latest", + "results": [ + { + "flagged": true, + "categories": { + "harassment": true, + "harassment/threatening": true, + "sexual": false, + "hate": false, + "hate/threatening": false, + "illicit": false, + "illicit/violent": false, + "self-harm/intent": false, + "self-harm/instructions": false, + "self-harm": false, + "sexual/minors": false, + "violence": true, + "violence/graphic": true + }, + "category_scores": { + "harassment": 0.8189693396524255, + "harassment/threatening": 0.804985420696006, + "sexual": 1.573112165348997e-6, + "hate": 0.007562942636942845, + "hate/threatening": 0.004208854591835476, + "illicit": 0.030535955153511665, + "illicit/violent": 0.008925306722380033, + "self-harm/intent": 0.00023023930975076432, + "self-harm/instructions": 0.0002293869201073356, + "self-harm": 0.012598046106750154, + "sexual/minors": 2.212566909570261e-8, + "violence": 0.9999992735124786, + "violence/graphic": 0.843064871157054 + }, + "category_applied_input_types": { + "harassment": [ + "text" + ], + "harassment/threatening": [ + "text" + ], + "sexual": [ + "text", + "image" + ], + "hate": [ + "text" + ], + "hate/threatening": [ + "text" + ], + "illicit": [ + "text" + ], + "illicit/violent": [ + "text" + ], + "self-harm/intent": [ + "text", + "image" + ], + "self-harm/instructions": [ + "text", + "image" + ], + "self-harm": [ + "text", + "image" + ], + "sexual/minors": [ + "text" + ], + "violence": [ + "text", + "image" + ], + "violence/graphic": [ + "text", + "image" + ] + } + } + ] + } + CreateResponse: + allOf: + - $ref: '#/components/schemas/CreateModelResponseProperties' + - $ref: '#/components/schemas/ResponseProperties' + - type: object + properties: + input: + $ref: '#/components/schemas/InputParam' + include: + anyOf: + - type: array + description: >- + Specify additional output data to include in the model response. Currently supported + values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool call. + + - `code_interpreter_call.outputs`: Includes the outputs of python code execution in code + interpreter tool call items. + + - `computer_call_output.output.image_url`: Include image urls from the computer call + output. + + - `file_search_call.results`: Include the search results of the file search tool call. + + - `message.input_image.image_url`: Include image urls from the input message. + + - `message.output_text.logprobs`: Include logprobs with assistant messages. + + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning tokens in + reasoning item outputs. This enables reasoning items to be used in multi-turn + conversations when using the Responses API statelessly (like when the `store` parameter is + set to `false`, or when an organization is enrolled in the zero data retention program). 
+ items: + $ref: '#/components/schemas/IncludeEnum' + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + description: | + Whether to allow the model to run tool calls in parallel. + default: true + - type: 'null' + store: + anyOf: + - type: boolean + description: | + Whether to store the generated model response for later retrieval via + API. + default: true + - type: 'null' + instructions: + anyOf: + - type: string + description: | + A system (or developer) message inserted into the model's context. + + When used along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + - type: 'null' + stream: + anyOf: + - description: > + If set to true, the model response data will be streamed to the client + + as it is generated using [server-sent + events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + + See the [Streaming section + below](https://platform.openai.com/docs/api-reference/responses-streaming) + + for more information. + type: boolean + default: false + - type: 'null' + stream_options: + $ref: '#/components/schemas/ResponseStreamOptions' + conversation: + anyOf: + - $ref: '#/components/schemas/ConversationParam' + - type: 'null' + CreateRunRequest: + type: object + additionalProperties: false + properties: + assistant_id: + description: >- + The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + type: string + model: + description: >- + The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute + this run. If a value is provided here, it will override the model associated with the assistant. + If not, the model associated with the assistant will be used. + anyOf: + - type: string + - $ref: '#/components/schemas/AssistantSupportedModels' + x-oaiTypeLabel: string + nullable: true + reasoning_effort: + $ref: '#/components/schemas/ReasoningEffort' + instructions: + description: >- + Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) of the + assistant. This is useful for modifying the behavior on a per-run basis. + type: string + nullable: true + additional_instructions: + description: >- + Appends additional instructions at the end of the instructions for the run. This is useful for + modifying the behavior on a per-run basis without overriding other instructions. + type: string + nullable: true + additional_messages: + description: Adds additional messages to the thread before creating the run. + type: array + items: + $ref: '#/components/schemas/CreateMessageRequest' + nullable: true + tools: + description: >- + Override the tools the assistant can use for this run. This is useful for modifying the behavior + on a per-run basis. + nullable: true + type: array + maxItems: 20 + items: + $ref: '#/components/schemas/AssistantTool' + metadata: + $ref: '#/components/schemas/Metadata' + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: > + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic.
+ top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: > + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the + top 10% probability mass are considered. + + + We generally recommend altering this or temperature but not both. + stream: + type: boolean + nullable: true + description: > + If `true`, returns a stream of events that happen during the Run as server-sent events, + terminating when the Run enters a terminal state with a `data: [DONE]` message. + max_prompt_tokens: + type: integer + nullable: true + description: > + The maximum number of prompt tokens that may be used over the course of the run. The run will make + a best effort to use only the number of prompt tokens specified, across multiple turns of the run. + If the run exceeds the number of prompt tokens specified, the run will end with status + `incomplete`. See `incomplete_details` for more info. + minimum: 256 + max_completion_tokens: + type: integer + nullable: true + description: > + The maximum number of completion tokens that may be used over the course of the run. The run will + make a best effort to use only the number of completion tokens specified, across multiple turns of + the run. If the run exceeds the number of completion tokens specified, the run will end with + status `incomplete`. See `incomplete_details` for more info. + minimum: 256 + truncation_strategy: + allOf: + - $ref: '#/components/schemas/TruncationObject' + - nullable: true + tool_choice: + allOf: + - $ref: '#/components/schemas/AssistantsApiToolChoiceOption' + - nullable: true + parallel_tool_calls: + $ref: '#/components/schemas/ParallelToolCalls' + response_format: + $ref: '#/components/schemas/AssistantsApiResponseFormatOption' + nullable: true + required: &ref_0 + - assistant_id + CreateSpeechRequest: + type: object + additionalProperties: false + properties: + model: + description: > + One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1`, + `tts-1-hd` or `gpt-4o-mini-tts`. + anyOf: + - type: string + - type: string + enum: + - tts-1 + - tts-1-hd + - gpt-4o-mini-tts + x-stainless-nominal: false + x-oaiTypeLabel: string + input: + type: string + description: The text to generate audio for. The maximum length is 4096 characters. + maxLength: 4096 + instructions: + type: string + description: >- + Control the voice of your generated audio with additional instructions. Does not work with `tts-1` + or `tts-1-hd`. + maxLength: 4096 + voice: + description: >- + The voice to use when generating the audio. Supported voices are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`. Previews of the voices + are available in the [Text to speech + guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). + $ref: '#/components/schemas/VoiceIdsShared' + response_format: + description: The format to generate audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. + default: mp3 + type: string + enum: + - mp3 + - opus + - aac + - flac + - wav + - pcm + speed: + description: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is the default. + type: number + default: 1 + minimum: 0.25 + maximum: 4 + stream_format: + description: >- + The format to stream the audio in. Supported formats are `sse` and `audio`.
`sse` is not supported + for `tts-1` or `tts-1-hd`. + type: string + default: audio + enum: + - sse + - audio + required: + - model + - input + - voice + CreateSpeechResponseStreamEvent: + anyOf: + - $ref: '#/components/schemas/SpeechAudioDeltaEvent' + - $ref: '#/components/schemas/SpeechAudioDoneEvent' + discriminator: + propertyName: type + CreateThreadAndRunRequest: + type: object + additionalProperties: false + properties: + assistant_id: + description: >- + The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + type: string + thread: + $ref: '#/components/schemas/CreateThreadRequest' + model: + description: >- + The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute + this run. If a value is provided here, it will override the model associated with the assistant. + If not, the model associated with the assistant will be used. + anyOf: + - type: string + - type: string + enum: + - gpt-5 + - gpt-5-mini + - gpt-5-nano + - gpt-5-2025-08-07 + - gpt-5-mini-2025-08-07 + - gpt-5-nano-2025-08-07 + - gpt-4.1 + - gpt-4.1-mini + - gpt-4.1-nano + - gpt-4.1-2025-04-14 + - gpt-4.1-mini-2025-04-14 + - gpt-4.1-nano-2025-04-14 + - gpt-4o + - gpt-4o-2024-11-20 + - gpt-4o-2024-08-06 + - gpt-4o-2024-05-13 + - gpt-4o-mini + - gpt-4o-mini-2024-07-18 + - gpt-4.5-preview + - gpt-4.5-preview-2025-02-27 + - gpt-4-turbo + - gpt-4-turbo-2024-04-09 + - gpt-4-0125-preview + - gpt-4-turbo-preview + - gpt-4-1106-preview + - gpt-4-vision-preview + - gpt-4 + - gpt-4-0314 + - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0314 + - gpt-4-32k-0613 + - gpt-3.5-turbo + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-1106 + - gpt-3.5-turbo-0125 + - gpt-3.5-turbo-16k-0613 + x-oaiTypeLabel: string + nullable: true + instructions: + description: >- + Override the default system message of the assistant. This is useful for modifying the behavior on + a per-run basis. + type: string + nullable: true + tools: + description: >- + Override the tools the assistant can use for this run. This is useful for modifying the behavior + on a per-run basis. + nullable: true + type: array + maxItems: 20 + items: + $ref: '#/components/schemas/AssistantTool' + tool_resources: + type: object + description: > + A set of resources that are used by the assistant's tools. The resources are specific to the type + of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the + `file_search` tool requires a list of vector store IDs. + properties: + code_interpreter: + type: object + properties: + file_ids: + type: array + description: > + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available + to the `code_interpreter` tool. There can be a maximum of 20 files associated with the + tool. + default: [] + maxItems: 20 + items: + type: string + file_search: + type: object + properties: + vector_store_ids: + type: array + description: > + The ID of the [vector + store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached to + this assistant. There can be a maximum of 1 vector store attached to the assistant. + maxItems: 1 + items: + type: string + nullable: true + metadata: + $ref: '#/components/schemas/Metadata' + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: > + What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: > + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the + top 10% probability mass are considered. + + + We generally recommend altering this or temperature but not both. + stream: + type: boolean + nullable: true + description: > + If `true`, returns a stream of events that happen during the Run as server-sent events, + terminating when the Run enters a terminal state with a `data: [DONE]` message. + max_prompt_tokens: + type: integer + nullable: true + description: > + The maximum number of prompt tokens that may be used over the course of the run. The run will make + a best effort to use only the number of prompt tokens specified, across multiple turns of the run. + If the run exceeds the number of prompt tokens specified, the run will end with status + `incomplete`. See `incomplete_details` for more info. + minimum: 256 + max_completion_tokens: + type: integer + nullable: true + description: > + The maximum number of completion tokens that may be used over the course of the run. The run will + make a best effort to use only the number of completion tokens specified, across multiple turns of + the run. If the run exceeds the number of completion tokens specified, the run will end with + status `incomplete`. See `incomplete_details` for more info. + minimum: 256 + truncation_strategy: + allOf: + - $ref: '#/components/schemas/TruncationObject' + - nullable: true + tool_choice: + allOf: + - $ref: '#/components/schemas/AssistantsApiToolChoiceOption' + - nullable: true + parallel_tool_calls: + $ref: '#/components/schemas/ParallelToolCalls' + response_format: + $ref: '#/components/schemas/AssistantsApiResponseFormatOption' + nullable: true + required: *ref_0 + CreateThreadRequest: + type: object + description: | + Options to create a new thread. If no thread is provided when running a + request, an empty thread will be created. + additionalProperties: false + properties: + messages: + description: >- + A list of [messages](https://platform.openai.com/docs/api-reference/messages) to start the thread + with. + type: array + items: + $ref: '#/components/schemas/CreateMessageRequest' + tool_resources: + anyOf: + - type: object + description: > + A set of resources that are made available to the assistant's tools in this thread. The + resources are specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store IDs. + properties: + code_interpreter: + type: object + properties: + file_ids: + type: array + description: > + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + default: [] + maxItems: 20 + items: + type: string + file_search: + type: object + properties: + vector_store_ids: + type: array + description: > + The [vector + store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached + to this thread. There can be a maximum of 1 vector store attached to the thread. 
+ maxItems: 1 + items: + type: string + vector_stores: + type: array + description: > + A helper to create a [vector + store](https://platform.openai.com/docs/api-reference/vector-stores/object) with + file_ids and attach it to this thread. There can be a maximum of 1 vector store + attached to the thread. + maxItems: 1 + items: + type: object + properties: + file_ids: + type: array + description: > + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + maxItems: 10000 + items: + type: string + chunking_strategy: + type: object + description: >- + The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. + anyOf: + - type: object + title: Auto Chunking Strategy + description: >- + The default strategy. This strategy currently uses a `max_chunk_size_tokens` + of `800` and `chunk_overlap_tokens` of `400`. + additionalProperties: false + properties: + type: + type: string + description: Always `auto`. + enum: + - auto + x-stainless-const: true + required: + - type + - type: object + title: Static Chunking Strategy + additionalProperties: false + properties: + type: + type: string + description: Always `static`. + enum: + - static + x-stainless-const: true + static: + type: object + additionalProperties: false + properties: + max_chunk_size_tokens: + type: integer + minimum: 100 + maximum: 4096 + description: >- + The maximum number of tokens in each chunk. The default value is + `800`. The minimum value is `100` and the maximum value is `4096`. + chunk_overlap_tokens: + type: integer + description: > + The number of tokens that overlap between chunks. The default value + is `400`. + + + Note that the overlap must not exceed half of + `max_chunk_size_tokens`. + required: + - max_chunk_size_tokens + - chunk_overlap_tokens + required: + - type + - static + discriminator: + propertyName: type + metadata: + $ref: '#/components/schemas/Metadata' + anyOf: + - required: + - vector_store_ids + - required: + - vector_stores + - type: 'null' + metadata: + $ref: '#/components/schemas/Metadata' + CreateTranscriptionRequest: + type: object + additionalProperties: false + properties: + file: + description: > + The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, + mpeg, mpga, m4a, ogg, wav, or webm. + type: string + x-oaiTypeLabel: file + format: binary + x-oaiMeta: + exampleFilePath: speech.mp3 + model: + description: > + ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `whisper-1` + (which is powered by our open source Whisper V2 model), and `gpt-4o-transcribe-diarize`. + example: gpt-4o-transcribe + anyOf: + - type: string + - type: string + enum: + - whisper-1 + - gpt-4o-transcribe + - gpt-4o-mini-transcribe + - gpt-4o-transcribe-diarize + x-stainless-const: true + x-stainless-nominal: false + x-oaiTypeLabel: string + language: + description: > + The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format will improve + accuracy and latency. + type: string + prompt: + description: > + An optional text to guide the model's style or continue a previous audio segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio + language. This field is not supported when using `gpt-4o-transcribe-diarize`. 
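+ # Illustrative request fragment (not part of the schema; values are hypothetical): a transcription + # prompt that carries over prior context might look like + # {"model": "gpt-4o-transcribe", "prompt": "The previous segment discussed GPT-4o and Whisper."} + # Per the description above, the prompt should match the audio language and is not supported for + # gpt-4o-transcribe-diarize.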
+ type: string + response_format: + $ref: '#/components/schemas/AudioResponseFormat' + temperature: + description: > + The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically + increase the temperature until certain thresholds are hit. + type: number + default: 0 + include: + description: > + Additional information to include in the transcription response. + + `logprobs` will return the log probabilities of the tokens in the + + response to understand the model's confidence in the transcription. + + `logprobs` only works with response_format set to `json` and only with + + the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`. This field is not supported when + using `gpt-4o-transcribe-diarize`. + type: array + items: + $ref: '#/components/schemas/TranscriptionInclude' + timestamp_granularities: + description: > + The timestamp granularities to populate for this transcription. `response_format` must be set to + `verbose_json` to use timestamp granularities. Either or both of these options are supported: + `word` or `segment`. Note: There is no additional latency for segment timestamps, but generating + word timestamps incurs additional latency. + + This option is not available for `gpt-4o-transcribe-diarize`. + type: array + items: + type: string + enum: + - word + - segment + default: + - segment + stream: + anyOf: + - description: > + If set to true, the model response data will be streamed to the client + + as it is generated using [server-sent + events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + + See the [Streaming section of the Speech-to-Text + guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) + + for more information. + + + Note: Streaming is not supported for the `whisper-1` model and will be ignored. + type: boolean + default: false + - type: 'null' + chunking_strategy: + $ref: '#/components/schemas/TranscriptionChunkingStrategy' + known_speaker_names: + description: > + Optional list of speaker names that correspond to the audio samples provided in + `known_speaker_references[]`. Each entry should be a short identifier (for example `customer` or + `agent`). Up to 4 speakers are supported. + type: array + maxItems: 4 + items: + type: string + known_speaker_references: + description: > + Optional list of audio samples (as [data + URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) that contain + known speaker references matching `known_speaker_names[]`. Each sample must be between 2 and 10 + seconds, and can use any of the same input audio formats supported by `file`. + type: array + maxItems: 4 + items: + type: string + required: + - file + - model + CreateTranscriptionResponseDiarizedJson: + type: object + description: > + Represents a diarized transcription response returned by the model, including the combined transcript + and speaker-segment annotations. + properties: + task: + type: string + description: The type of task that was run. Always `transcribe`. + enum: + - transcribe + x-stainless-const: true + duration: + type: number + description: Duration of the input audio in seconds. + text: + type: string + description: The concatenated transcript text for the entire audio input.
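+ # Illustrative request fragment (not part of the schema; values are hypothetical): pairing + # {"known_speaker_names": ["agent", "customer"], "known_speaker_references": ["data:audio/wav;base64,...", "data:audio/wav;base64,..."]} + # gives gpt-4o-transcribe-diarize reference samples, so diarized segments can carry those names + # alongside auto-assigned labels, as in the example below.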
+ segments: + type: array + description: Segments of the transcript annotated with timestamps and speaker labels. + items: + $ref: '#/components/schemas/TranscriptionDiarizedSegment' + usage: + type: object + description: Token or duration usage statistics for the request. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/TranscriptTextUsageTokens' + title: Token Usage + - $ref: '#/components/schemas/TranscriptTextUsageDuration' + title: Duration Usage + required: + - task + - duration + - text + - segments + x-oaiMeta: + name: The transcription object (Diarized JSON) + group: audio + example: | + { + "task": "transcribe", + "duration": 42.7, + "text": "Agent: Thanks for calling OpenAI support.\nCustomer: Hi, I need help with diarization.", + "segments": [ + { + "type": "transcript.text.segment", + "id": "seg_001", + "start": 0.0, + "end": 5.2, + "text": "Thanks for calling OpenAI support.", + "speaker": "agent" + }, + { + "type": "transcript.text.segment", + "id": "seg_002", + "start": 5.2, + "end": 12.8, + "text": "Hi, I need help with diarization.", + "speaker": "A" + } + ], + "usage": { + "type": "duration", + "seconds": 43 + } + } + CreateTranscriptionResponseJson: + type: object + description: Represents a transcription response returned by the model, based on the provided input. + properties: + text: + type: string + description: The transcribed text. + logprobs: + type: array + optional: true + description: > + The log probabilities of the tokens in the transcription. Only returned with the models + `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added to the `include` array. + items: + type: object + properties: + token: + type: string + description: The token in the transcription. + logprob: + type: number + description: The log probability of the token. + bytes: + type: array + items: + type: number + description: The bytes of the token. + usage: + type: object + description: Token usage statistics for the request. + anyOf: + - $ref: '#/components/schemas/TranscriptTextUsageTokens' + title: Token Usage + - $ref: '#/components/schemas/TranscriptTextUsageDuration' + title: Duration Usage + discriminator: + propertyName: type + required: + - text + x-oaiMeta: + name: The transcription object (JSON) + group: audio + example: | + { + "text": "Imagine the wildest idea that you've ever had, and you're curious about how it might scale to something that's a 100, a 1,000 times bigger. This is a place where you can get to do that.", + "usage": { + "type": "tokens", + "input_tokens": 14, + "input_token_details": { + "text_tokens": 10, + "audio_tokens": 4 + }, + "output_tokens": 101, + "total_tokens": 115 + } + } + CreateTranscriptionResponseStreamEvent: + anyOf: + - $ref: '#/components/schemas/TranscriptTextSegmentEvent' + - $ref: '#/components/schemas/TranscriptTextDeltaEvent' + - $ref: '#/components/schemas/TranscriptTextDoneEvent' + discriminator: + propertyName: type + CreateTranscriptionResponseVerboseJson: + type: object + description: Represents a verbose json transcription response returned by the model, based on the provided input. + properties: + language: + type: string + description: The language of the input audio. + duration: + type: number + description: The duration of the input audio. + text: + type: string + description: The transcribed text. + words: + type: array + description: Extracted words and their corresponding timestamps.
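+ # Illustrative request fragment (not part of the schema; values are placeholders): requesting + # word-level detail requires {"response_format": "verbose_json", "timestamp_granularities": ["word", "segment"]}, + # which populates the words and segments arrays of this response.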
+ items: + $ref: '#/components/schemas/TranscriptionWord' + segments: + type: array + description: Segments of the transcribed text and their corresponding details. + items: + $ref: '#/components/schemas/TranscriptionSegment' + usage: + $ref: '#/components/schemas/TranscriptTextUsageDuration' + required: + - language + - duration + - text + x-oaiMeta: + name: The transcription object (Verbose JSON) + group: audio + example: | + { + "task": "transcribe", + "language": "english", + "duration": 8.470000267028809, + "text": "The beach was a popular spot on a hot summer day. People were swimming in the ocean, building sandcastles, and playing beach volleyball.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 3.319999933242798, + "text": " The beach was a popular spot on a hot summer day.", + "tokens": [ + 50364, 440, 7534, 390, 257, 3743, 4008, 322, 257, 2368, 4266, 786, 13, 50530 + ], + "temperature": 0.0, + "avg_logprob": -0.2860786020755768, + "compression_ratio": 1.2363636493682861, + "no_speech_prob": 0.00985979475080967 + }, + ... + ], + "usage": { + "type": "duration", + "seconds": 9 + } + } + CreateTranslationRequest: + type: object + additionalProperties: false + properties: + file: + description: > + The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, mpeg, + mpga, m4a, ogg, wav, or webm. + type: string + x-oaiTypeLabel: file + format: binary + x-oaiMeta: + exampleFilePath: speech.mp3 + model: + description: > + ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is + currently available. + example: whisper-1 + anyOf: + - type: string + - type: string + enum: + - whisper-1 + x-stainless-const: true + x-oaiTypeLabel: string + prompt: + description: > + An optional text to guide the model's style or continue a previous audio segment. The + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. + type: string + response_format: + description: > + The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`, or + `vtt`. + type: string + enum: + - json + - text + - srt + - verbose_json + - vtt + default: json + temperature: + description: > + The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the + model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically + increase the temperature until certain thresholds are hit. + type: number + default: 0 + required: + - file + - model + CreateTranslationResponseJson: + type: object + properties: + text: + type: string + required: + - text + CreateTranslationResponseVerboseJson: + type: object + properties: + language: + type: string + description: The language of the output translation (always `english`). + duration: + type: number + description: The duration of the input audio. + text: + type: string + description: The translated text. + segments: + type: array + description: Segments of the translated text and their corresponding details. + items: + $ref: '#/components/schemas/TranscriptionSegment' + required: + - language + - duration + - text + CreateUploadRequest: + type: object + additionalProperties: false + properties: + filename: + description: | + The name of the file to upload. + type: string + purpose: + description: > + The intended purpose of the uploaded file.
+ + + See the [documentation on File + purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). + type: string + enum: + - assistants + - batch + - fine-tune + - vision + bytes: + description: | + The number of bytes in the file you are uploading. + type: integer + mime_type: + description: > + The MIME type of the file. + + + This must fall within the supported MIME types for your file purpose. See the supported MIME types + for assistants and vision. + type: string + expires_after: + $ref: '#/components/schemas/FileExpirationAfter' + required: + - filename + - purpose + - bytes + - mime_type + CreateVectorStoreFileBatchRequest: + type: object + additionalProperties: false + properties: + file_ids: + description: >- + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store + should use. Useful for tools like `file_search` that can access files. If `attributes` or + `chunking_strategy` are provided, they will be applied to all files in the batch. Mutually + exclusive with `files`. + type: array + minItems: 1 + maxItems: 500 + items: + type: string + files: + description: >- + A list of objects that each include a `file_id` plus optional `attributes` or `chunking_strategy`. + Use this when you need to override metadata for specific files. The global `attributes` or + `chunking_strategy` will be ignored and must be specified for each file. Mutually exclusive with + `file_ids`. + type: array + minItems: 1 + maxItems: 500 + items: + $ref: '#/components/schemas/CreateVectorStoreFileRequest' + chunking_strategy: + $ref: '#/components/schemas/ChunkingStrategyRequestParam' + attributes: + $ref: '#/components/schemas/VectorStoreFileAttributes' + CreateVectorStoreFileRequest: + type: object + additionalProperties: false + properties: + file_id: + description: >- + A [File](https://platform.openai.com/docs/api-reference/files) ID that the vector store should + use. Useful for tools like `file_search` that can access files. + type: string + chunking_strategy: + $ref: '#/components/schemas/ChunkingStrategyRequestParam' + attributes: + $ref: '#/components/schemas/VectorStoreFileAttributes' + required: + - file_id + CreateVectorStoreRequest: + type: object + additionalProperties: false + properties: + file_ids: + description: >- + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store + should use. Useful for tools like `file_search` that can access files. + type: array + maxItems: 500 + items: + type: string + name: + description: The name of the vector store. + type: string + description: + description: A description for the vector store. Can be used to describe the vector store's purpose. + type: string + expires_after: + $ref: '#/components/schemas/VectorStoreExpirationAfter' + chunking_strategy: + $ref: '#/components/schemas/ChunkingStrategyRequestParam' + metadata: + $ref: '#/components/schemas/Metadata' + CustomToolCall: + type: object + title: Custom tool call + description: | + A call to a custom tool created by the model. + properties: + type: + type: string + enum: + - custom_tool_call + x-stainless-const: true + description: | + The type of the custom tool call. Always `custom_tool_call`. + id: + type: string + description: | + The unique ID of the custom tool call in the OpenAI platform. + call_id: + type: string + description: | + An identifier used to map this custom tool call to a tool call output. 
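+ # Illustrative pairing (not part of the schema; IDs are hypothetical): a custom tool call and its + # output are linked by call_id, e.g. + # {"type": "custom_tool_call", "call_id": "call_123", "name": "my_tool", "input": "..."} + # {"type": "custom_tool_call_output", "call_id": "call_123", "output": "done"}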
+ name: + type: string + description: | + The name of the custom tool being called. + input: + type: string + description: | + The input for the custom tool call generated by the model. + required: + - type + - call_id + - name + - input + CustomToolCallOutput: + type: object + title: Custom tool call output + description: | + The output of a custom tool call from your code, being sent back to the model. + properties: + type: + type: string + enum: + - custom_tool_call_output + x-stainless-const: true + description: | + The type of the custom tool call output. Always `custom_tool_call_output`. + id: + type: string + description: | + The unique ID of the custom tool call output in the OpenAI platform. + call_id: + type: string + description: | + The call ID, used to map this custom tool call output to a custom tool call. + output: + description: | + The output from the custom tool call generated by your code. + Can be a string or a list of output content. + anyOf: + - type: string + description: | + A string of the output of the custom tool call. + title: string output + - type: array + items: + $ref: '#/components/schemas/FunctionAndCustomToolCallOutput' + title: output content list + description: | + Text, image, or file output of the custom tool call. + required: + - type + - call_id + - output + CustomToolChatCompletions: + type: object + title: Custom tool + description: | + A custom tool that processes input using a specified format. + properties: + type: + type: string + enum: + - custom + description: The type of the custom tool. Always `custom`. + x-stainless-const: true + custom: + type: object + title: Custom tool properties + description: | + Properties of the custom tool. + properties: + name: + type: string + description: The name of the custom tool, used to identify it in tool calls. + description: + type: string + description: | + Optional description of the custom tool, used to provide more context. + format: + description: | + The input format for the custom tool. Default is unconstrained text. + anyOf: + - type: object + title: Text format + description: Unconstrained free-form text. + properties: + type: + type: string + enum: + - text + description: Unconstrained text format. Always `text`. + x-stainless-const: true + required: + - type + additionalProperties: false + - type: object + title: Grammar format + description: A grammar defined by the user. + properties: + type: + type: string + enum: + - grammar + description: Grammar format. Always `grammar`. + x-stainless-const: true + grammar: + type: object + title: Grammar format + description: Your chosen grammar. + properties: + definition: + type: string + description: The grammar definition. + syntax: + type: string + description: The syntax of the grammar definition. One of `lark` or `regex`. + enum: + - lark + - regex + required: + - definition + - syntax + required: + - type + - grammar + additionalProperties: false + discriminator: + propertyName: type + required: + - name + required: + - type + - custom + DeleteAssistantResponse: + type: object + properties: + id: + type: string + deleted: + type: boolean + object: + type: string + enum: + - assistant.deleted + x-stainless-const: true + required: + - id + - object + - deleted + DeleteCertificateResponse: + type: object + properties: + object: + description: The object type, must be `certificate.deleted`. + x-stainless-const: true + const: certificate.deleted + id: + type: string + description: The ID of the certificate that was deleted.
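+ # Illustrative response (ID is a hypothetical placeholder): {"object": "certificate.deleted", "id": "cert_abc123"} + # Note that, unlike the other Delete*Response schemas here, this one carries no "deleted" flag.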
+ required: + - object + - id + DeleteFileResponse: + type: object + properties: + id: + type: string + object: + type: string + enum: + - file + x-stainless-const: true + deleted: + type: boolean + required: + - id + - object + - deleted + DeleteFineTuningCheckpointPermissionResponse: + type: object + properties: + id: + type: string + description: The ID of the fine-tuned model checkpoint permission that was deleted. + object: + type: string + description: The object type, which is always "checkpoint.permission". + enum: + - checkpoint.permission + x-stainless-const: true + deleted: + type: boolean + description: Whether the fine-tuned model checkpoint permission was successfully deleted. + required: + - id + - object + - deleted + DeleteMessageResponse: + type: object + properties: + id: + type: string + deleted: + type: boolean + object: + type: string + enum: + - thread.message.deleted + x-stainless-const: true + required: + - id + - object + - deleted + DeleteModelResponse: + type: object + properties: + id: + type: string + deleted: + type: boolean + object: + type: string + required: + - id + - object + - deleted + DeleteThreadResponse: + type: object + properties: + id: + type: string + deleted: + type: boolean + object: + type: string + enum: + - thread.deleted + x-stainless-const: true + required: + - id + - object + - deleted + DeleteVectorStoreFileResponse: + type: object + properties: + id: + type: string + deleted: + type: boolean + object: + type: string + enum: + - vector_store.file.deleted + x-stainless-const: true + required: + - id + - object + - deleted + DeleteVectorStoreResponse: + type: object + properties: + id: + type: string + deleted: + type: boolean + object: + type: string + enum: + - vector_store.deleted + x-stainless-const: true + required: + - id + - object + - deleted + DeletedConversation: + title: The deleted conversation object + allOf: + - $ref: '#/components/schemas/DeletedConversationResource' + x-oaiMeta: + name: The deleted conversation object + group: conversations + DoneEvent: + type: object + properties: + event: + type: string + enum: + - done + x-stainless-const: true + data: + type: string + enum: + - '[DONE]' + x-stainless-const: true + required: + - event + - data + description: Occurs when a stream ends. + x-oaiMeta: + dataDescription: '`data` is `[DONE]`' + Drag: + type: object + title: Drag + description: | + A drag action. + properties: + type: + type: string + enum: + - drag + default: drag + description: | + Specifies the event type. For a drag action, this property is + always set to `drag`. + x-stainless-const: true + path: + type: array + description: > + An array of coordinates representing the path of the drag action. Coordinates will appear as an + array + + of objects, e.g. + + ``` + + [ + { x: 100, y: 200 }, + { x: 200, y: 300 } + ] + + ``` + items: + $ref: '#/components/schemas/DragPoint' + required: + - type + - path + EasyInputMessage: + type: object + title: Input message + description: | + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + properties: + role: + type: string + description: | + The role of the message input. One of `user`, `assistant`, `system`, or + `developer`.
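+ # Illustrative value (not part of the schema): {"type": "message", "role": "user", "content": "Hello!"} + # — content may be a plain string, or the structured InputMessageContentList referenced below.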
+ enum: + - user + - assistant + - system + - developer + content: + description: | + Text, image, or audio input to the model, used to generate a response. + Can also contain previous assistant responses. + anyOf: + - type: string + title: Text input + description: | + A text input to the model. + - $ref: '#/components/schemas/InputMessageContentList' + type: + type: string + description: | + The type of the message input. Always `message`. + enum: + - message + x-stainless-const: true + required: + - role + - content + Embedding: + type: object + description: | + Represents an embedding vector returned by the embedding endpoint. + properties: + index: + type: integer + description: The index of the embedding in the list of embeddings. + embedding: + type: array + description: > + The embedding vector, which is a list of floats. The length of the vector depends on the model as + listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings). + items: + type: number + format: float + object: + type: string + description: The object type, which is always "embedding". + enum: + - embedding + x-stainless-const: true + required: + - index + - object + - embedding + x-oaiMeta: + name: The embedding object + example: | + { + "object": "embedding", + "embedding": [ + 0.0023064255, + -0.009327292, + .... (1536 floats total for ada-002) + -0.0028842222, + ], + "index": 0 + } + Error: + type: object + properties: + code: + anyOf: + - type: string + - type: 'null' + message: + type: string + param: + anyOf: + - type: string + - type: 'null' + type: + type: string + required: + - type + - message + - param + - code + ErrorEvent: + type: object + properties: + event: + type: string + enum: + - error + x-stainless-const: true + data: + $ref: '#/components/schemas/Error' + required: + - event + - data + description: >- + Occurs when an [error](https://platform.openai.com/docs/guides/error-codes#api-errors) occurs. This + can happen due to an internal server error or a timeout. + x-oaiMeta: + dataDescription: '`data` is an [error](/docs/guides/error-codes#api-errors)' + ErrorResponse: + type: object + properties: + error: + $ref: '#/components/schemas/Error' + required: + - error + Eval: + type: object + title: Eval + description: | + An Eval object with a data source config and testing criteria. + An Eval represents a task to be done for your LLM integration. + Like: + - Improve the quality of my chatbot + - See how well my chatbot handles customer support + - Check if o4-mini is better at my use case than gpt-4o + properties: + object: + type: string + enum: + - eval + default: eval + description: The object type. + x-stainless-const: true + id: + type: string + description: Unique identifier for the evaluation. + name: + type: string + description: The name of the evaluation. + example: Chatbot effectiveness Evaluation + data_source_config: + type: object + description: Configuration of data sources used in runs of the evaluation. + anyOf: + - $ref: '#/components/schemas/EvalCustomDataSourceConfig' + - $ref: '#/components/schemas/EvalLogsDataSourceConfig' + - $ref: '#/components/schemas/EvalStoredCompletionsDataSourceConfig' + discriminator: + propertyName: type + testing_criteria: + description: A list of testing criteria.
+ type: array + items: + anyOf: + - $ref: '#/components/schemas/EvalGraderLabelModel' + - $ref: '#/components/schemas/EvalGraderStringCheck' + - $ref: '#/components/schemas/EvalGraderTextSimilarity' + - $ref: '#/components/schemas/EvalGraderPython' + - $ref: '#/components/schemas/EvalGraderScoreModel' + created_at: + type: integer + description: The Unix timestamp (in seconds) for when the eval was created. + metadata: + $ref: '#/components/schemas/Metadata' + required: + - id + - data_source_config + - object + - testing_criteria + - name + - created_at + - metadata + x-oaiMeta: + name: The eval object + group: evals + example: | + { + "object": "eval", + "id": "eval_67abd54d9b0081909a86353f6fb9317a", + "data_source_config": { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "label": {"type": "string"} + }, + "required": ["label"] + }, + "include_sample_schema": true + }, + "testing_criteria": [ + { + "name": "My string check grader", + "type": "string_check", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "operation": "eq" + } + ], + "name": "External Data Eval", + "created_at": 1739314509, + "metadata": { + "test": "synthetics" + } + } + EvalApiError: + type: object + title: EvalApiError + description: | + An object representing an error response from the Eval API. + properties: + code: + type: string + description: The error code. + message: + type: string + description: The error message. + required: + - code + - message + x-oaiMeta: + name: The API error object + group: evals + example: | + { + "code": "internal_error", + "message": "The eval run failed due to an internal error." + } + EvalCustomDataSourceConfig: + type: object + title: CustomDataSourceConfig + description: | + A CustomDataSourceConfig which specifies the schema of your `item` and optionally `sample` namespaces. + The response schema defines the shape of the data that will be: + - Used to define your testing criteria and + - What data is required when creating a run + properties: + type: + type: string + enum: + - custom + default: custom + description: The type of data source. Always `custom`. + x-stainless-const: true + schema: + type: object + description: | + The json schema for the run data source items. + Learn how to build JSON schemas [here](https://json-schema.org/). + additionalProperties: true + required: + - type + - schema + x-oaiMeta: + name: The eval custom data source config object + group: evals + example: | + { + "type": "custom", + "schema": { + "type": "object", + "properties": { + "item": { + "type": "object", + "properties": { + "label": {"type": "string"} + }, + "required": ["label"] + } + }, + "required": ["item"] + } + } + EvalGraderLabelModel: + type: object + title: LabelModelGrader + allOf: + - $ref: '#/components/schemas/GraderLabelModel' + EvalGraderPython: + type: object + title: EvalGraderPython + allOf: + - $ref: '#/components/schemas/GraderPython' + - type: object + properties: + pass_threshold: + type: number + description: The threshold for the score. + x-oaiMeta: + name: Eval Python Grader + group: graders + example: | + { + "type": "python", + "name": "Example python grader", + "image_tag": "2025-05-08", + "source": """ + def grade(sample: dict, item: dict) -> float: + \""" + Returns 1.0 if `output_text` equals `label`, otherwise 0.0.
+ \""" + output = sample.get("output_text") + label = item.get("label") + return 1.0 if output == label else 0.0 + """, + "pass_threshold": 0.8 + } + EvalGraderScoreModel: + type: object + title: EvalGraderScoreModel + allOf: + - $ref: '#/components/schemas/GraderScoreModel' + - type: object + properties: + pass_threshold: + type: number + description: The threshold for the score. + EvalGraderStringCheck: + type: object + title: StringCheckGrader + allOf: + - $ref: '#/components/schemas/GraderStringCheck' + EvalGraderTextSimilarity: + type: object + title: EvalGraderTextSimilarity + allOf: + - $ref: '#/components/schemas/GraderTextSimilarity' + - type: object + properties: + pass_threshold: + type: number + description: The threshold for the score. + required: + - pass_threshold + x-oaiMeta: + name: Text Similarity Grader + group: graders + example: | + { + "type": "text_similarity", + "name": "Example text similarity grader", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "pass_threshold": 0.8, + "evaluation_metric": "fuzzy_match" + } + EvalItem: + type: object + title: EvalItem + description: | + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. Messages with the + `assistant` role are presumed to have been generated by the model in previous + interactions. + properties: + role: + type: string + description: | + The role of the message input. One of `user`, `assistant`, `system`, or + `developer`. + enum: + - user + - assistant + - system + - developer + content: + description: | + Inputs to the model - can contain template strings. + anyOf: + - type: string + title: Text input + description: | + A text input to the model. + - $ref: '#/components/schemas/InputTextContent' + - type: object + title: Output text + description: | + A text output from the model. + properties: + type: + type: string + description: | + The type of the output text. Always `output_text`. + enum: + - output_text + x-stainless-const: true + text: + type: string + description: | + The text output from the model. + required: + - type + - text + - type: object + title: Input image + description: | + An image input to the model. + properties: + type: + type: string + description: | + The type of the image input. Always `input_image`. + enum: + - input_image + x-stainless-const: true + image_url: + type: string + description: | + The URL of the image input. + detail: + type: string + description: > + The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. + Defaults to `auto`. + required: + - type + - image_url + - $ref: '#/components/schemas/InputAudio' + - type: array + title: An array of Input text, Input image, and Input audio + description: > + A list of inputs, each of which may be either an input text, input image, or input audio + object. + type: + type: string + description: | + The type of the message input. Always `message`. + enum: + - message + x-stainless-const: true + required: + - role + - content + EvalJsonlFileContentSource: + type: object + title: EvalJsonlFileContentSource + properties: + type: + type: string + enum: + - file_content + default: file_content + description: The type of jsonl source. Always `file_content`. 
+ x-stainless-const: true + content: + type: array + items: + type: object + properties: + item: + type: object + additionalProperties: true + sample: + type: object + additionalProperties: true + required: + - item + description: The content of the jsonl file. + required: + - type + - content + EvalJsonlFileIdSource: + type: object + title: EvalJsonlFileIdSource + properties: + type: + type: string + enum: + - file_id + default: file_id + description: The type of jsonl source. Always `file_id`. + x-stainless-const: true + id: + type: string + description: The identifier of the file. + required: + - type + - id + EvalList: + type: object + title: EvalList + description: | + An object representing a list of evals. + properties: + object: + type: string + enum: + - list + default: list + description: | + The type of this object. It is always set to "list". + x-stainless-const: true + data: + type: array + description: | + An array of eval objects. + items: + $ref: '#/components/schemas/Eval' + first_id: + type: string + description: The identifier of the first eval in the data array. + last_id: + type: string + description: The identifier of the last eval in the data array. + has_more: + type: boolean + description: Indicates whether there are more evals available. + required: + - object + - data + - first_id + - last_id + - has_more + x-oaiMeta: + name: The eval list object + group: evals + example: | + { + "object": "list", + "data": [ + { + "object": "eval", + "id": "eval_67abd54d9b0081909a86353f6fb9317a", + "data_source_config": { + "type": "custom", + "schema": { + "type": "object", + "properties": { + "item": { + "type": "object", + "properties": { + "input": { + "type": "string" + }, + "ground_truth": { + "type": "string" + } + }, + "required": [ + "input", + "ground_truth" + ] + } + }, + "required": [ + "item" + ] + } + }, + "testing_criteria": [ + { + "name": "String check", + "id": "String check-2eaf2d8d-d649-4335-8148-9535a7ca73c2", + "type": "string_check", + "input": "{{item.input}}", + "reference": "{{item.ground_truth}}", + "operation": "eq" + } + ], + "name": "External Data Eval", + "created_at": 1739314509, + "metadata": {} + } + ], + "first_id": "eval_67abd54d9b0081909a86353f6fb9317a", + "last_id": "eval_67abd54d9b0081909a86353f6fb9317a", + "has_more": true + } + EvalLogsDataSourceConfig: + type: object + title: LogsDataSourceConfig + description: > + A LogsDataSourceConfig which specifies the metadata property of your logs query. + + This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc. + + The schema returned by this data source config is used to define what variables are available in your + evals. + + `item` and `sample` are both defined when using this data source config. + properties: + type: + type: string + enum: + - logs + default: logs + description: The type of data source. Always `logs`. + x-stainless-const: true + metadata: + $ref: '#/components/schemas/Metadata' + schema: + type: object + description: | + The json schema for the run data source items. + Learn how to build JSON schemas [here](https://json-schema.org/).
+ additionalProperties: true + required: + - type + - schema + x-oaiMeta: + name: The logs data source object for evals + group: evals + example: | + { + "type": "logs", + "metadata": { + "language": "english" + }, + "schema": { + "type": "object", + "properties": { + "item": { + "type": "object" + }, + "sample": { + "type": "object" + } + }, + "required": [ + "item", + "sample" + ] + } + } + EvalResponsesSource: + type: object + title: EvalResponsesSource + description: | + An EvalResponsesSource object describing a run data source configuration. + properties: + type: + type: string + enum: + - responses + description: The type of run data source. Always `responses`. + metadata: + anyOf: + - type: object + description: Metadata filter for the responses. This is a query parameter used to select responses. + - type: 'null' + model: + anyOf: + - type: string + description: >- + The name of the model to find responses for. This is a query parameter used to select + responses. + - type: 'null' + instructions_search: + anyOf: + - type: string + description: >- + Optional string to search the 'instructions' field. This is a query parameter used to select + responses. + - type: 'null' + created_after: + anyOf: + - type: integer + minimum: 0 + description: >- + Only include items created after this timestamp (inclusive). This is a query parameter used to + select responses. + - type: 'null' + created_before: + anyOf: + - type: integer + minimum: 0 + description: >- + Only include items created before this timestamp (inclusive). This is a query parameter used + to select responses. + - type: 'null' + reasoning_effort: + anyOf: + - $ref: '#/components/schemas/ReasoningEffort' + description: Optional reasoning effort parameter. This is a query parameter used to select responses. + - type: 'null' + temperature: + anyOf: + - type: number + description: Sampling temperature. This is a query parameter used to select responses. + - type: 'null' + top_p: + anyOf: + - type: number + description: Nucleus sampling parameter. This is a query parameter used to select responses. + - type: 'null' + users: + anyOf: + - type: array + items: + type: string + description: List of user identifiers. This is a query parameter used to select responses. + - type: 'null' + tools: + anyOf: + - type: array + items: + type: string + description: List of tool names. This is a query parameter used to select responses. + - type: 'null' + required: + - type + x-oaiMeta: + name: The run data source object used to configure an individual run + group: eval runs + example: | + { + "type": "responses", + "model": "gpt-4o-mini-2024-07-18", + "temperature": 0.7, + "top_p": 1.0, + "users": ["user1", "user2"], + "tools": ["tool1", "tool2"], + "instructions_search": "You are a coding assistant" + } + EvalRun: + type: object + title: EvalRun + description: | + A schema representing an evaluation run. + properties: + object: + type: string + enum: + - eval.run + default: eval.run + description: The type of the object. Always "eval.run". + x-stainless-const: true + id: + type: string + description: Unique identifier for the evaluation run. + eval_id: + type: string + description: The identifier of the associated evaluation. + status: + type: string + description: The status of the evaluation run. + model: + type: string + description: The model that is evaluated, if applicable. + name: + type: string + description: The name of the evaluation run.
+ created_at: + type: integer + description: Unix timestamp (in seconds) when the evaluation run was created. + report_url: + type: string + description: The URL to the rendered evaluation run report on the UI dashboard. + result_counts: + type: object + description: Counters summarizing the outcomes of the evaluation run. + properties: + total: + type: integer + description: Total number of executed output items. + errored: + type: integer + description: Number of output items that resulted in an error. + failed: + type: integer + description: Number of output items that failed to pass the evaluation. + passed: + type: integer + description: Number of output items that passed the evaluation. + required: + - total + - errored + - failed + - passed + per_model_usage: + type: array + description: Usage statistics for each model during the evaluation run. + items: + type: object + properties: + model_name: + type: string + description: The name of the model. + x-stainless-naming: + python: + property_name: run_model_name + invocation_count: + type: integer + description: The number of invocations. + prompt_tokens: + type: integer + description: The number of prompt tokens used. + completion_tokens: + type: integer + description: The number of completion tokens generated. + total_tokens: + type: integer + description: The total number of tokens used. + cached_tokens: + type: integer + description: The number of tokens retrieved from cache. + required: + - model_name + - invocation_count + - prompt_tokens + - completion_tokens + - total_tokens + - cached_tokens + per_testing_criteria_results: + type: array + description: Results per testing criteria applied during the evaluation run. + items: + type: object + properties: + testing_criteria: + type: string + description: A description of the testing criteria. + passed: + type: integer + description: Number of tests passed for this criteria. + failed: + type: integer + description: Number of tests failed for this criteria. + required: + - testing_criteria + - passed + - failed + data_source: + type: object + description: Information about the run's data source. 
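+ # Illustrative value (not part of the schema; the file ID is a hypothetical placeholder), assuming + # the jsonl source type defined by CreateEvalJsonlRunDataSource: + # {"type": "jsonl", "source": {"type": "file_id", "id": "file-abc123"}}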
+ anyOf: + - $ref: '#/components/schemas/CreateEvalJsonlRunDataSource' + - $ref: '#/components/schemas/CreateEvalCompletionsRunDataSource' + - $ref: '#/components/schemas/CreateEvalResponsesRunDataSource' + discriminator: + propertyName: type + metadata: + $ref: '#/components/schemas/Metadata' + error: + $ref: '#/components/schemas/EvalApiError' + required: + - object + - id + - eval_id + - status + - model + - name + - created_at + - report_url + - result_counts + - per_model_usage + - per_testing_criteria_results + - data_source + - metadata + - error + x-oaiMeta: + name: The eval run object + group: evals + example: | + { + "object": "eval.run", + "id": "evalrun_67e57965b480819094274e3a32235e4c", + "eval_id": "eval_67e579652b548190aaa83ada4b125f47", + "report_url": "https://platform.openai.com/evaluations/eval_67e579652b548190aaa83ada4b125f47?run_id=evalrun_67e57965b480819094274e3a32235e4c", + "status": "queued", + "model": "gpt-4o-mini", + "name": "gpt-4o-mini", + "created_at": 1743092069, + "result_counts": { + "total": 0, + "errored": 0, + "failed": 0, + "passed": 0 + }, + "per_model_usage": null, + "per_testing_criteria_results": null, + "data_source": { + "type": "completions", + "source": { + "type": "file_content", + "content": [ + { + "item": { + "input": "Tech Company Launches Advanced Artificial Intelligence Platform", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "Central Bank Increases Interest Rates Amid Inflation Concerns", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "International Summit Addresses Climate Change Strategies", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Major Retailer Reports Record-Breaking Holiday Sales", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "National Team Qualifies for World Championship Finals", + "ground_truth": "Sports" + } + }, + { + "item": { + "input": "Stock Markets Rally After Positive Economic Data Released", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "Global Manufacturer Announces Merger with Competitor", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "Breakthrough in Renewable Energy Technology Unveiled", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "World Leaders Sign Historic Climate Agreement", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Professional Athlete Sets New Record in Championship Event", + "ground_truth": "Sports" + } + }, + { + "item": { + "input": "Financial Institutions Adapt to New Regulatory Requirements", + "ground_truth": "Business" + } + }, + { + "item": { + "input": "Tech Conference Showcases Advances in Artificial Intelligence", + "ground_truth": "Technology" + } + }, + { + "item": { + "input": "Global Markets Respond to Oil Price Fluctuations", + "ground_truth": "Markets" + } + }, + { + "item": { + "input": "International Cooperation Strengthened Through New Treaty", + "ground_truth": "World" + } + }, + { + "item": { + "input": "Sports League Announces Revised Schedule for Upcoming Season", + "ground_truth": "Sports" + } + } + ] + }, + "input_messages": { + "type": "template", + "template": [ + { + "type": "message", + "role": "developer", + "content": { + "type": "input_text", + "text": "Categorize a given news headline into one of the following topics: Technology, Markets, World, Business, or Sports.\n\n# Steps\n\n1. Analyze the content of the news headline to understand its primary focus.\n2. 
Extract the subject matter, identifying any key indicators or keywords.\n3. Use the identified indicators to determine the most suitable category out of the five options: Technology, Markets, World, Business, or Sports.\n4. Ensure only one category is selected per headline.\n\n# Output Format\n\nRespond with the chosen category as a single word. For instance: \"Technology\", \"Markets\", \"World\", \"Business\", or \"Sports\".\n\n# Examples\n\n**Input**: \"Apple Unveils New iPhone Model, Featuring Advanced AI Features\" \n**Output**: \"Technology\"\n\n**Input**: \"Global Stocks Mixed as Investors Await Central Bank Decisions\" \n**Output**: \"Markets\"\n\n**Input**: \"War in Ukraine: Latest Updates on Negotiation Status\" \n**Output**: \"World\"\n\n**Input**: \"Microsoft in Talks to Acquire Gaming Company for $2 Billion\" \n**Output**: \"Business\"\n\n**Input**: \"Manchester United Secures Win in Premier League Football Match\" \n**Output**: \"Sports\" \n\n# Notes\n\n- If the headline appears to fit into more than one category, choose the most dominant theme.\n- Keywords or phrases such as \"stocks\", \"company acquisition\", \"match\", or technological brands can be good indicators for classification.\n" + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "{{item.input}}" + } + } + ] + }, + "model": "gpt-4o-mini", + "sampling_params": { + "seed": 42, + "temperature": 1.0, + "top_p": 1.0, + "max_completions_tokens": 2048 + } + }, + "error": null, + "metadata": {} + } + EvalRunList: + type: object + title: EvalRunList + description: | + An object representing a list of runs for an evaluation. + properties: + object: + type: string + enum: + - list + default: list + description: | + The type of this object. It is always set to "list". + x-stainless-const: true + data: + type: array + description: | + An array of eval run objects. + items: + $ref: '#/components/schemas/EvalRun' + first_id: + type: string + description: The identifier of the first eval run in the data array. + last_id: + type: string + description: The identifier of the last eval run in the data array. + has_more: + type: boolean + description: Indicates whether there are more evals available. + required: + - object + - data + - first_id + - last_id + - has_more + x-oaiMeta: + name: The eval run list object + group: evals + example: | + { + "object": "list", + "data": [ + { + "object": "eval.run", + "id": "evalrun_67b7fbdad46c819092f6fe7a14189620", + "eval_id": "eval_67b7fa9a81a88190ab4aa417e397ea21", + "report_url": "https://platform.openai.com/evaluations/eval_67b7fa9a81a88190ab4aa417e397ea21?run_id=evalrun_67b7fbdad46c819092f6fe7a14189620", + "status": "completed", + "model": "o3-mini", + "name": "Academic Assistant", + "created_at": 1740110812, + "result_counts": { + "total": 171, + "errored": 0, + "failed": 80, + "passed": 91 + }, + "per_model_usage": null, + "per_testing_criteria_results": [ + { + "testing_criteria": "String check grader", + "passed": 91, + "failed": 80 + } + ], + "run_data_source": { + "type": "completions", + "template_messages": [ + { + "type": "message", + "role": "system", + "content": { + "type": "input_text", + "text": "You are a helpful assistant." + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "Hello, can you help me with my homework?" 
+ } + } + ], + "datasource_reference": null, + "model": "o3-mini", + "max_completion_tokens": null, + "seed": null, + "temperature": null, + "top_p": null + }, + "error": null, + "metadata": {"test": "synthetics"} + } + ], + "first_id": "evalrun_67abd54d60ec8190832b46859da808f7", + "last_id": "evalrun_67abd54d60ec8190832b46859da808f7", + "has_more": false + } + EvalRunOutputItem: + type: object + title: EvalRunOutputItem + description: | + A schema representing an evaluation run output item. + properties: + object: + type: string + enum: + - eval.run.output_item + default: eval.run.output_item + description: The type of the object. Always "eval.run.output_item". + x-stainless-const: true + id: + type: string + description: Unique identifier for the evaluation run output item. + run_id: + type: string + description: The identifier of the evaluation run associated with this output item. + eval_id: + type: string + description: The identifier of the evaluation group. + created_at: + type: integer + description: Unix timestamp (in seconds) when the evaluation run was created. + status: + type: string + description: The status of the evaluation run. + datasource_item_id: + type: integer + description: The identifier for the data source item. + datasource_item: + type: object + description: Details of the input data source item. + additionalProperties: true + results: + type: array + description: A list of grader results for this output item. + items: + $ref: '#/components/schemas/EvalRunOutputItemResult' + sample: + type: object + description: A sample containing the input and output of the evaluation run. + properties: + input: + type: array + description: An array of input messages. + items: + type: object + description: An input message. + properties: + role: + type: string + description: The role of the message sender (e.g., system, user, developer). + content: + type: string + description: The content of the message. + required: + - role + - content + output: + type: array + description: An array of output messages. + items: + type: object + properties: + role: + type: string + description: The role of the message (e.g. "system", "assistant", "user"). + content: + type: string + description: The content of the message. + finish_reason: + type: string + description: The reason why the sample generation was finished. + model: + type: string + description: The model used for generating the sample. + usage: + type: object + description: Token usage details for the sample. + properties: + total_tokens: + type: integer + description: The total number of tokens used. + completion_tokens: + type: integer + description: The number of completion tokens generated. + prompt_tokens: + type: integer + description: The number of prompt tokens used. + cached_tokens: + type: integer + description: The number of tokens retrieved from cache. + required: + - total_tokens + - completion_tokens + - prompt_tokens + - cached_tokens + error: + $ref: '#/components/schemas/EvalApiError' + temperature: + type: number + description: The sampling temperature used. + max_completion_tokens: + type: integer + description: The maximum number of tokens allowed for completion. + top_p: + type: number + description: The top_p value used for sampling. + seed: + type: integer + description: The seed used for generating the sample. 
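+ # Editor's note (illustrative): in the example below the usage figures are + # additive: total_tokens (521) = prompt_tokens (519) + completion_tokens (2); + # this relationship is inferred from the example, not guaranteed by the schema.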
+ required: + - input + - output + - finish_reason + - model + - usage + - error + - temperature + - max_completion_tokens + - top_p + - seed + required: + - object + - id + - run_id + - eval_id + - created_at + - status + - datasource_item_id + - datasource_item + - results + - sample + x-oaiMeta: + name: The eval run output item object + group: evals + example: | + { + "object": "eval.run.output_item", + "id": "outputitem_67abd55eb6548190bb580745d5644a33", + "run_id": "evalrun_67abd54d60ec8190832b46859da808f7", + "eval_id": "eval_67abd54d9b0081909a86353f6fb9317a", + "created_at": 1739314509, + "status": "pass", + "datasource_item_id": 137, + "datasource_item": { + "teacher": "To grade essays, I only check for style, content, and grammar.", + "student": "I am a student who is trying to write the best essay." + }, + "results": [ + { + "name": "String Check Grader", + "type": "string-check-grader", + "score": 1.0, + "passed": true + } + ], + "sample": { + "input": [ + { + "role": "system", + "content": "You are an evaluator bot..." + }, + { + "role": "user", + "content": "You are assessing..." + } + ], + "output": [ + { + "role": "assistant", + "content": "The rubric is not clear nor concise." + } + ], + "finish_reason": "stop", + "model": "gpt-4o-2024-08-06", + "usage": { + "total_tokens": 521, + "completion_tokens": 2, + "prompt_tokens": 519, + "cached_tokens": 0 + }, + "error": null, + "temperature": 1.0, + "max_completion_tokens": 2048, + "top_p": 1.0, + "seed": 42 + } + } + EvalRunOutputItemList: + type: object + title: EvalRunOutputItemList + description: | + An object representing a list of output items for an evaluation run. + properties: + object: + type: string + enum: + - list + default: list + description: | + The type of this object. It is always set to "list". + x-stainless-const: true + data: + type: array + description: | + An array of eval run output item objects. + items: + $ref: '#/components/schemas/EvalRunOutputItem' + first_id: + type: string + description: The identifier of the first eval run output item in the data array. + last_id: + type: string + description: The identifier of the last eval run output item in the data array. + has_more: + type: boolean + description: Indicates whether there are more eval run output items available. + required: + - object + - data + - first_id + - last_id + - has_more + x-oaiMeta: + name: The eval run output item list object + group: evals + example: | + { + "object": "list", + "data": [ + { + "object": "eval.run.output_item", + "id": "outputitem_67abd55eb6548190bb580745d5644a33", + "run_id": "evalrun_67abd54d60ec8190832b46859da808f7", + "eval_id": "eval_67abd54d9b0081909a86353f6fb9317a", + "created_at": 1739314509, + "status": "pass", + "datasource_item_id": 137, + "datasource_item": { + "teacher": "To grade essays, I only check for style, content, and grammar.", + "student": "I am a student who is trying to write the best essay." + }, + "results": [ + { + "name": "String Check Grader", + "type": "string-check-grader", + "score": 1.0, + "passed": true + } + ], + "sample": { + "input": [ + { + "role": "system", + "content": "You are an evaluator bot..." + }, + { + "role": "user", + "content": "You are assessing..." + } + ], + "output": [ + { + "role": "assistant", + "content": "The rubric is not clear nor concise."
+ } + ], + "finish_reason": "stop", + "model": "gpt-4o-2024-08-06", + "usage": { + "total_tokens": 521, + "completion_tokens": 2, + "prompt_tokens": 519, + "cached_tokens": 0 + }, + "error": null, + "temperature": 1.0, + "max_completion_tokens": 2048, + "top_p": 1.0, + "seed": 42 + } + } + ], + "first_id": "outputitem_67abd55eb6548190bb580745d5644a33", + "last_id": "outputitem_67abd55eb6548190bb580745d5644a33", + "has_more": false + } + EvalRunOutputItemResult: + type: object + title: EvalRunOutputItemResult + description: | + A single grader result for an evaluation run output item. + properties: + name: + type: string + description: The name of the grader. + type: + type: string + description: The grader type (for example, "string-check-grader"). + score: + type: number + description: The numeric score produced by the grader. + passed: + type: boolean + description: Whether the grader considered the output a pass. + sample: + anyOf: + - type: object + additionalProperties: true + - type: 'null' + description: Optional sample or intermediate data produced by the grader. + additionalProperties: true + required: + - name + - score + - passed + EvalStoredCompletionsDataSourceConfig: + type: object + title: StoredCompletionsDataSourceConfig + description: | + Deprecated in favor of LogsDataSourceConfig. + properties: + type: + type: string + enum: + - stored_completions + default: stored_completions + description: The type of data source. Always `stored_completions`. + x-stainless-const: true + metadata: + $ref: '#/components/schemas/Metadata' + schema: + type: object + description: | + The json schema for the run data source items. + Learn how to build JSON schemas [here](https://json-schema.org/). + additionalProperties: true + required: + - type + - schema + deprecated: true + x-oaiMeta: + name: The stored completions data source object for evals + group: evals + example: | + { + "type": "stored_completions", + "metadata": { + "language": "english" + }, + "schema": { + "type": "object", + "properties": { + "item": { + "type": "object" + }, + "sample": { + "type": "object" + } + }, + "required": [ + "item", + "sample" + ] + } + } + EvalStoredCompletionsSource: + type: object + title: StoredCompletionsRunDataSource + description: | + A StoredCompletionsRunDataSource configuration describing a set of filters. + properties: + type: + type: string + enum: + - stored_completions + default: stored_completions + description: The type of source. Always `stored_completions`. + x-stainless-const: true + metadata: + $ref: '#/components/schemas/Metadata' + model: + anyOf: + - type: string + description: An optional model to filter by (e.g., 'gpt-4o'). + - type: 'null' + created_after: + anyOf: + - type: integer + description: An optional Unix timestamp to filter items created after this time. + - type: 'null' + created_before: + anyOf: + - type: integer + description: An optional Unix timestamp to filter items created before this time. + - type: 'null' + limit: + anyOf: + - type: integer + description: An optional maximum number of items to return. + - type: 'null' + required: + - type + x-oaiMeta: + name: The stored completions data source object used to configure an individual run + group: eval runs + example: | + { + "type": "stored_completions", + "model": "gpt-4o", + "created_after": 1668124800, + "created_before": 1668124900, + "limit": 100, + "metadata": {} + } + FileExpirationAfter: + type: object + title: File expiration policy + description: >- + The expiration policy for a file.
By default, files with `purpose=batch` expire after 30 days and all + other files are persisted until they are manually deleted. + properties: + anchor: + description: 'Anchor timestamp after which the expiration policy applies. Supported anchors: `created_at`.' + type: string + enum: + - created_at + x-stainless-const: true + seconds: + description: >- + The number of seconds after the anchor time that the file will expire. Must be between 3600 (1 + hour) and 2592000 (30 days). + type: integer + minimum: 3600 + maximum: 2592000 + required: + - anchor + - seconds + FilePath: + type: object + title: File path + description: | + A path to a file. + properties: + type: + type: string + description: | + The type of the file path. Always `file_path`. + enum: + - file_path + x-stainless-const: true + file_id: + type: string + description: | + The ID of the file. + index: + type: integer + description: | + The index of the file in the list of files. + required: + - type + - file_id + - index + FileSearchRanker: + type: string + description: The ranker to use for the file search. If not specified, the `auto` ranker will be used. + enum: + - auto + - default_2024_08_21 + FileSearchRankingOptions: + title: File search tool call ranking options + type: object + description: > + The ranking options for the file search. If not specified, the file search tool will use the `auto` + ranker and a score_threshold of 0. + + + See the [file search tool + documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + properties: + ranker: + $ref: '#/components/schemas/FileSearchRanker' + score_threshold: + type: number + description: >- + The score threshold for the file search. All values must be a floating point number between 0 and + 1. + minimum: 0 + maximum: 1 + required: + - score_threshold + FileSearchToolCall: + type: object + title: File search tool call + description: | + The results of a file search tool call. See the + [file search guide](https://platform.openai.com/docs/guides/tools-file-search) for more information. + properties: + id: + type: string + description: | + The unique ID of the file search tool call. + type: + type: string + enum: + - file_search_call + description: | + The type of the file search tool call. Always `file_search_call`. + x-stainless-const: true + status: + type: string + description: | + The status of the file search tool call. One of `in_progress`, + `searching`, `completed`, `incomplete`, or `failed`. + enum: + - in_progress + - searching + - completed + - incomplete + - failed + queries: + type: array + items: + type: string + description: | + The queries used to search for files. + results: + anyOf: + - type: array + description: | + The results of the file search tool call. + items: + type: object + properties: + file_id: + type: string + description: | + The unique ID of the file. + text: + type: string + description: | + The text that was retrieved from the file. + filename: + type: string + description: | + The name of the file. + attributes: + $ref: '#/components/schemas/VectorStoreFileAttributes' + score: + type: number + format: float + description: | + The relevance score of the file - a value between 0 and 1.
+ - type: 'null' + required: + - id + - type + - status + - queries + FineTuneChatCompletionRequestAssistantMessage: + allOf: + - type: object + title: Assistant message + deprecated: false + properties: + weight: + type: integer + enum: + - 0 + - 1 + description: Controls whether the assistant message is trained against (0 or 1) + - $ref: '#/components/schemas/ChatCompletionRequestAssistantMessage' + required: + - role + FineTuneChatRequestInput: + type: object + description: | + The per-line training example of a fine-tuning input file for chat models using the supervised method. + Input messages may contain text or image content only. Audio and file input messages + are not currently supported for fine-tuning. + properties: + messages: + type: array + minItems: 1 + items: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestSystemMessage' + - $ref: '#/components/schemas/ChatCompletionRequestUserMessage' + - $ref: '#/components/schemas/FineTuneChatCompletionRequestAssistantMessage' + - $ref: '#/components/schemas/ChatCompletionRequestToolMessage' + - $ref: '#/components/schemas/ChatCompletionRequestFunctionMessage' + tools: + type: array + description: A list of tools the model may generate JSON inputs for. + items: + $ref: '#/components/schemas/ChatCompletionTool' + parallel_tool_calls: + $ref: '#/components/schemas/ParallelToolCalls' + functions: + deprecated: true + description: A list of functions the model may generate JSON inputs for. + type: array + minItems: 1 + maxItems: 128 + items: + $ref: '#/components/schemas/ChatCompletionFunctions' + x-oaiMeta: + name: Training format for chat models using the supervised method + example: | + { + "messages": [ + { "role": "user", "content": "What is the weather in San Francisco?" }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call_id", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"San Francisco, USA\", \"format\": \"celsius\"}" + } + } + ] + } + ], + "parallel_tool_calls": false, + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and country, e.g. San Francisco, USA" + }, + "format": { "type": "string", "enum": ["celsius", "fahrenheit"] } + }, + "required": ["location", "format"] + } + } + } + ] + } + FineTuneDPOHyperparameters: + type: object + description: The hyperparameters used for the DPO fine-tuning job. + properties: + beta: + description: > + The beta value for the DPO method. A higher beta value will increase the weight of the penalty + between the policy and reference model. + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: number + minimum: 0 + maximum: 2 + exclusiveMinimum: true + batch_size: + description: > + Number of examples in each batch. A larger batch size means that model parameters are updated less + frequently, but with lower variance. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + maximum: 256 + learning_rate_multiplier: + description: | + Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: number + minimum: 0 + exclusiveMinimum: true + n_epochs: + description: > + The number of epochs to train the model for.
An epoch refers to one full cycle through the + training dataset. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + maximum: 50 + FineTuneDPOMethod: + type: object + description: Configuration for the DPO fine-tuning method. + properties: + hyperparameters: + $ref: '#/components/schemas/FineTuneDPOHyperparameters' + FineTuneMethod: + type: object + description: The method used for fine-tuning. + properties: + type: + type: string + description: The type of method. Is either `supervised`, `dpo`, or `reinforcement`. + enum: + - supervised + - dpo + - reinforcement + supervised: + $ref: '#/components/schemas/FineTuneSupervisedMethod' + dpo: + $ref: '#/components/schemas/FineTuneDPOMethod' + reinforcement: + $ref: '#/components/schemas/FineTuneReinforcementMethod' + required: + - type + FineTunePreferenceRequestInput: + type: object + description: | + The per-line training example of a fine-tuning input file for chat models using the dpo method. + Input messages may contain text or image content only. Audio and file input messages + are not currently supported for fine-tuning. + properties: + input: + type: object + properties: + messages: + type: array + minItems: 1 + items: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestSystemMessage' + - $ref: '#/components/schemas/ChatCompletionRequestUserMessage' + - $ref: '#/components/schemas/FineTuneChatCompletionRequestAssistantMessage' + - $ref: '#/components/schemas/ChatCompletionRequestToolMessage' + - $ref: '#/components/schemas/ChatCompletionRequestFunctionMessage' + tools: + type: array + description: A list of tools the model may generate JSON inputs for. + items: + $ref: '#/components/schemas/ChatCompletionTool' + parallel_tool_calls: + $ref: '#/components/schemas/ParallelToolCalls' + preferred_output: + type: array + description: The preferred completion message for the output. + maxItems: 1 + items: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestAssistantMessage' + non_preferred_output: + type: array + description: The non-preferred completion message for the output. + maxItems: 1 + items: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestAssistantMessage' + x-oaiMeta: + name: Training format for chat models using the preference method + example: | + { + "input": { + "messages": [ + { "role": "user", "content": "What is the weather in San Francisco?" } + ] + }, + "preferred_output": [ + { + "role": "assistant", + "content": "The weather in San Francisco is 70 degrees Fahrenheit." + } + ], + "non_preferred_output": [ + { + "role": "assistant", + "content": "The weather in San Francisco is 21 degrees Celsius." + } + ] + } + FineTuneReinforcementHyperparameters: + type: object + description: The hyperparameters used for the reinforcement fine-tuning job. + properties: + batch_size: + description: > + Number of examples in each batch. A larger batch size means that model parameters are updated less + frequently, but with lower variance. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + maximum: 256 + learning_rate_multiplier: + description: | + Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: number + minimum: 0 + exclusiveMinimum: true + n_epochs: + description: > + The number of epochs to train the model for. 
An epoch refers to one full cycle through the + training dataset. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + maximum: 50 + reasoning_effort: + description: | + Level of reasoning effort. + type: string + enum: + - default + - low + - medium + - high + default: default + compute_multiplier: + description: | + Multiplier on amount of compute used for exploring search space during training. + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: number + minimum: 0.00001 + maximum: 10 + exclusiveMinimum: true + eval_interval: + description: | + The number of training steps between evaluation runs. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + eval_samples: + description: | + Number of evaluation samples to generate per training step. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + FineTuneReinforcementMethod: + type: object + description: Configuration for the reinforcement fine-tuning method. + properties: + grader: + type: object + description: The grader used for the fine-tuning job. + anyOf: + - $ref: '#/components/schemas/GraderStringCheck' + - $ref: '#/components/schemas/GraderTextSimilarity' + - $ref: '#/components/schemas/GraderPython' + - $ref: '#/components/schemas/GraderScoreModel' + - $ref: '#/components/schemas/GraderMulti' + hyperparameters: + $ref: '#/components/schemas/FineTuneReinforcementHyperparameters' + required: + - grader + FineTuneReinforcementRequestInput: + type: object + unevaluatedProperties: true + description: > + Per-line training example for reinforcement fine-tuning. Note that `messages` and `tools` are the only + reserved keywords. + + Any other arbitrary key-value data can be included on training datapoints and will be available to + reference during grading under the `{{ item.XXX }}` template variable. + + Input messages may contain text or image content only. Audio and file input messages + are not currently supported for fine-tuning. + required: + - messages + properties: + messages: + type: array + minItems: 1 + items: + anyOf: + - $ref: '#/components/schemas/ChatCompletionRequestDeveloperMessage' + - $ref: '#/components/schemas/ChatCompletionRequestUserMessage' + - $ref: '#/components/schemas/FineTuneChatCompletionRequestAssistantMessage' + - $ref: '#/components/schemas/ChatCompletionRequestToolMessage' + tools: + type: array + description: A list of tools the model may generate JSON inputs for. + items: + $ref: '#/components/schemas/ChatCompletionTool' + x-oaiMeta: + name: Training format for reasoning models using the reinforcement method + example: | + { + "messages": [ + { + "role": "user", + "content": "Your task is to take a chemical in SMILES format and predict the number of hydrogen bond donors and acceptors according to Lipinski's rule. CCN(CC)CCC(=O)c1sc(N)nc1C" + } + ], + # Any other JSON data can be inserted into an example and referenced during RFT grading + "reference_answer": { + "donor_bond_counts": 5, + "acceptor_bond_counts": 7 + } + } + FineTuneSupervisedHyperparameters: + type: object + description: The hyperparameters used for the fine-tuning job. + properties: + batch_size: + description: > + Number of examples in each batch. A larger batch size means that model parameters are updated less + frequently, but with lower variance.
+ default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + maximum: 256 + learning_rate_multiplier: + description: | + Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: number + minimum: 0 + exclusiveMinimum: true + n_epochs: + description: > + The number of epochs to train the model for. An epoch refers to one full cycle through the + training dataset. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + maximum: 50 + FineTuneSupervisedMethod: + type: object + description: Configuration for the supervised fine-tuning method. + properties: + hyperparameters: + $ref: '#/components/schemas/FineTuneSupervisedHyperparameters' + FineTuningCheckpointPermission: + type: object + title: FineTuningCheckpointPermission + description: | + The `checkpoint.permission` object represents a permission for a fine-tuned model checkpoint. + properties: + id: + type: string + description: The permission identifier, which can be referenced in the API endpoints. + created_at: + type: integer + description: The Unix timestamp (in seconds) for when the permission was created. + project_id: + type: string + description: The project identifier that the permission is for. + object: + type: string + description: The object type, which is always "checkpoint.permission". + enum: + - checkpoint.permission + x-stainless-const: true + required: + - created_at + - id + - object + - project_id + x-oaiMeta: + name: The fine-tuned model checkpoint permission object + example: | + { + "object": "checkpoint.permission", + "id": "cp_zc4Q7MP6XxulcVzj4MZdwsAB", + "created_at": 1712211699, + "project_id": "proj_abGMw1llN8IrBb6SvvY5A1iH" + } + FineTuningIntegration: + type: object + title: Fine-Tuning Job Integration + required: + - type + - wandb + properties: + type: + type: string + description: The type of the integration being enabled for the fine-tuning job + enum: + - wandb + x-stainless-const: true + wandb: + type: object + description: | + The settings for your integration with Weights and Biases. This payload specifies the project that + metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags + to your run, and set a default entity (team, username, etc) to be associated with your run. + required: + - project + properties: + project: + description: | + The name of the project that the new run will be created under. + type: string + example: my-wandb-project + name: + anyOf: + - description: | + A display name to set for the run. If not set, we will use the Job ID as the name. + type: string + - type: 'null' + entity: + anyOf: + - description: > + The entity to use for the run. This allows you to set the team or username of the WandB + user that you would + + like associated with the run. If not set, the default entity for the registered WandB API + key is used. + type: string + - type: 'null' + tags: + description: > + A list of tags to be attached to the newly created run. These tags are passed through directly + to WandB. Some + + default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", + "openai/{ftjob-abcdef}". 
+ type: array + items: + type: string + example: custom-tag + FineTuningJob: + type: object + title: FineTuningJob + description: | + The `fine_tuning.job` object represents a fine-tuning job that has been created through the API. + properties: + id: + type: string + description: The object identifier, which can be referenced in the API endpoints. + created_at: + type: integer + description: The Unix timestamp (in seconds) for when the fine-tuning job was created. + error: + anyOf: + - type: object + description: >- + For fine-tuning jobs that have `failed`, this will contain more information on the cause of + the failure. + properties: + code: + type: string + description: A machine-readable error code. + message: + type: string + description: A human-readable error message. + param: + anyOf: + - type: string + description: >- + The parameter that was invalid, usually `training_file` or `validation_file`. This + field will be null if the failure was not parameter-specific. + - type: 'null' + required: + - code + - message + - param + - type: 'null' + fine_tuned_model: + anyOf: + - type: string + description: >- + The name of the fine-tuned model that is being created. The value will be null if the + fine-tuning job is still running. + - type: 'null' + finished_at: + anyOf: + - type: integer + description: >- + The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be + null if the fine-tuning job is still running. + - type: 'null' + hyperparameters: + type: object + description: >- + The hyperparameters used for the fine-tuning job. This value will only be returned when running + `supervised` jobs. + properties: + batch_size: + anyOf: + - description: | + Number of examples in each batch. A larger batch size means that model parameters + are updated less frequently, but with lower variance. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + - type: integer + minimum: 1 + maximum: 256 + title: Auto + - type: 'null' + title: Manual + learning_rate_multiplier: + description: | + Scaling factor for the learning rate. A smaller learning rate may be useful to avoid + overfitting. + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + title: Auto + - type: number + minimum: 0 + exclusiveMinimum: true + n_epochs: + description: | + The number of epochs to train the model for. An epoch refers to one full cycle + through the training dataset. + default: auto + anyOf: + - type: string + enum: + - auto + x-stainless-const: true + title: Auto + - type: integer + minimum: 1 + maximum: 50 + model: + type: string + description: The base model that is being fine-tuned. + object: + type: string + description: The object type, which is always "fine_tuning.job". + enum: + - fine_tuning.job + x-stainless-const: true + organization_id: + type: string + description: The organization that owns the fine-tuning job. + result_files: + type: array + description: >- + The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + items: + type: string + example: file-abc123 + status: + type: string + description: >- + The current status of the fine-tuning job, which can be either `validating_files`, `queued`, + `running`, `succeeded`, `failed`, or `cancelled`. 
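+ # Editor's note (illustrative, not normative): a typical progression is + # validating_files -> queued -> running -> succeeded | failed | cancelled; + # this ordering is inferred from the enum below and is not stated by the spec.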
+ enum: + - validating_files + - queued + - running + - succeeded + - failed + - cancelled + trained_tokens: + anyOf: + - type: integer + description: >- + The total number of billable tokens processed by this fine-tuning job. The value will be null + if the fine-tuning job is still running. + - type: 'null' + training_file: + type: string + description: >- + The file ID used for training. You can retrieve the training data with the [Files + API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + validation_file: + anyOf: + - type: string + description: >- + The file ID used for validation. You can retrieve the validation results with the [Files + API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + - type: 'null' + integrations: + anyOf: + - type: array + description: A list of integrations to enable for this fine-tuning job. + maxItems: 5 + items: + anyOf: + - $ref: '#/components/schemas/FineTuningIntegration' + discriminator: + propertyName: type + - type: 'null' + seed: + type: integer + description: The seed used for the fine-tuning job. + estimated_finish: + anyOf: + - type: integer + description: >- + The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value + will be null if the fine-tuning job is not running. + - type: 'null' + method: + $ref: '#/components/schemas/FineTuneMethod' + metadata: + $ref: '#/components/schemas/Metadata' + required: + - created_at + - error + - finished_at + - fine_tuned_model + - hyperparameters + - id + - model + - object + - organization_id + - result_files + - status + - trained_tokens + - training_file + - validation_file + - seed + x-oaiMeta: + name: The fine-tuning job object + example: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "davinci-002", + "created_at": 1692661014, + "finished_at": 1692661190, + "fine_tuned_model": "ft:davinci-002:my-org:custom_suffix:7q8mpxmy", + "organization_id": "org-123", + "result_files": [ + "file-abc123" + ], + "status": "succeeded", + "validation_file": null, + "training_file": "file-abc123", + "hyperparameters": { + "n_epochs": 4, + "batch_size": 1, + "learning_rate_multiplier": 1.0 + }, + "trained_tokens": 5768, + "integrations": [], + "seed": 0, + "estimated_finish": 0, + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "n_epochs": 4, + "batch_size": 1, + "learning_rate_multiplier": 1.0 + } + } + }, + "metadata": { + "key": "value" + } + } + FineTuningJobCheckpoint: + type: object + title: FineTuningJobCheckpoint + description: > + The `fine_tuning.job.checkpoint` object represents a model checkpoint for a fine-tuning job that is + ready to use. + properties: + id: + type: string + description: The checkpoint identifier, which can be referenced in the API endpoints. + created_at: + type: integer + description: The Unix timestamp (in seconds) for when the checkpoint was created. + fine_tuned_model_checkpoint: + type: string + description: The name of the fine-tuned checkpoint model that is created. + step_number: + type: integer + description: The step number that the checkpoint was created at. + metrics: + type: object + description: Metrics at the step number during the fine-tuning job. 
+ properties: + step: + type: number + train_loss: + type: number + train_mean_token_accuracy: + type: number + valid_loss: + type: number + valid_mean_token_accuracy: + type: number + full_valid_loss: + type: number + full_valid_mean_token_accuracy: + type: number + fine_tuning_job_id: + type: string + description: The name of the fine-tuning job that this checkpoint was created from. + object: + type: string + description: The object type, which is always "fine_tuning.job.checkpoint". + enum: + - fine_tuning.job.checkpoint + x-stainless-const: true + required: + - created_at + - fine_tuning_job_id + - fine_tuned_model_checkpoint + - id + - metrics + - object + - step_number + x-oaiMeta: + name: The fine-tuning job checkpoint object + example: | + { + "object": "fine_tuning.job.checkpoint", + "id": "ftckpt_qtZ5Gyk4BLq1SfLFWp3RtO3P", + "created_at": 1712211699, + "fine_tuned_model_checkpoint": "ft:gpt-4o-mini-2024-07-18:my-org:custom_suffix:9ABel2dg:ckpt-step-88", + "fine_tuning_job_id": "ftjob-fpbNQ3H1GrMehXRf8cO97xTN", + "metrics": { + "step": 88, + "train_loss": 0.478, + "train_mean_token_accuracy": 0.924, + "valid_loss": 10.112, + "valid_mean_token_accuracy": 0.145, + "full_valid_loss": 0.567, + "full_valid_mean_token_accuracy": 0.944 + }, + "step_number": 88 + } + FineTuningJobEvent: + type: object + description: Fine-tuning job event object + properties: + object: + type: string + description: The object type, which is always "fine_tuning.job.event". + enum: + - fine_tuning.job.event + x-stainless-const: true + id: + type: string + description: The object identifier. + created_at: + type: integer + description: The Unix timestamp (in seconds) for when the fine-tuning job was created. + level: + type: string + description: The log level of the event. + enum: + - info + - warn + - error + message: + type: string + description: The message of the event. + type: + type: string + description: The type of event. + enum: + - message + - metrics + data: + type: object + description: The data associated with the event. + required: + - id + - object + - created_at + - level + - message + x-oaiMeta: + name: The fine-tuning job event object + example: | + { + "object": "fine_tuning.job.event", + "id": "ftevent-abc123", + "created_at": 1677610602, + "level": "info", + "message": "Created fine-tuning job", + "data": {}, + "type": "message" + } + FunctionAndCustomToolCallOutput: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/InputTextContent' + - $ref: '#/components/schemas/InputImageContent' + - $ref: '#/components/schemas/InputFileContent' + FunctionObject: + type: object + properties: + description: + type: string + description: >- + A description of what the function does, used by the model to choose when and how to call the + function. + name: + type: string + description: >- + The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, + with a maximum length of 64. + parameters: + $ref: '#/components/schemas/FunctionParameters' + strict: + anyOf: + - type: boolean + default: false + description: >- + Whether to enable strict schema adherence when generating the function call. If set to true, + the model will follow the exact schema defined in the `parameters` field. Only a subset of + JSON Schema is supported when `strict` is `true`. Learn more about Structured Outputs in the + [function calling guide](https://platform.openai.com/docs/guides/function-calling).
+ - type: 'null' + required: + - name + FunctionParameters: + type: object + description: >- + The parameters the function accepts, described as a JSON Schema object. See the + [guide](https://platform.openai.com/docs/guides/function-calling) for examples, and the [JSON Schema + reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. + + + Omitting `parameters` defines a function with an empty parameter list. + additionalProperties: true + FunctionToolCall: + type: object + title: Function tool call + description: > + A tool call to run a function. See the + + [function calling guide](https://platform.openai.com/docs/guides/function-calling) for more + information. + properties: + id: + type: string + description: | + The unique ID of the function tool call. + type: + type: string + enum: + - function_call + description: | + The type of the function tool call. Always `function_call`. + x-stainless-const: true + call_id: + type: string + description: | + The unique ID of the function tool call generated by the model. + name: + type: string + description: | + The name of the function to run. + arguments: + type: string + description: | + A JSON string of the arguments to pass to the function. + status: + type: string + description: | + The status of the item. One of `in_progress`, `completed`, or + `incomplete`. Populated when items are returned via API. + enum: + - in_progress + - completed + - incomplete + required: + - type + - call_id + - name + - arguments + FunctionToolCallOutput: + type: object + title: Function tool call output + description: | + The output of a function tool call. + properties: + id: + type: string + description: | + The unique ID of the function tool call output. Populated when this item + is returned via API. + type: + type: string + enum: + - function_call_output + description: | + The type of the function tool call output. Always `function_call_output`. + x-stainless-const: true + call_id: + type: string + description: | + The unique ID of the function tool call generated by the model. + output: + description: | + The output from the function call generated by your code. + Can be a string or a list of output content. + anyOf: + - type: string + description: | + A string of the output of the function call. + title: string output + - type: array + items: + $ref: '#/components/schemas/FunctionAndCustomToolCallOutput' + title: output content list + description: | + Text, image, or file output of the function call. + status: + type: string + description: | + The status of the item. One of `in_progress`, `completed`, or + `incomplete`. Populated when items are returned via API. + enum: + - in_progress + - completed + - incomplete + required: + - type + - call_id + - output + FunctionToolCallOutputResource: + allOf: + - $ref: '#/components/schemas/FunctionToolCallOutput' + - type: object + properties: + id: + type: string + description: | + The unique ID of the function call tool output. + required: + - id + FunctionToolCallResource: + allOf: + - $ref: '#/components/schemas/FunctionToolCall' + - type: object + properties: + id: + type: string + description: | + The unique ID of the function tool call. + required: + - id + GraderLabelModel: + type: object + title: LabelModelGrader + description: | + A LabelModelGrader object which uses a model to assign labels to each item + in the evaluation. + properties: + type: + description: The object type, which is always `label_model`.
+ type: string + enum: + - label_model + x-stainless-const: true + name: + type: string + description: The name of the grader. + model: + type: string + description: The model to use for the evaluation. Must support structured outputs. + input: + type: array + items: + $ref: '#/components/schemas/EvalItem' + labels: + type: array + items: + type: string + description: The labels to assign to each item in the evaluation. + passing_labels: + type: array + items: + type: string + description: The labels that indicate a passing result. Must be a subset of labels. + required: + - type + - model + - input + - passing_labels + - labels + - name + x-oaiMeta: + name: Label Model Grader + group: graders + example: | + { + "name": "First label grader", + "type": "label_model", + "model": "gpt-4o-2024-08-06", + "input": [ + { + "type": "message", + "role": "system", + "content": { + "type": "input_text", + "text": "Classify the sentiment of the following statement as one of positive, neutral, or negative" + } + }, + { + "type": "message", + "role": "user", + "content": { + "type": "input_text", + "text": "Statement: {{item.response}}" + } + } + ], + "passing_labels": [ + "positive" + ], + "labels": [ + "positive", + "neutral", + "negative" + ] + } + GraderMulti: + type: object + title: MultiGrader + description: A MultiGrader object combines the output of multiple graders to produce a single score. + properties: + type: + type: string + enum: + - multi + default: multi + description: The object type, which is always `multi`. + x-stainless-const: true + name: + type: string + description: The name of the grader. + graders: + anyOf: + - $ref: '#/components/schemas/GraderStringCheck' + - $ref: '#/components/schemas/GraderTextSimilarity' + - $ref: '#/components/schemas/GraderPython' + - $ref: '#/components/schemas/GraderScoreModel' + - $ref: '#/components/schemas/GraderLabelModel' + calculate_output: + type: string + description: A formula to calculate the output based on grader results. + required: + - name + - type + - graders + - calculate_output + x-oaiMeta: + name: Multi Grader + group: graders + example: | + { + "type": "multi", + "name": "example multi grader", + "graders": [ + { + "type": "text_similarity", + "name": "example text similarity grader", + "input": "The graded text", + "reference": "The reference text", + "evaluation_metric": "fuzzy_match" + }, + { + "type": "string_check", + "name": "Example string check grader", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "operation": "eq" + } + ], + "calculate_output": "0.5 * text_similarity_score + 0.5 * string_check_score" + } + GraderPython: + type: object + title: PythonGrader + description: | + A PythonGrader object that runs a python script on the input. + properties: + type: + type: string + enum: + - python + description: The object type, which is always `python`. + x-stainless-const: true + name: + type: string + description: The name of the grader. + source: + type: string + description: The source code of the python script. + image_tag: + type: string + description: The image tag to use for the python script. + required: + - type + - name + - source + x-oaiMeta: + name: Python Grader + group: graders + example: | + { + "type": "python", + "name": "Example python grader", + "image_tag": "2025-05-08", + "source": """ + def grade(sample: dict, item: dict) -> float: + \""" + Returns 1.0 if `output_text` equals `label`, otherwise 0.0.
+ \""" + output = sample.get("output_text") + label = item.get("label") + return 1.0 if output == label else 0.0 + """, + } + GraderScoreModel: + type: object + title: ScoreModelGrader + description: | + A ScoreModelGrader object that uses a model to assign a score to the input. + properties: + type: + type: string + enum: + - score_model + description: The object type, which is always `score_model`. + x-stainless-const: true + name: + type: string + description: The name of the grader. + model: + type: string + description: The model to use for the evaluation. + sampling_params: + type: object + description: The sampling parameters for the model. + properties: + seed: + anyOf: + - type: integer + description: | + A seed value to initialize the randomness, during sampling. + - type: 'null' + top_p: + anyOf: + - type: number + default: 1 + example: 1 + description: | + An alternative to temperature for nucleus sampling; 1.0 includes all tokens. + - type: 'null' + temperature: + anyOf: + - type: number + description: | + A higher temperature increases randomness in the outputs. + - type: 'null' + max_completions_tokens: + anyOf: + - type: integer + minimum: 1 + description: | + The maximum number of tokens the grader model may generate in its response. + - type: 'null' + reasoning_effort: + $ref: '#/components/schemas/ReasoningEffort' + input: + type: array + items: + $ref: '#/components/schemas/EvalItem' + description: The input text. This may include template strings. + range: + type: array + items: + type: number + min_items: 2 + max_items: 2 + description: The range of the score. Defaults to `[0, 1]`. + required: + - type + - name + - input + - model + x-oaiMeta: + name: Score Model Grader + group: graders + example: | + { + "type": "score_model", + "name": "Example score model grader", + "input": [ + { + "role": "user", + "content": ( + "Score how close the reference answer is to the model answer. Score 1.0 if they are the same and 0.0 if they are different." + " Return just a floating point score\n\n" + " Reference answer: {{item.label}}\n\n" + " Model answer: {{sample.output_text}}" + ), + } + ], + "model": "o4-mini-2025-04-16", + "sampling_params": { + "temperature": 1, + "top_p": 1, + "seed": 42, + "max_completions_tokens": 32768, + "reasoning_effort": "medium" + }, + } + GraderStringCheck: + type: object + title: StringCheckGrader + description: > + A StringCheckGrader object that performs a string comparison between input and reference using a + specified operation. + properties: + type: + type: string + enum: + - string_check + description: The object type, which is always `string_check`. + x-stainless-const: true + name: + type: string + description: The name of the grader. + input: + type: string + description: The input text. This may include template strings. + reference: + type: string + description: The reference text. This may include template strings. + operation: + type: string + enum: + - eq + - ne + - like + - ilike + description: The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`. + required: + - type + - name + - input + - reference + - operation + x-oaiMeta: + name: String Check Grader + group: graders + example: | + { + "type": "string_check", + "name": "Example string check grader", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "operation": "eq" + } + GraderTextSimilarity: + type: object + title: TextSimilarityGrader + description: | + A TextSimilarityGrader object which grades text based on similarity metrics. 
+ properties: + type: + type: string + enum: + - text_similarity + default: text_similarity + description: The type of grader. + x-stainless-const: true + name: + type: string + description: The name of the grader. + input: + type: string + description: The text being graded. + reference: + type: string + description: The text being graded against. + evaluation_metric: + type: string + enum: + - cosine + - fuzzy_match + - bleu + - gleu + - meteor + - rouge_1 + - rouge_2 + - rouge_3 + - rouge_4 + - rouge_5 + - rouge_l + description: | + The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, + `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, + or `rouge_l`. + required: + - type + - name + - input + - reference + - evaluation_metric + x-oaiMeta: + name: Text Similarity Grader + group: graders + example: | + { + "type": "text_similarity", + "name": "Example text similarity grader", + "input": "{{sample.output_text}}", + "reference": "{{item.label}}", + "evaluation_metric": "fuzzy_match" + } + Image: + type: object + description: Represents the content or the URL of an image generated by the OpenAI API. + properties: + b64_json: + type: string + description: >- + The base64-encoded JSON of the generated image. Default value for `gpt-image-1`, and only present + if `response_format` is set to `b64_json` for `dall-e-2` and `dall-e-3`. + url: + type: string + description: >- + When using `dall-e-2` or `dall-e-3`, the URL of the generated image if `response_format` is set to + `url` (default value). Unsupported for `gpt-image-1`. + revised_prompt: + type: string + description: For `dall-e-3` only, the revised prompt that was used to generate the image. + ImageEditCompletedEvent: + type: object + description: | + Emitted when image editing has completed and the final image is available. + properties: + type: + type: string + description: | + The type of the event. Always `image_edit.completed`. + enum: + - image_edit.completed + x-stainless-const: true + b64_json: + type: string + description: | + Base64-encoded final edited image data, suitable for rendering as an image. + created_at: + type: integer + description: | + The Unix timestamp when the event was created. + size: + type: string + description: | + The size of the edited image. + enum: + - 1024x1024 + - 1024x1536 + - 1536x1024 + - auto + quality: + type: string + description: | + The quality setting for the edited image. + enum: + - low + - medium + - high + - auto + background: + type: string + description: | + The background setting for the edited image. + enum: + - transparent + - opaque + - auto + output_format: + type: string + description: | + The output format for the edited image. + enum: + - png + - webp + - jpeg + usage: + $ref: '#/components/schemas/ImagesUsage' + required: + - type + - b64_json + - created_at + - size + - quality + - background + - output_format + - usage + x-oaiMeta: + name: image_edit.completed + group: images + example: | + { + "type": "image_edit.completed", + "b64_json": "...", + "created_at": 1620000000, + "size": "1024x1024", + "quality": "high", + "background": "transparent", + "output_format": "png", + "usage": { + "total_tokens": 100, + "input_tokens": 50, + "output_tokens": 50, + "input_tokens_details": { + "text_tokens": 10, + "image_tokens": 40 + } + } + } + ImageEditPartialImageEvent: + type: object + description: | + Emitted when a partial image is available during image editing streaming. 
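+ # Editor's note (illustrative): a stream would be expected to emit zero or + # more `image_edit.partial_image` events, ordered by `partial_image_index`, + # before a single `image_edit.completed` event; this sequencing is inferred + # from the event descriptions, not stated explicitly here.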
+ properties: + type: + type: string + description: | + The type of the event. Always `image_edit.partial_image`. + enum: + - image_edit.partial_image + x-stainless-const: true + b64_json: + type: string + description: | + Base64-encoded partial image data, suitable for rendering as an image. + created_at: + type: integer + description: | + The Unix timestamp when the event was created. + size: + type: string + description: | + The size of the requested edited image. + enum: + - 1024x1024 + - 1024x1536 + - 1536x1024 + - auto + quality: + type: string + description: | + The quality setting for the requested edited image. + enum: + - low + - medium + - high + - auto + background: + type: string + description: | + The background setting for the requested edited image. + enum: + - transparent + - opaque + - auto + output_format: + type: string + description: | + The output format for the requested edited image. + enum: + - png + - webp + - jpeg + partial_image_index: + type: integer + description: | + 0-based index for the partial image (streaming). + required: + - type + - b64_json + - created_at + - size + - quality + - background + - output_format + - partial_image_index + x-oaiMeta: + name: image_edit.partial_image + group: images + example: | + { + "type": "image_edit.partial_image", + "b64_json": "...", + "created_at": 1620000000, + "size": "1024x1024", + "quality": "high", + "background": "transparent", + "output_format": "png", + "partial_image_index": 0 + } + ImageEditStreamEvent: + anyOf: + - $ref: '#/components/schemas/ImageEditPartialImageEvent' + - $ref: '#/components/schemas/ImageEditCompletedEvent' + discriminator: + propertyName: type + ImageGenCompletedEvent: + type: object + description: | + Emitted when image generation has completed and the final image is available. + properties: + type: + type: string + description: | + The type of the event. Always `image_generation.completed`. + enum: + - image_generation.completed + x-stainless-const: true + b64_json: + type: string + description: | + Base64-encoded image data, suitable for rendering as an image. + created_at: + type: integer + description: | + The Unix timestamp when the event was created. + size: + type: string + description: | + The size of the generated image. + enum: + - 1024x1024 + - 1024x1536 + - 1536x1024 + - auto + quality: + type: string + description: | + The quality setting for the generated image. + enum: + - low + - medium + - high + - auto + background: + type: string + description: | + The background setting for the generated image. + enum: + - transparent + - opaque + - auto + output_format: + type: string + description: | + The output format for the generated image. + enum: + - png + - webp + - jpeg + usage: + $ref: '#/components/schemas/ImagesUsage' + required: + - type + - b64_json + - created_at + - size + - quality + - background + - output_format + - usage + x-oaiMeta: + name: image_generation.completed + group: images + example: | + { + "type": "image_generation.completed", + "b64_json": "...", + "created_at": 1620000000, + "size": "1024x1024", + "quality": "high", + "background": "transparent", + "output_format": "png", + "usage": { + "total_tokens": 100, + "input_tokens": 50, + "output_tokens": 50, + "input_tokens_details": { + "text_tokens": 10, + "image_tokens": 40 + } + } + } + ImageGenPartialImageEvent: + type: object + description: | + Emitted when a partial image is available during image generation streaming. + properties: + type: + type: string + description: | + The type of the event. 
Always `image_generation.partial_image`. + enum: + - image_generation.partial_image + x-stainless-const: true + b64_json: + type: string + description: | + Base64-encoded partial image data, suitable for rendering as an image. + created_at: + type: integer + description: | + The Unix timestamp when the event was created. + size: + type: string + description: | + The size of the requested image. + enum: + - 1024x1024 + - 1024x1536 + - 1536x1024 + - auto + quality: + type: string + description: | + The quality setting for the requested image. + enum: + - low + - medium + - high + - auto + background: + type: string + description: | + The background setting for the requested image. + enum: + - transparent + - opaque + - auto + output_format: + type: string + description: | + The output format for the requested image. + enum: + - png + - webp + - jpeg + partial_image_index: + type: integer + description: | + 0-based index for the partial image (streaming). + required: + - type + - b64_json + - created_at + - size + - quality + - background + - output_format + - partial_image_index + x-oaiMeta: + name: image_generation.partial_image + group: images + example: | + { + "type": "image_generation.partial_image", + "b64_json": "...", + "created_at": 1620000000, + "size": "1024x1024", + "quality": "high", + "background": "transparent", + "output_format": "png", + "partial_image_index": 0 + } + ImageGenStreamEvent: + anyOf: + - $ref: '#/components/schemas/ImageGenPartialImageEvent' + - $ref: '#/components/schemas/ImageGenCompletedEvent' + discriminator: + propertyName: type + ImageGenTool: + type: object + title: Image generation tool + description: | + A tool that generates images using a model like `gpt-image-1`. + properties: + type: + type: string + enum: + - image_generation + description: | + The type of the image generation tool. Always `image_generation`. + x-stainless-const: true + model: + type: string + enum: + - gpt-image-1 + - gpt-image-1-mini + description: | + The image generation model to use. Default: `gpt-image-1`. + default: gpt-image-1 + quality: + type: string + enum: + - low + - medium + - high + - auto + description: | + The quality of the generated image. One of `low`, `medium`, `high`, + or `auto`. Default: `auto`. + default: auto + size: + type: string + enum: + - 1024x1024 + - 1024x1536 + - 1536x1024 + - auto + description: | + The size of the generated image. One of `1024x1024`, `1024x1536`, + `1536x1024`, or `auto`. Default: `auto`. + default: auto + output_format: + type: string + enum: + - png + - webp + - jpeg + description: | + The output format of the generated image. One of `png`, `webp`, or + `jpeg`. Default: `png`. + default: png + output_compression: + type: integer + minimum: 0 + maximum: 100 + description: | + Compression level for the output image. Default: 100. + default: 100 + moderation: + type: string + enum: + - auto + - low + description: | + Moderation level for the generated image. Default: `auto`. + default: auto + background: + type: string + enum: + - transparent + - opaque + - auto + description: | + Background type for the generated image. One of `transparent`, + `opaque`, or `auto`. Default: `auto`. + default: auto + input_fidelity: + anyOf: + - $ref: '#/components/schemas/InputFidelity' + - type: 'null' + input_image_mask: + type: object + description: | + Optional mask for inpainting. Contains `image_url` + (string, optional) and `file_id` (string, optional). + properties: + image_url: + type: string + description: | + Base64-encoded mask image. 
+ file_id: + type: string + description: | + File ID for the mask image. + required: [] + additionalProperties: false + partial_images: + type: integer + minimum: 0 + maximum: 3 + description: | + Number of partial images to generate in streaming mode, from 0 (default value) to 3. + default: 0 + required: + - type + ImageGenToolCall: + type: object + title: Image generation call + description: | + An image generation request made by the model. + properties: + type: + type: string + enum: + - image_generation_call + description: | + The type of the image generation call. Always `image_generation_call`. + x-stainless-const: true + id: + type: string + description: | + The unique ID of the image generation call. + status: + type: string + enum: + - in_progress + - completed + - generating + - failed + description: | + The status of the image generation call. + result: + anyOf: + - type: string + description: | + The generated image encoded in base64. + - type: 'null' + required: + - type + - id + - status + - result + ImagesResponse: + type: object + title: Image generation response + description: The response from the image generation endpoint. + properties: + created: + type: integer + description: The Unix timestamp (in seconds) of when the image was created. + data: + type: array + description: The list of generated images. + items: + $ref: '#/components/schemas/Image' + background: + type: string + description: The background parameter used for the image generation. Either `transparent` or `opaque`. + enum: + - transparent + - opaque + output_format: + type: string + description: The output format of the image generation. Either `png`, `webp`, or `jpeg`. + enum: + - png + - webp + - jpeg + size: + type: string + description: The size of the image generated. Either `1024x1024`, `1024x1536`, or `1536x1024`. + enum: + - 1024x1024 + - 1024x1536 + - 1536x1024 + quality: + type: string + description: The quality of the image generated. Either `low`, `medium`, or `high`. + enum: + - low + - medium + - high + usage: + $ref: '#/components/schemas/ImageGenUsage' + required: + - created + x-oaiMeta: + name: The image generation response + group: images + example: | + { + "created": 1713833628, + "data": [ + { + "b64_json": "..." + } + ], + "background": "transparent", + "output_format": "png", + "size": "1024x1024", + "quality": "high", + "usage": { + "total_tokens": 100, + "input_tokens": 50, + "output_tokens": 50, + "input_tokens_details": { + "text_tokens": 10, + "image_tokens": 40 + } + } + } + ImagesUsage: + type: object + description: | + For `gpt-image-1` only, the token usage information for the image generation. + required: + - total_tokens + - input_tokens + - output_tokens + - input_tokens_details + properties: + total_tokens: + type: integer + description: | + The total number of tokens (images and text) used for the image generation. + input_tokens: + type: integer + description: The number of tokens (images and text) in the input prompt. + output_tokens: + type: integer + description: The number of image tokens in the output image. + input_tokens_details: + type: object + description: The input tokens detailed information for the image generation. + required: + - text_tokens + - image_tokens + properties: + text_tokens: + type: integer + description: The number of text tokens in the input prompt. + image_tokens: + type: integer + description: The number of image tokens in the input prompt. 
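The image streaming events defined above share one consumption pattern: zero or more `partial_image` events (capped by the tool's `partial_images` setting), followed by a single `completed` event carrying the final `b64_json` payload and token `usage`. A minimal client-side sketch, assuming each event has already been parsed into a dict matching these schemas; `handle_image_event` and the output file names are illustrative, not part of the spec:

```python
import base64

def handle_image_event(event: dict) -> None:
    """Dispatch one image streaming event by its `type` discriminator."""
    kind = event["type"]
    if kind in ("image_generation.partial_image", "image_edit.partial_image"):
        # Partial frames are 0-indexed; at most 3 arrive per request.
        name = f"partial_{event['partial_image_index']}.{event['output_format']}"
        with open(name, "wb") as fh:
            fh.write(base64.b64decode(event["b64_json"]))
    elif kind in ("image_generation.completed", "image_edit.completed"):
        # The terminal event repeats the request settings and adds token usage.
        with open(f"final.{event['output_format']}", "wb") as fh:
            fh.write(base64.b64decode(event["b64_json"]))
        print("total tokens:", event["usage"]["total_tokens"])
```

Because both event families carry the same `b64_json`, `output_format`, and sizing fields, a single handler like this covers generation and editing streams alike.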
+ InputAudio: + type: object + title: Input audio + description: | + An audio input to the model. + properties: + type: + type: string + description: | + The type of the input item. Always `input_audio`. + enum: + - input_audio + x-stainless-const: true + input_audio: + type: object + properties: + data: + type: string + description: | + Base64-encoded audio data. + format: + type: string + description: | + The format of the audio data. Currently supported formats are `mp3` and + `wav`. + enum: + - mp3 + - wav + required: + - data + - format + required: + - type + - input_audio + InputContent: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/InputTextContent' + - $ref: '#/components/schemas/InputImageContent' + - $ref: '#/components/schemas/InputFileContent' + InputItem: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/EasyInputMessage' + - type: object + title: Item + description: | + An item representing part of the context for the response to be + generated by the model. Can contain text, images, and audio inputs, + as well as previous assistant responses and tool call outputs. + $ref: '#/components/schemas/Item' + - $ref: '#/components/schemas/ItemReferenceParam' + InputMessage: + type: object + title: Input message + description: | + A message input to the model with a role indicating instruction following + hierarchy. Instructions given with the `developer` or `system` role take + precedence over instructions given with the `user` role. + properties: + type: + type: string + description: | + The type of the message input. Always set to `message`. + enum: + - message + x-stainless-const: true + role: + type: string + description: | + The role of the message input. One of `user`, `system`, or `developer`. + enum: + - user + - system + - developer + status: + type: string + description: | + The status of item. One of `in_progress`, `completed`, or + `incomplete`. Populated when items are returned via API. + enum: + - in_progress + - completed + - incomplete + content: + $ref: '#/components/schemas/InputMessageContentList' + required: + - role + - content + InputMessageContentList: + type: array + title: Input item content list + description: | + A list of one or many input items to the model, containing different content + types. + items: + $ref: '#/components/schemas/InputContent' + InputMessageResource: + allOf: + - $ref: '#/components/schemas/InputMessage' + - type: object + properties: + id: + type: string + description: | + The unique ID of the message input. + required: + - id + InputParam: + description: | + Text, image, or file inputs to the model, used to generate a response. + + Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + anyOf: + - type: string + title: Text input + description: | + A text input to the model, equivalent to a text input with the + `user` role. + - type: array + title: Input item list + description: | + A list of one or many input items to the model, containing + different content types. + items: + $ref: '#/components/schemas/InputItem' + Invite: + type: object + description: Represents an individual `invite` to the organization. 
+ properties: + object: + type: string + enum: + - organization.invite + description: The object type, which is always `organization.invite` + x-stainless-const: true + id: + type: string + description: The identifier, which can be referenced in API endpoints + email: + type: string + description: The email address of the individual to whom the invite was sent + role: + type: string + enum: + - owner + - reader + description: '`owner` or `reader`' + status: + type: string + enum: + - accepted + - expired + - pending + description: '`accepted`,`expired`, or `pending`' + invited_at: + type: integer + description: The Unix timestamp (in seconds) of when the invite was sent. + expires_at: + type: integer + description: The Unix timestamp (in seconds) of when the invite expires. + accepted_at: + type: integer + description: The Unix timestamp (in seconds) of when the invite was accepted. + projects: + type: array + description: The projects that were granted membership upon acceptance of the invite. + items: + type: object + properties: + id: + type: string + description: Project's public ID + role: + type: string + enum: + - member + - owner + description: Project membership role + required: + - object + - id + - email + - role + - status + - invited_at + - expires_at + x-oaiMeta: + name: The invite object + example: | + { + "object": "organization.invite", + "id": "invite-abc", + "email": "user@example.com", + "role": "owner", + "status": "accepted", + "invited_at": 1711471533, + "expires_at": 1711471533, + "accepted_at": 1711471533, + "projects": [ + { + "id": "project-xyz", + "role": "member" + } + ] + } + InviteDeleteResponse: + type: object + properties: + object: + type: string + enum: + - organization.invite.deleted + description: The object type, which is always `organization.invite.deleted` + x-stainless-const: true + id: + type: string + deleted: + type: boolean + required: + - object + - id + - deleted + InviteListResponse: + type: object + properties: + object: + type: string + enum: + - list + description: The object type, which is always `list` + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/Invite' + first_id: + type: string + description: The first `invite_id` in the retrieved `list` + last_id: + type: string + description: The last `invite_id` in the retrieved `list` + has_more: + type: boolean + description: The `has_more` property is used for pagination to indicate there are additional results. + required: + - object + - data + InviteRequest: + type: object + properties: + email: + type: string + description: Send an email to this address + role: + type: string + enum: + - reader + - owner + description: '`owner` or `reader`' + projects: + type: array + description: >- + An array of projects to which membership is granted at the same time the org invite is accepted. + If omitted, the user will be invited to the default project for compatibility with legacy + behavior. + items: + type: object + properties: + id: + type: string + description: Project's public ID + role: + type: string + enum: + - member + - owner + description: Project membership role + required: + - id + - role + required: + - email + - role + Item: + type: object + description: | + Content item used to generate a response. 
+ discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/InputMessage' + - $ref: '#/components/schemas/OutputMessage' + - $ref: '#/components/schemas/FileSearchToolCall' + - $ref: '#/components/schemas/ComputerToolCall' + - $ref: '#/components/schemas/ComputerCallOutputItemParam' + - $ref: '#/components/schemas/WebSearchToolCall' + - $ref: '#/components/schemas/FunctionToolCall' + - $ref: '#/components/schemas/FunctionCallOutputItemParam' + - $ref: '#/components/schemas/ReasoningItem' + - $ref: '#/components/schemas/ImageGenToolCall' + - $ref: '#/components/schemas/CodeInterpreterToolCall' + - $ref: '#/components/schemas/LocalShellToolCall' + - $ref: '#/components/schemas/LocalShellToolCallOutput' + - $ref: '#/components/schemas/FunctionShellCallItemParam' + - $ref: '#/components/schemas/FunctionShellCallOutputItemParam' + - $ref: '#/components/schemas/ApplyPatchToolCallItemParam' + - $ref: '#/components/schemas/ApplyPatchToolCallOutputItemParam' + - $ref: '#/components/schemas/MCPListTools' + - $ref: '#/components/schemas/MCPApprovalRequest' + - $ref: '#/components/schemas/MCPApprovalResponse' + - $ref: '#/components/schemas/MCPToolCall' + - $ref: '#/components/schemas/CustomToolCallOutput' + - $ref: '#/components/schemas/CustomToolCall' + ItemResource: + description: | + Content item used to generate a response. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/InputMessageResource' + - $ref: '#/components/schemas/OutputMessage' + - $ref: '#/components/schemas/FileSearchToolCall' + - $ref: '#/components/schemas/ComputerToolCall' + - $ref: '#/components/schemas/ComputerToolCallOutputResource' + - $ref: '#/components/schemas/WebSearchToolCall' + - $ref: '#/components/schemas/FunctionToolCallResource' + - $ref: '#/components/schemas/FunctionToolCallOutputResource' + - $ref: '#/components/schemas/ImageGenToolCall' + - $ref: '#/components/schemas/CodeInterpreterToolCall' + - $ref: '#/components/schemas/LocalShellToolCall' + - $ref: '#/components/schemas/LocalShellToolCallOutput' + - $ref: '#/components/schemas/FunctionShellCall' + - $ref: '#/components/schemas/FunctionShellCallOutput' + - $ref: '#/components/schemas/ApplyPatchToolCall' + - $ref: '#/components/schemas/ApplyPatchToolCallOutput' + - $ref: '#/components/schemas/MCPListTools' + - $ref: '#/components/schemas/MCPApprovalRequest' + - $ref: '#/components/schemas/MCPApprovalResponseResource' + - $ref: '#/components/schemas/MCPToolCall' + ListAssistantsResponse: + type: object + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/AssistantObject' + first_id: + type: string + example: asst_abc123 + last_id: + type: string + example: asst_abc456 + has_more: + type: boolean + example: false + required: + - object + - data + - first_id + - last_id + - has_more + x-oaiMeta: + name: List assistants response object + group: chat + example: | + { + "object": "list", + "data": [ + { + "id": "asst_abc123", + "object": "assistant", + "created_at": 1698982736, + "name": "Coding Tutor", + "description": null, + "model": "gpt-4o", + "instructions": "You are a helpful assistant designed to make me better at coding!", + "tools": [], + "tool_resources": {}, + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + }, + { + "id": "asst_abc456", + "object": "assistant", + "created_at": 1698982718, + "name": "My Assistant", + "description": null, + "model": "gpt-4o", + "instructions": "You are a helpful assistant 
designed to make me better at coding!", + "tools": [], + "tool_resources": {}, + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + }, + { + "id": "asst_abc789", + "object": "assistant", + "created_at": 1698982643, + "name": null, + "description": null, + "model": "gpt-4o", + "instructions": null, + "tools": [], + "tool_resources": {}, + "metadata": {}, + "top_p": 1.0, + "temperature": 1.0, + "response_format": "auto" + } + ], + "first_id": "asst_abc123", + "last_id": "asst_abc789", + "has_more": false + } + ListAuditLogsResponse: + type: object + properties: + object: + type: string + enum: + - list + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/AuditLog' + first_id: + type: string + example: audit_log-defb456h8dks + last_id: + type: string + example: audit_log-hnbkd8s93s + has_more: + type: boolean + required: + - object + - data + - first_id + - last_id + - has_more + ListBatchesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Batch' + first_id: + type: string + example: batch_abc123 + last_id: + type: string + example: batch_abc456 + has_more: + type: boolean + object: + type: string + enum: + - list + x-stainless-const: true + required: + - object + - data + - has_more + ListCertificatesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Certificate' + first_id: + type: string + example: cert_abc + last_id: + type: string + example: cert_abc + has_more: + type: boolean + object: + type: string + enum: + - list + x-stainless-const: true + required: + - object + - data + - has_more + ListFilesResponse: + type: object + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/OpenAIFile' + first_id: + type: string + example: file-abc123 + last_id: + type: string + example: file-abc456 + has_more: + type: boolean + example: false + required: + - object + - data + - first_id + - last_id + - has_more + ListFineTuningCheckpointPermissionResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/FineTuningCheckpointPermission' + object: + type: string + enum: + - list + x-stainless-const: true + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + required: + - object + - data + - has_more + ListFineTuningJobCheckpointsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/FineTuningJobCheckpoint' + object: + type: string + enum: + - list + x-stainless-const: true + first_id: + anyOf: + - type: string + - type: 'null' + last_id: + anyOf: + - type: string + - type: 'null' + has_more: + type: boolean + required: + - object + - data + - has_more + ListFineTuningJobEventsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/FineTuningJobEvent' + object: + type: string + enum: + - list + x-stainless-const: true + has_more: + type: boolean + required: + - object + - data + - has_more + ListMessagesResponse: + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/MessageObject' + first_id: + type: string + example: msg_abc123 + last_id: + type: string + example: msg_abc123 + has_more: + type: boolean + example: false + required: + - object + - data + - first_id + - last_id + - has_more + 
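The `List*Response` schemas above all use the same cursor envelope: `object` is `list`, `data` holds the items, and `first_id`/`last_id`/`has_more` drive pagination. A minimal sketch of draining such an endpoint; `fetch_page` stands in for the actual HTTP call, and the `after`/`limit` query parameters are assumptions about the endpoint rather than part of these response schemas:

```python
from typing import Callable, Iterator

def iter_list(fetch_page: Callable[[dict], dict], limit: int = 20) -> Iterator[dict]:
    """Yield every item from a cursor-paginated list response."""
    params: dict = {"limit": limit}
    while True:
        page = fetch_page(params)  # returns a parsed List*Response body
        yield from page["data"]
        if not page.get("has_more"):
            return
        # Feed the last seen ID back as the cursor for the next page.
        params["after"] = page["last_id"]
```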
ListModelsResponse: + type: object + properties: + object: + type: string + enum: + - list + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/Model' + required: + - object + - data + ListPaginatedFineTuningJobsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/FineTuningJob' + has_more: + type: boolean + object: + type: string + enum: + - list + x-stainless-const: true + required: + - object + - data + - has_more + ListRunStepsResponse: + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/RunStepObject' + first_id: + type: string + example: step_abc123 + last_id: + type: string + example: step_abc456 + has_more: + type: boolean + example: false + required: + - object + - data + - first_id + - last_id + - has_more + ListRunsResponse: + type: object + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/RunObject' + first_id: + type: string + example: run_abc123 + last_id: + type: string + example: run_abc456 + has_more: + type: boolean + example: false + required: + - object + - data + - first_id + - last_id + - has_more + ListVectorStoreFilesResponse: + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreFileObject' + first_id: + type: string + example: file-abc123 + last_id: + type: string + example: file-abc456 + has_more: + type: boolean + example: false + required: + - object + - data + - first_id + - last_id + - has_more + ListVectorStoresResponse: + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreObject' + first_id: + type: string + example: vs_abc123 + last_id: + type: string + example: vs_abc456 + has_more: + type: boolean + example: false + required: + - object + - data + - first_id + - last_id + - has_more + LocalShellToolCall: + type: object + title: Local shell call + description: | + A tool call to run a command on the local shell. + properties: + type: + type: string + enum: + - local_shell_call + description: | + The type of the local shell call. Always `local_shell_call`. + x-stainless-const: true + id: + type: string + description: | + The unique ID of the local shell call. + call_id: + type: string + description: | + The unique ID of the local shell tool call generated by the model. + action: + $ref: '#/components/schemas/LocalShellExecAction' + status: + type: string + enum: + - in_progress + - completed + - incomplete + description: | + The status of the local shell call. + required: + - type + - id + - call_id + - action + - status + LocalShellToolCallOutput: + type: object + title: Local shell call output + description: | + The output of a local shell tool call. + properties: + type: + type: string + enum: + - local_shell_call_output + description: | + The type of the local shell tool call output. Always `local_shell_call_output`. + x-stainless-const: true + id: + type: string + description: | + The unique ID of the local shell tool call generated by the model. + output: + type: string + description: | + A JSON string of the output of the local shell tool call. + status: + anyOf: + - type: string + enum: + - in_progress + - completed + - incomplete + description: | + The status of the item. One of `in_progress`, `completed`, or `incomplete`. 
+            - type: 'null'
+      required:
+        - id
+        - type
+        - output
+    LogProbProperties:
+      type: object
+      description: |
+        A log probability object.
+      properties:
+        token:
+          type: string
+          description: |
+            The token that was used to generate the log probability.
+        logprob:
+          type: number
+          description: |
+            The log probability of the token.
+        bytes:
+          type: array
+          items:
+            type: integer
+          description: |
+            The bytes that were used to generate the log probability.
+      required:
+        - token
+        - logprob
+        - bytes
+    MCPApprovalRequest:
+      type: object
+      title: MCP approval request
+      description: |
+        A request for human approval of a tool invocation.
+      properties:
+        type:
+          type: string
+          enum:
+            - mcp_approval_request
+          description: |
+            The type of the item. Always `mcp_approval_request`.
+          x-stainless-const: true
+        id:
+          type: string
+          description: |
+            The unique ID of the approval request.
+        server_label:
+          type: string
+          description: |
+            The label of the MCP server making the request.
+        name:
+          type: string
+          description: |
+            The name of the tool to run.
+        arguments:
+          type: string
+          description: |
+            A JSON string of arguments for the tool.
+      required:
+        - type
+        - id
+        - server_label
+        - name
+        - arguments
+    MCPApprovalResponse:
+      type: object
+      title: MCP approval response
+      description: |
+        A response to an MCP approval request.
+      properties:
+        type:
+          type: string
+          enum:
+            - mcp_approval_response
+          description: |
+            The type of the item. Always `mcp_approval_response`.
+          x-stainless-const: true
+        id:
+          anyOf:
+            - type: string
+              description: |
+                The unique ID of the approval response.
+            - type: 'null'
+        approval_request_id:
+          type: string
+          description: |
+            The ID of the approval request being answered.
+        approve:
+          type: boolean
+          description: |
+            Whether the request was approved.
+        reason:
+          anyOf:
+            - type: string
+              description: |
+                Optional reason for the decision.
+            - type: 'null'
+      required:
+        - type
+        - approve
+        - approval_request_id
+    MCPApprovalResponseResource:
+      type: object
+      title: MCP approval response
+      description: |
+        A response to an MCP approval request.
+      properties:
+        type:
+          type: string
+          enum:
+            - mcp_approval_response
+          description: |
+            The type of the item. Always `mcp_approval_response`.
+          x-stainless-const: true
+        id:
+          type: string
+          description: |
+            The unique ID of the approval response.
+        approval_request_id:
+          type: string
+          description: |
+            The ID of the approval request being answered.
+        approve:
+          type: boolean
+          description: |
+            Whether the request was approved.
+        reason:
+          anyOf:
+            - type: string
+              description: |
+                Optional reason for the decision.
+            - type: 'null'
+      required:
+        - type
+        - id
+        - approve
+        - approval_request_id
+    MCPListTools:
+      type: object
+      title: MCP list tools
+      description: |
+        A list of tools available on an MCP server.
+      properties:
+        type:
+          type: string
+          enum:
+            - mcp_list_tools
+          description: |
+            The type of the item. Always `mcp_list_tools`.
+          x-stainless-const: true
+        id:
+          type: string
+          description: |
+            The unique ID of the list.
+        server_label:
+          type: string
+          description: |
+            The label of the MCP server.
+        tools:
+          type: array
+          items:
+            $ref: '#/components/schemas/MCPListToolsTool'
+          description: |
+            The tools available on the server.
+        error:
+          anyOf:
+            - type: string
+              description: |
+                Error message if the server could not list tools.
+ - type: 'null' + required: + - type + - id + - server_label + - tools + MCPListToolsTool: + type: object + title: MCP list tools tool + description: | + A tool available on an MCP server. + properties: + name: + type: string + description: | + The name of the tool. + description: + anyOf: + - type: string + description: | + The description of the tool. + - type: 'null' + input_schema: + type: object + description: | + The JSON schema describing the tool's input. + annotations: + anyOf: + - type: object + description: | + Additional annotations about the tool. + - type: 'null' + required: + - name + - input_schema + MCPTool: + type: object + title: MCP tool + description: | + Give the model access to additional tools via remote Model Context Protocol + (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp). + properties: + type: + type: string + enum: + - mcp + description: The type of the MCP tool. Always `mcp`. + x-stainless-const: true + server_label: + type: string + description: | + A label for this MCP server, used to identify it in tool calls. + server_url: + type: string + description: | + The URL for the MCP server. One of `server_url` or `connector_id` must be + provided. + connector_id: + type: string + enum: + - connector_dropbox + - connector_gmail + - connector_googlecalendar + - connector_googledrive + - connector_microsoftteams + - connector_outlookcalendar + - connector_outlookemail + - connector_sharepoint + description: | + Identifier for service connectors, like those available in ChatGPT. One of + `server_url` or `connector_id` must be provided. Learn more about service + connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors). + + Currently supported `connector_id` values are: + + - Dropbox: `connector_dropbox` + - Gmail: `connector_gmail` + - Google Calendar: `connector_googlecalendar` + - Google Drive: `connector_googledrive` + - Microsoft Teams: `connector_microsoftteams` + - Outlook Calendar: `connector_outlookcalendar` + - Outlook Email: `connector_outlookemail` + - SharePoint: `connector_sharepoint` + authorization: + type: string + description: | + An OAuth access token that can be used with a remote MCP server, either + with a custom MCP server URL or a service connector. Your application + must handle the OAuth authorization flow and provide the token here. + server_description: + type: string + description: | + Optional description of the MCP server, used to provide more context. + headers: + anyOf: + - type: object + additionalProperties: + type: string + description: | + Optional HTTP headers to send to the MCP server. Use for authentication + or other purposes. + - type: 'null' + allowed_tools: + anyOf: + - description: | + List of allowed tool names or a filter object. + anyOf: + - type: array + title: MCP allowed tools + description: A string array of allowed tool names + items: + type: string + - $ref: '#/components/schemas/MCPToolFilter' + - type: 'null' + require_approval: + anyOf: + - description: Specify which of the MCP server's tools require approval. + default: always + anyOf: + - type: object + title: MCP tool approval filter + description: | + Specify which of the MCP server's tools require approval. Can be + `always`, `never`, or a filter object associated with tools + that require approval. 
+ properties: + always: + $ref: '#/components/schemas/MCPToolFilter' + never: + $ref: '#/components/schemas/MCPToolFilter' + additionalProperties: false + - type: string + title: MCP tool approval setting + description: | + Specify a single approval policy for all tools. One of `always` or + `never`. When set to `always`, all tools will require approval. When + set to `never`, all tools will not require approval. + enum: + - always + - never + - type: 'null' + required: + - type + - server_label + MCPToolCall: + type: object + title: MCP tool call + description: | + An invocation of a tool on an MCP server. + properties: + type: + type: string + enum: + - mcp_call + description: | + The type of the item. Always `mcp_call`. + x-stainless-const: true + id: + type: string + description: | + The unique ID of the tool call. + server_label: + type: string + description: | + The label of the MCP server running the tool. + name: + type: string + description: | + The name of the tool that was run. + arguments: + type: string + description: | + A JSON string of the arguments passed to the tool. + output: + anyOf: + - type: string + description: | + The output from the tool call. + - type: 'null' + error: + anyOf: + - type: string + description: | + The error from the tool call, if any. + - type: 'null' + status: + $ref: '#/components/schemas/MCPToolCallStatus' + description: > + The status of the tool call. One of `in_progress`, `completed`, `incomplete`, `calling`, or + `failed`. + approval_request_id: + anyOf: + - type: string + description: > + Unique identifier for the MCP tool call approval request. + + Include this value in a subsequent `mcp_approval_response` input to approve or reject the + corresponding tool call. + - type: 'null' + required: + - type + - id + - server_label + - name + - arguments + MCPToolFilter: + type: object + title: MCP tool filter + description: | + A filter object to specify which tools are allowed. + properties: + tool_names: + type: array + title: MCP allowed tools + items: + type: string + description: List of allowed tool names. + read_only: + type: boolean + description: > + Indicates whether or not a tool modifies data or is read-only. If an + + MCP server is [annotated with + `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint), + + it will match this filter. + required: [] + additionalProperties: false + MessageContentImageFileObject: + title: Image file + type: object + description: >- + References an image [File](https://platform.openai.com/docs/api-reference/files) in the content of a + message. + properties: + type: + description: Always `image_file`. + type: string + enum: + - image_file + x-stainless-const: true + image_file: + type: object + properties: + file_id: + description: >- + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image in the + message content. Set `purpose="vision"` when uploading the File if you need to later display + the file content. + type: string + detail: + type: string + description: >- + Specifies the detail level of the image if specified by the user. `low` uses fewer tokens, you + can opt in to high resolution using `high`. + enum: + - auto + - low + - high + default: auto + required: + - file_id + required: + - type + - image_file + MessageContentImageUrlObject: + title: Image URL + type: object + description: References an image URL in the content of a message. 
+ properties: + type: + type: string + enum: + - image_url + description: The type of the content part. + x-stainless-const: true + image_url: + type: object + properties: + url: + type: string + description: 'The external URL of the image, must be a supported image types: jpeg, jpg, png, gif, webp.' + format: uri + detail: + type: string + description: >- + Specifies the detail level of the image. `low` uses fewer tokens, you can opt in to high + resolution using `high`. Default value is `auto` + enum: + - auto + - low + - high + default: auto + required: + - url + required: + - type + - image_url + MessageContentRefusalObject: + title: Refusal + type: object + description: The refusal content generated by the assistant. + properties: + type: + description: Always `refusal`. + type: string + enum: + - refusal + x-stainless-const: true + refusal: + type: string + required: + - type + - refusal + MessageContentTextAnnotationsFileCitationObject: + title: File citation + type: object + description: >- + A citation within the message that points to a specific quote from a specific File associated with the + assistant or the message. Generated when the assistant uses the "file_search" tool to search files. + properties: + type: + description: Always `file_citation`. + type: string + enum: + - file_citation + x-stainless-const: true + text: + description: The text in the message content that needs to be replaced. + type: string + file_citation: + type: object + properties: + file_id: + description: The ID of the specific File the citation is from. + type: string + required: + - file_id + start_index: + type: integer + minimum: 0 + end_index: + type: integer + minimum: 0 + required: + - type + - text + - file_citation + - start_index + - end_index + MessageContentTextAnnotationsFilePathObject: + title: File path + type: object + description: >- + A URL for the file that's generated when the assistant used the `code_interpreter` tool to generate a + file. + properties: + type: + description: Always `file_path`. + type: string + enum: + - file_path + x-stainless-const: true + text: + description: The text in the message content that needs to be replaced. + type: string + file_path: + type: object + properties: + file_id: + description: The ID of the file that was generated. + type: string + required: + - file_id + start_index: + type: integer + minimum: 0 + end_index: + type: integer + minimum: 0 + required: + - type + - text + - file_path + - start_index + - end_index + MessageContentTextObject: + title: Text + type: object + description: The text content that is part of a message. + properties: + type: + description: Always `text`. + type: string + enum: + - text + x-stainless-const: true + text: + type: object + properties: + value: + description: The data that makes up the text. + type: string + annotations: + type: array + items: + $ref: '#/components/schemas/TextAnnotation' + required: + - value + - annotations + required: + - type + - text + MessageDeltaContentImageFileObject: + title: Image file + type: object + description: >- + References an image [File](https://platform.openai.com/docs/api-reference/files) in the content of a + message. + properties: + index: + type: integer + description: The index of the content part in the message. + type: + description: Always `image_file`. 
+ type: string + enum: + - image_file + x-stainless-const: true + image_file: + type: object + properties: + file_id: + description: >- + The [File](https://platform.openai.com/docs/api-reference/files) ID of the image in the + message content. Set `purpose="vision"` when uploading the File if you need to later display + the file content. + type: string + detail: + type: string + description: >- + Specifies the detail level of the image if specified by the user. `low` uses fewer tokens, you + can opt in to high resolution using `high`. + enum: + - auto + - low + - high + default: auto + required: + - index + - type + MessageDeltaContentImageUrlObject: + title: Image URL + type: object + description: References an image URL in the content of a message. + properties: + index: + type: integer + description: The index of the content part in the message. + type: + description: Always `image_url`. + type: string + enum: + - image_url + x-stainless-const: true + image_url: + type: object + properties: + url: + description: 'The URL of the image, must be a supported image types: jpeg, jpg, png, gif, webp.' + type: string + detail: + type: string + description: >- + Specifies the detail level of the image. `low` uses fewer tokens, you can opt in to high + resolution using `high`. + enum: + - auto + - low + - high + default: auto + required: + - index + - type + MessageDeltaContentRefusalObject: + title: Refusal + type: object + description: The refusal content that is part of a message. + properties: + index: + type: integer + description: The index of the refusal part in the message. + type: + description: Always `refusal`. + type: string + enum: + - refusal + x-stainless-const: true + refusal: + type: string + required: + - index + - type + MessageDeltaContentTextAnnotationsFileCitationObject: + title: File citation + type: object + description: >- + A citation within the message that points to a specific quote from a specific File associated with the + assistant or the message. Generated when the assistant uses the "file_search" tool to search files. + properties: + index: + type: integer + description: The index of the annotation in the text content part. + type: + description: Always `file_citation`. + type: string + enum: + - file_citation + x-stainless-const: true + text: + description: The text in the message content that needs to be replaced. + type: string + file_citation: + type: object + properties: + file_id: + description: The ID of the specific File the citation is from. + type: string + quote: + description: The specific quote in the file. + type: string + start_index: + type: integer + minimum: 0 + end_index: + type: integer + minimum: 0 + required: + - index + - type + MessageDeltaContentTextAnnotationsFilePathObject: + title: File path + type: object + description: >- + A URL for the file that's generated when the assistant used the `code_interpreter` tool to generate a + file. + properties: + index: + type: integer + description: The index of the annotation in the text content part. + type: + description: Always `file_path`. + type: string + enum: + - file_path + x-stainless-const: true + text: + description: The text in the message content that needs to be replaced. + type: string + file_path: + type: object + properties: + file_id: + description: The ID of the file that was generated. 
+ type: string + start_index: + type: integer + minimum: 0 + end_index: + type: integer + minimum: 0 + required: + - index + - type + MessageDeltaContentTextObject: + title: Text + type: object + description: The text content that is part of a message. + properties: + index: + type: integer + description: The index of the content part in the message. + type: + description: Always `text`. + type: string + enum: + - text + x-stainless-const: true + text: + type: object + properties: + value: + description: The data that makes up the text. + type: string + annotations: + type: array + items: + $ref: '#/components/schemas/TextAnnotationDelta' + required: + - index + - type + MessageDeltaObject: + type: object + title: Message delta object + description: | + Represents a message delta i.e. any changed fields on a message during streaming. + properties: + id: + description: The identifier of the message, which can be referenced in API endpoints. + type: string + object: + description: The object type, which is always `thread.message.delta`. + type: string + enum: + - thread.message.delta + x-stainless-const: true + delta: + description: The delta containing the fields that have changed on the Message. + type: object + properties: + role: + description: The entity that produced the message. One of `user` or `assistant`. + type: string + enum: + - user + - assistant + content: + description: The content of the message in array of text and/or images. + type: array + items: + $ref: '#/components/schemas/MessageContentDelta' + required: + - id + - object + - delta + x-oaiMeta: + name: The message delta object + beta: true + example: | + { + "id": "msg_123", + "object": "thread.message.delta", + "delta": { + "content": [ + { + "index": 0, + "type": "text", + "text": { "value": "Hello", "annotations": [] } + } + ] + } + } + MessageObject: + type: object + title: The message object + description: Represents a message within a [thread](https://platform.openai.com/docs/api-reference/threads). + properties: + id: + description: The identifier, which can be referenced in API endpoints. + type: string + object: + description: The object type, which is always `thread.message`. + type: string + enum: + - thread.message + x-stainless-const: true + created_at: + description: The Unix timestamp (in seconds) for when the message was created. + type: integer + thread_id: + description: >- + The [thread](https://platform.openai.com/docs/api-reference/threads) ID that this message belongs + to. + type: string + status: + description: The status of the message, which can be either `in_progress`, `incomplete`, or `completed`. + type: string + enum: + - in_progress + - incomplete + - completed + incomplete_details: + anyOf: + - description: On an incomplete message, details about why the message is incomplete. + type: object + properties: + reason: + type: string + description: The reason the message is incomplete. + enum: + - content_filter + - max_tokens + - run_cancelled + - run_expired + - run_failed + required: + - reason + - type: 'null' + completed_at: + anyOf: + - description: The Unix timestamp (in seconds) for when the message was completed. + type: integer + - type: 'null' + incomplete_at: + anyOf: + - description: The Unix timestamp (in seconds) for when the message was marked as incomplete. + type: integer + - type: 'null' + role: + description: The entity that produced the message. One of `user` or `assistant`. 
+ type: string + enum: + - user + - assistant + content: + description: The content of the message in array of text and/or images. + type: array + items: + $ref: '#/components/schemas/MessageContent' + assistant_id: + anyOf: + - description: >- + If applicable, the ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) that authored this + message. + type: string + - type: 'null' + run_id: + anyOf: + - description: >- + The ID of the [run](https://platform.openai.com/docs/api-reference/runs) associated with the + creation of this message. Value is `null` when messages are created manually using the create + message or create thread endpoints. + type: string + - type: 'null' + attachments: + anyOf: + - type: array + items: + type: object + properties: + file_id: + type: string + description: The ID of the file to attach to the message. + tools: + description: The tools to add this file to. + type: array + items: + anyOf: + - $ref: '#/components/schemas/AssistantToolsCode' + - $ref: '#/components/schemas/AssistantToolsFileSearchTypeOnly' + description: A list of files attached to the message, and the tools they were added to. + - type: 'null' + metadata: + $ref: '#/components/schemas/Metadata' + required: + - id + - object + - created_at + - thread_id + - status + - incomplete_details + - completed_at + - incomplete_at + - role + - content + - assistant_id + - run_id + - attachments + - metadata + x-oaiMeta: + name: The message object + beta: true + example: | + { + "id": "msg_abc123", + "object": "thread.message", + "created_at": 1698983503, + "thread_id": "thread_abc123", + "role": "assistant", + "content": [ + { + "type": "text", + "text": { + "value": "Hi! How can I help you today?", + "annotations": [] + } + } + ], + "assistant_id": "asst_abc123", + "run_id": "run_abc123", + "attachments": [], + "metadata": {} + } + MessageRequestContentTextObject: + title: Text + type: object + description: The text content that is part of a message. + properties: + type: + description: Always `text`. + type: string + enum: + - text + x-stainless-const: true + text: + type: string + description: Text content to be sent to the model + required: + - type + - text + MessageStreamEvent: + anyOf: + - type: object + properties: + event: + type: string + enum: + - thread.message.created + x-stainless-const: true + data: + $ref: '#/components/schemas/MessageObject' + required: + - event + - data + description: >- + Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) is + created. + x-oaiMeta: + dataDescription: '`data` is a [message](/docs/api-reference/messages/object)' + - type: object + properties: + event: + type: string + enum: + - thread.message.in_progress + x-stainless-const: true + data: + $ref: '#/components/schemas/MessageObject' + required: + - event + - data + description: >- + Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) moves to + an `in_progress` state. + x-oaiMeta: + dataDescription: '`data` is a [message](/docs/api-reference/messages/object)' + - type: object + properties: + event: + type: string + enum: + - thread.message.delta + x-stainless-const: true + data: + $ref: '#/components/schemas/MessageDeltaObject' + required: + - event + - data + description: >- + Occurs when parts of a [Message](https://platform.openai.com/docs/api-reference/messages/object) + are being streamed. 
+ x-oaiMeta: + dataDescription: '`data` is a [message delta](/docs/api-reference/assistants-streaming/message-delta-object)' + - type: object + properties: + event: + type: string + enum: + - thread.message.completed + x-stainless-const: true + data: + $ref: '#/components/schemas/MessageObject' + required: + - event + - data + description: >- + Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) is + completed. + x-oaiMeta: + dataDescription: '`data` is a [message](/docs/api-reference/messages/object)' + - type: object + properties: + event: + type: string + enum: + - thread.message.incomplete + x-stainless-const: true + data: + $ref: '#/components/schemas/MessageObject' + required: + - event + - data + description: >- + Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) ends + before it is completed. + x-oaiMeta: + dataDescription: '`data` is a [message](/docs/api-reference/messages/object)' + discriminator: + propertyName: event + Metadata: + anyOf: + - type: object + description: | + Set of 16 key-value pairs that can be attached to an object. This can be + useful for storing additional information about the object in a structured + format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings + with a maximum length of 512 characters. + additionalProperties: + type: string + x-oaiTypeLabel: map + - type: 'null' + Model: + title: Model + description: Describes an OpenAI model offering that can be used with the API. + properties: + id: + type: string + description: The model identifier, which can be referenced in the API endpoints. + created: + type: integer + description: The Unix timestamp (in seconds) when the model was created. + object: + type: string + description: The object type, which is always "model". + enum: + - model + x-stainless-const: true + owned_by: + type: string + description: The organization that owns the model. + required: + - id + - object + - created + - owned_by + x-oaiMeta: + name: The model object + example: | + { + "id": "VAR_chat_model_id", + "object": "model", + "created": 1686935002, + "owned_by": "openai" + } + ModelIds: + anyOf: + - $ref: '#/components/schemas/ModelIdsShared' + - $ref: '#/components/schemas/ModelIdsResponses' + ModelIdsResponses: + example: gpt-4o + anyOf: + - $ref: '#/components/schemas/ModelIdsShared' + - type: string + title: ResponsesOnlyModel + enum: + - o1-pro + - o1-pro-2025-03-19 + - o3-pro + - o3-pro-2025-06-10 + - o3-deep-research + - o3-deep-research-2025-06-26 + - o4-mini-deep-research + - o4-mini-deep-research-2025-06-26 + - computer-use-preview + - computer-use-preview-2025-03-11 + - gpt-5-codex + - gpt-5-pro + - gpt-5-pro-2025-10-06 + ModelIdsShared: + example: gpt-4o + anyOf: + - type: string + - $ref: '#/components/schemas/ChatModel' + ModelResponseProperties: + type: object + properties: + metadata: + $ref: '#/components/schemas/Metadata' + top_logprobs: + anyOf: + - description: | + An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + type: integer + minimum: 0 + maximum: 20 + - type: 'null' + temperature: + anyOf: + - type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + description: > + What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + - type: 'null' + top_p: + anyOf: + - type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + description: | + An alternative to sampling with temperature, called nucleus sampling, + where the model considers the results of the tokens with top_p probability + mass. So 0.1 means only the tokens comprising the top 10% probability mass + are considered. + + We generally recommend altering this or `temperature` but not both. + - type: 'null' + user: + type: string + example: user-1234 + deprecated: true + description: > + This field is being replaced by `safety_identifier` and `prompt_cache_key`. Use `prompt_cache_key` + instead to maintain caching optimizations. + + A stable identifier for your end-users. + + Used to boost cache hit rates by better bucketing similar requests and to help OpenAI detect and + prevent abuse. [Learn + more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + safety_identifier: + type: string + example: safety-identifier-1234 + description: > + A stable identifier used to help detect users of your application that may be violating OpenAI's + usage policies. + + The IDs should be a string that uniquely identifies each user. We recommend hashing their username + or email address, in order to avoid sending us any identifying information. [Learn + more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers). + prompt_cache_key: + type: string + example: prompt-cache-key-1234 + description: > + Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces + the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching). + service_tier: + $ref: '#/components/schemas/ServiceTier' + prompt_cache_retention: + anyOf: + - type: string + enum: + - in-memory + - 24h + description: > + The retention policy for the prompt cache. Set to `24h` to enable extended prompt caching, + which keeps cached prefixes active for longer, up to a maximum of 24 hours. [Learn + more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention). + - type: 'null' + ModifyAssistantRequest: + type: object + additionalProperties: false + properties: + model: + description: > + ID of the model to use. You can use the [List + models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your + available models, or see our [Model overview](https://platform.openai.com/docs/models) for + descriptions of them. + anyOf: + - type: string + - $ref: '#/components/schemas/AssistantSupportedModels' + reasoning_effort: + $ref: '#/components/schemas/ReasoningEffort' + name: + anyOf: + - description: | + The name of the assistant. The maximum length is 256 characters. + type: string + maxLength: 256 + - type: 'null' + description: + anyOf: + - description: | + The description of the assistant. The maximum length is 512 characters. + type: string + maxLength: 512 + - type: 'null' + instructions: + anyOf: + - description: | + The system instructions that the assistant uses. The maximum length is 256,000 characters. + type: string + maxLength: 256000 + - type: 'null' + tools: + description: > + A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. 
Tools + can be of types `code_interpreter`, `file_search`, or `function`. + default: [] + type: array + maxItems: 128 + items: + $ref: '#/components/schemas/AssistantTool' + tool_resources: + anyOf: + - type: object + description: > + A set of resources that are used by the assistant's tools. The resources are specific to the + type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the + `file_search` tool requires a list of vector store IDs. + properties: + code_interpreter: + type: object + properties: + file_ids: + type: array + description: > + Overrides the list of [file](https://platform.openai.com/docs/api-reference/files) IDs + made available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + default: [] + maxItems: 20 + items: + type: string + file_search: + type: object + properties: + vector_store_ids: + type: array + description: > + Overrides the [vector + store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached + to this assistant. There can be a maximum of 1 vector store attached to the assistant. + maxItems: 1 + items: + type: string + - type: 'null' + metadata: + $ref: '#/components/schemas/Metadata' + temperature: + anyOf: + - description: > + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + - type: 'null' + top_p: + anyOf: + - type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + description: > + An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + + + We generally recommend altering this or temperature but not both. + - type: 'null' + response_format: + anyOf: + - $ref: '#/components/schemas/AssistantsApiResponseFormatOption' + - type: 'null' + ModifyCertificateRequest: + type: object + properties: + name: + type: string + description: The updated name for the certificate + required: + - name + ModifyMessageRequest: + type: object + additionalProperties: false + properties: + metadata: + $ref: '#/components/schemas/Metadata' + ModifyRunRequest: + type: object + additionalProperties: false + properties: + metadata: + $ref: '#/components/schemas/Metadata' + ModifyThreadRequest: + type: object + additionalProperties: false + properties: + tool_resources: + anyOf: + - type: object + description: > + A set of resources that are made available to the assistant's tools in this thread. The + resources are specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store IDs. + properties: + code_interpreter: + type: object + properties: + file_ids: + type: array + description: > + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + default: [] + maxItems: 20 + items: + type: string + file_search: + type: object + properties: + vector_store_ids: + type: array + description: > + The [vector + store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached + to this thread. 
There can be a maximum of 1 vector store attached to the thread. + maxItems: 1 + items: + type: string + - type: 'null' + metadata: + $ref: '#/components/schemas/Metadata' + Move: + type: object + title: Move + description: | + A mouse move action. + properties: + type: + type: string + enum: + - move + default: move + description: | + Specifies the event type. For a move action, this property is + always set to `move`. + x-stainless-const: true + x: + type: integer + description: | + The x-coordinate to move to. + 'y': + type: integer + description: | + The y-coordinate to move to. + required: + - type + - x + - 'y' + NoiseReductionType: + type: string + enum: + - near_field + - far_field + description: > + Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` + is for far-field microphones such as laptop or conference room microphones. + OpenAIFile: + title: OpenAIFile + description: The `File` object represents a document that has been uploaded to OpenAI. + properties: + id: + type: string + description: The file identifier, which can be referenced in the API endpoints. + bytes: + type: integer + description: The size of the file, in bytes. + created_at: + type: integer + description: The Unix timestamp (in seconds) for when the file was created. + expires_at: + type: integer + description: The Unix timestamp (in seconds) for when the file will expire. + filename: + type: string + description: The name of the file. + object: + type: string + description: The object type, which is always `file`. + enum: + - file + x-stainless-const: true + purpose: + type: string + description: >- + The intended purpose of the file. Supported values are `assistants`, `assistants_output`, `batch`, + `batch_output`, `fine-tune`, `fine-tune-results`, `vision`, and `user_data`. + enum: + - assistants + - assistants_output + - batch + - batch_output + - fine-tune + - fine-tune-results + - vision + - user_data + status: + type: string + deprecated: true + description: >- + Deprecated. The current status of the file, which can be either `uploaded`, `processed`, or + `error`. + enum: + - uploaded + - processed + - error + status_details: + type: string + deprecated: true + description: >- + Deprecated. For details on why a fine-tuning training file failed validation, see the `error` + field on `fine_tuning.job`. + required: + - id + - object + - bytes + - created_at + - filename + - purpose + - status + x-oaiMeta: + name: The file object + example: | + { + "id": "file-abc123", + "object": "file", + "bytes": 120000, + "created_at": 1677610602, + "expires_at": 1680202602, + "filename": "salesOverview.pdf", + "purpose": "assistants", + } + OtherChunkingStrategyResponseParam: + type: object + title: Other Chunking Strategy + description: >- + This is returned when the chunking strategy is unknown. Typically, this is because the file was + indexed before the `chunking_strategy` concept was introduced in the API. + additionalProperties: false + properties: + type: + type: string + description: Always `other`. + enum: + - other + x-stainless-const: true + required: + - type + OutputAudio: + type: object + title: Output audio + description: | + An audio output from the model. + properties: + type: + type: string + description: | + The type of the output audio. Always `output_audio`. + enum: + - output_audio + x-stainless-const: true + data: + type: string + description: | + Base64-encoded audio data from the model. 
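+ # Editorial note (illustrative, not part of the upstream spec): a complete
+ # `output_audio` content part might look like
+ #   { "type": "output_audio", "data": "<base64-encoded audio>", "transcript": "Hello there!" }
+ # where `data` decodes to raw audio in the session's configured output format.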
+ transcript: + type: string + description: | + The transcript of the audio data from the model. + required: + - type + - data + - transcript + OutputContent: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/OutputTextContent' + - $ref: '#/components/schemas/RefusalContent' + - $ref: '#/components/schemas/ReasoningTextContent' + OutputItem: + anyOf: + - $ref: '#/components/schemas/OutputMessage' + - $ref: '#/components/schemas/FileSearchToolCall' + - $ref: '#/components/schemas/FunctionToolCall' + - $ref: '#/components/schemas/WebSearchToolCall' + - $ref: '#/components/schemas/ComputerToolCall' + - $ref: '#/components/schemas/ReasoningItem' + - $ref: '#/components/schemas/ImageGenToolCall' + - $ref: '#/components/schemas/CodeInterpreterToolCall' + - $ref: '#/components/schemas/LocalShellToolCall' + - $ref: '#/components/schemas/FunctionShellCall' + - $ref: '#/components/schemas/FunctionShellCallOutput' + - $ref: '#/components/schemas/ApplyPatchToolCall' + - $ref: '#/components/schemas/ApplyPatchToolCallOutput' + - $ref: '#/components/schemas/MCPToolCall' + - $ref: '#/components/schemas/MCPListTools' + - $ref: '#/components/schemas/MCPApprovalRequest' + - $ref: '#/components/schemas/CustomToolCall' + discriminator: + propertyName: type + OutputMessage: + type: object + title: Output message + description: | + An output message from the model. + properties: + id: + type: string + description: | + The unique ID of the output message. + x-stainless-go-json: omitzero + type: + type: string + description: | + The type of the output message. Always `message`. + enum: + - message + x-stainless-const: true + role: + type: string + description: | + The role of the output message. Always `assistant`. + enum: + - assistant + x-stainless-const: true + content: + type: array + description: | + The content of the output message. + items: + $ref: '#/components/schemas/OutputMessageContent' + status: + type: string + description: | + The status of the message input. One of `in_progress`, `completed`, or + `incomplete`. Populated when input items are returned via API. + enum: + - in_progress + - completed + - incomplete + required: + - id + - type + - role + - content + - status + OutputMessageContent: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/OutputTextContent' + - $ref: '#/components/schemas/RefusalContent' + ParallelToolCalls: + description: >- + Whether to enable [parallel function + calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + type: boolean + default: true + PartialImages: + anyOf: + - type: integer + maximum: 3 + minimum: 0 + default: 0 + example: 1 + description: | + The number of partial images to generate. This parameter is used for + streaming responses that return partial images. Value must be between 0 and 3. + When set to 0, the response will be a single image sent in one streaming event. + + Note that the final image may be sent before the full number of partial images + are generated if the full image is generated more quickly. + - type: 'null' + PredictionContent: + type: object + title: Static Content + description: | + Static predicted output content, such as the content of a text file that is + being regenerated. + required: + - type + - content + properties: + type: + type: string + enum: + - content + description: | + The type of the predicted content you want to provide. This type is + currently always `content`. 
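+ # Editorial sketch (hypothetical values, not part of the upstream spec): in a
+ # Chat Completions request, predicted output is supplied as
+ #   "prediction": { "type": "content", "content": "<current text being regenerated>" }
+ # so that tokens matching the prediction can be returned much more quickly.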
+ x-stainless-const: true + content: + description: | + The content that should be matched when generating a model response. + If generated tokens would match this content, the entire model response + can be returned much more quickly. + anyOf: + - type: string + title: Text content + description: | + The content used for a Predicted Output. This is often the + text of a file you are regenerating with minor changes. + - type: array + description: >- + An array of content parts with a defined type. Supported options differ based on the + [model](https://platform.openai.com/docs/models) being used to generate the response. Can + contain text inputs. + title: Array of content parts + items: + $ref: '#/components/schemas/ChatCompletionRequestMessageContentPartText' + minItems: 1 + Project: + type: object + description: Represents an individual project. + properties: + id: + type: string + description: The identifier, which can be referenced in API endpoints + object: + type: string + enum: + - organization.project + description: The object type, which is always `organization.project` + x-stainless-const: true + name: + type: string + description: The name of the project. This appears in reporting. + created_at: + type: integer + description: The Unix timestamp (in seconds) of when the project was created. + archived_at: + anyOf: + - type: integer + description: The Unix timestamp (in seconds) of when the project was archived or `null`. + - type: 'null' + status: + type: string + enum: + - active + - archived + description: '`active` or `archived`' + required: + - id + - object + - name + - created_at + - status + x-oaiMeta: + name: The project object + example: | + { + "id": "proj_abc", + "object": "organization.project", + "name": "Project example", + "created_at": 1711471533, + "archived_at": null, + "status": "active" + } + ProjectApiKey: + type: object + description: Represents an individual API key in a project. + properties: + object: + type: string + enum: + - organization.project.api_key + description: The object type, which is always `organization.project.api_key` + x-stainless-const: true + redacted_value: + type: string + description: The redacted value of the API key + name: + type: string + description: The name of the API key + created_at: + type: integer + description: The Unix timestamp (in seconds) of when the API key was created + last_used_at: + type: integer + description: The Unix timestamp (in seconds) of when the API key was last used. 
+ id: + type: string + description: The identifier, which can be referenced in API endpoints + owner: + type: object + properties: + type: + type: string + enum: + - user + - service_account + description: '`user` or `service_account`' + user: + $ref: '#/components/schemas/ProjectUser' + service_account: + $ref: '#/components/schemas/ProjectServiceAccount' + required: + - object + - redacted_value + - name + - created_at + - last_used_at + - id + - owner + x-oaiMeta: + name: The project API key object + example: | + { + "object": "organization.project.api_key", + "redacted_value": "sk-abc...def", + "name": "My API Key", + "created_at": 1711471533, + "last_used_at": 1711471534, + "id": "key_abc", + "owner": { + "type": "user", + "user": { + "object": "organization.project.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "created_at": 1711471533 + } + } + } + ProjectApiKeyDeleteResponse: + type: object + properties: + object: + type: string + enum: + - organization.project.api_key.deleted + x-stainless-const: true + id: + type: string + deleted: + type: boolean + required: + - object + - id + - deleted + ProjectApiKeyListResponse: + type: object + properties: + object: + type: string + enum: + - list + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/ProjectApiKey' + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + required: + - object + - data + - first_id + - last_id + - has_more + ProjectCreateRequest: + type: object + properties: + name: + type: string + description: The friendly name of the project, this name appears in reports. + geography: + type: string + enum: + - US + - EU + - JP + - IN + - KR + - CA + - AU + - SG + description: >- + Create the project with the specified data residency region. Your organization must have access to + Data residency functionality in order to use. See [data residency + controls](https://platform.openai.com/docs/guides/your-data#data-residency-controls) to review the + functionality and limitations of setting this field. + required: + - name + ProjectListResponse: + type: object + properties: + object: + type: string + enum: + - list + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/Project' + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + required: + - object + - data + - first_id + - last_id + - has_more + ProjectRateLimit: + type: object + description: Represents a project rate limit config. + properties: + object: + type: string + enum: + - project.rate_limit + description: The object type, which is always `project.rate_limit` + x-stainless-const: true + id: + type: string + description: The identifier, which can be referenced in API endpoints. + model: + type: string + description: The model this rate limit applies to. + max_requests_per_1_minute: + type: integer + description: The maximum requests per minute. + max_tokens_per_1_minute: + type: integer + description: The maximum tokens per minute. + max_images_per_1_minute: + type: integer + description: The maximum images per minute. Only present for relevant models. + max_audio_megabytes_per_1_minute: + type: integer + description: The maximum audio megabytes per minute. Only present for relevant models. + max_requests_per_1_day: + type: integer + description: The maximum requests per day. Only present for relevant models. 
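+ # Editorial note: per the descriptions above, model-specific limits are simply
+ # omitted when irrelevant, e.g. a text-only model's rate limit object carries
+ # no `max_images_per_1_minute` or `max_audio_megabytes_per_1_minute` keys.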
+ batch_1_day_max_input_tokens: + type: integer + description: The maximum batch input tokens per day. Only present for relevant models. + required: + - object + - id + - model + - max_requests_per_1_minute + - max_tokens_per_1_minute + x-oaiMeta: + name: The project rate limit object + example: | + { + "object": "project.rate_limit", + "id": "rl_ada", + "model": "ada", + "max_requests_per_1_minute": 600, + "max_tokens_per_1_minute": 150000, + "max_images_per_1_minute": 10 + } + ProjectRateLimitListResponse: + type: object + properties: + object: + type: string + enum: + - list + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/ProjectRateLimit' + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + required: + - object + - data + - first_id + - last_id + - has_more + ProjectRateLimitUpdateRequest: + type: object + properties: + max_requests_per_1_minute: + type: integer + description: The maximum requests per minute. + max_tokens_per_1_minute: + type: integer + description: The maximum tokens per minute. + max_images_per_1_minute: + type: integer + description: The maximum images per minute. Only relevant for certain models. + max_audio_megabytes_per_1_minute: + type: integer + description: The maximum audio megabytes per minute. Only relevant for certain models. + max_requests_per_1_day: + type: integer + description: The maximum requests per day. Only relevant for certain models. + batch_1_day_max_input_tokens: + type: integer + description: The maximum batch input tokens per day. Only relevant for certain models. + ProjectServiceAccount: + type: object + description: Represents an individual service account in a project. + properties: + object: + type: string + enum: + - organization.project.service_account + description: The object type, which is always `organization.project.service_account` + x-stainless-const: true + id: + type: string + description: The identifier, which can be referenced in API endpoints + name: + type: string + description: The name of the service account + role: + type: string + enum: + - owner + - member + description: '`owner` or `member`' + created_at: + type: integer + description: The Unix timestamp (in seconds) of when the service account was created + required: + - object + - id + - name + - role + - created_at + x-oaiMeta: + name: The project service account object + example: | + { + "object": "organization.project.service_account", + "id": "svc_acct_abc", + "name": "Service Account", + "role": "owner", + "created_at": 1711471533 + } + ProjectServiceAccountApiKey: + type: object + properties: + object: + type: string + enum: + - organization.project.service_account.api_key + description: The object type, which is always `organization.project.service_account.api_key` + x-stainless-const: true + value: + type: string + name: + type: string + created_at: + type: integer + id: + type: string + required: + - object + - value + - name + - created_at + - id + ProjectServiceAccountCreateRequest: + type: object + properties: + name: + type: string + description: The name of the service account being created. 
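+ # Illustrative request body (hypothetical name, not part of the upstream spec):
+ #   { "name": "CI deploy bot" }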
+ required: + - name + ProjectServiceAccountCreateResponse: + type: object + properties: + object: + type: string + enum: + - organization.project.service_account + x-stainless-const: true + id: + type: string + name: + type: string + role: + type: string + enum: + - member + description: Service accounts can only have one role of type `member` + x-stainless-const: true + created_at: + type: integer + api_key: + $ref: '#/components/schemas/ProjectServiceAccountApiKey' + required: + - object + - id + - name + - role + - created_at + - api_key + ProjectServiceAccountDeleteResponse: + type: object + properties: + object: + type: string + enum: + - organization.project.service_account.deleted + x-stainless-const: true + id: + type: string + deleted: + type: boolean + required: + - object + - id + - deleted + ProjectServiceAccountListResponse: + type: object + properties: + object: + type: string + enum: + - list + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/ProjectServiceAccount' + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + required: + - object + - data + - first_id + - last_id + - has_more + ProjectUpdateRequest: + type: object + properties: + name: + type: string + description: The updated name of the project, this name appears in reports. + required: + - name + ProjectUser: + type: object + description: Represents an individual user in a project. + properties: + object: + type: string + enum: + - organization.project.user + description: The object type, which is always `organization.project.user` + x-stainless-const: true + id: + type: string + description: The identifier, which can be referenced in API endpoints + name: + type: string + description: The name of the user + email: + type: string + description: The email address of the user + role: + type: string + enum: + - owner + - member + description: '`owner` or `member`' + added_at: + type: integer + description: The Unix timestamp (in seconds) of when the project was added. + required: + - object + - id + - name + - email + - role + - added_at + x-oaiMeta: + name: The project user object + example: | + { + "object": "organization.project.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + ProjectUserCreateRequest: + type: object + properties: + user_id: + type: string + description: The ID of the user. + role: + type: string + enum: + - owner + - member + description: '`owner` or `member`' + required: + - user_id + - role + ProjectUserDeleteResponse: + type: object + properties: + object: + type: string + enum: + - organization.project.user.deleted + x-stainless-const: true + id: + type: string + deleted: + type: boolean + required: + - object + - id + - deleted + ProjectUserListResponse: + type: object + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/ProjectUser' + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + required: + - object + - data + - first_id + - last_id + - has_more + ProjectUserUpdateRequest: + type: object + properties: + role: + type: string + enum: + - owner + - member + description: '`owner` or `member`' + required: + - role + Prompt: + anyOf: + - type: object + description: | + Reference to a prompt template and its variables. + [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts). 
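+ # Illustrative reference (hypothetical IDs, not part of the upstream spec):
+ #   "prompt": { "id": "pmpt_abc123", "version": "2", "variables": { "city": "Paris" } }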
+ required: + - id + properties: + id: + type: string + description: The unique identifier of the prompt template to use. + version: + anyOf: + - type: string + description: Optional version of the prompt template. + - type: 'null' + variables: + $ref: '#/components/schemas/ResponsePromptVariables' + - type: 'null' + RealtimeAudioFormats: + anyOf: + - type: object + title: PCM audio format + description: The PCM audio format. Only a 24kHz sample rate is supported. + properties: + type: + type: string + description: The audio format. Always `audio/pcm`. + enum: + - audio/pcm + rate: + type: integer + description: The sample rate of the audio. Always `24000`. + enum: + - 24000 + - type: object + title: PCMU audio format + description: The G.711 μ-law format. + properties: + type: + type: string + description: The audio format. Always `audio/pcmu`. + enum: + - audio/pcmu + - type: object + title: PCMA audio format + description: The G.711 A-law format. + properties: + type: + type: string + description: The audio format. Always `audio/pcma`. + enum: + - audio/pcma + discriminator: + propertyName: type + RealtimeBetaClientEventConversationItemCreate: + type: object + description: | + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + properties: + event_id: + type: string + maxLength: 512 + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `conversation.item.create`. + x-stainless-const: true + const: conversation.item.create + previous_item_id: + type: string + description: | + The ID of the preceding item after which the new item will be inserted. + If not set, the new item will be appended to the end of the conversation. + If set to `root`, the new item will be added to the beginning of the conversation. + If set to an existing ID, it allows an item to be inserted mid-conversation. If the + ID cannot be found, an error will be returned and the item will not be added. + item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - type + - item + x-oaiMeta: + name: conversation.item.create + group: realtime + example: | + { + "type": "conversation.item.create", + "item": { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "hi" + } + ] + }, + "event_id": "b904fba0-0ec4-40af-8bbb-f908a9b26793", + } + RealtimeBetaClientEventConversationItemDelete: + type: object + description: | + Send this event when you want to remove any item from the conversation + history. The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + properties: + event_id: + type: string + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `conversation.item.delete`. + x-stainless-const: true + const: conversation.item.delete + item_id: + type: string + description: The ID of the item to delete. 
+ required: + - type + - item_id + x-oaiMeta: + name: conversation.item.delete + group: realtime + example: | + { + "event_id": "event_901", + "type": "conversation.item.delete", + "item_id": "msg_003" + } + RealtimeBetaClientEventConversationItemRetrieve: + type: object + description: > + Send this event when you want to retrieve the server's representation of a specific item in the + conversation history. This is useful, for example, to inspect user audio after noise cancellation and + VAD. + + The server will respond with a `conversation.item.retrieved` event, + + unless the item does not exist in the conversation history, in which case the + + server will respond with an error. + properties: + event_id: + type: string + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `conversation.item.retrieve`. + x-stainless-const: true + const: conversation.item.retrieve + item_id: + type: string + description: The ID of the item to retrieve. + required: + - type + - item_id + x-oaiMeta: + name: conversation.item.retrieve + group: realtime + example: | + { + "event_id": "event_901", + "type": "conversation.item.retrieve", + "item_id": "msg_003" + } + RealtimeBetaClientEventConversationItemTruncate: + type: object + description: | + Send this event to truncate a previous assistant message’s audio. The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + properties: + event_id: + type: string + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `conversation.item.truncate`. + x-stainless-const: true + const: conversation.item.truncate + item_id: + type: string + description: | + The ID of the assistant message item to truncate. Only assistant message + items can be truncated. + content_index: + type: integer + description: The index of the content part to truncate. Set this to 0. + audio_end_ms: + type: integer + description: | + Inclusive duration up to which audio is truncated, in milliseconds. If + the audio_end_ms is greater than the actual audio duration, the server + will respond with an error. + required: + - type + - item_id + - content_index + - audio_end_ms + x-oaiMeta: + name: conversation.item.truncate + group: realtime + example: | + { + "event_id": "event_678", + "type": "conversation.item.truncate", + "item_id": "msg_002", + "content_index": 0, + "audio_end_ms": 1500 + } + RealtimeBetaClientEventInputAudioBufferAppend: + type: object + description: | + Send this event to append audio bytes to the input audio buffer. The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. 
Unlike most other client events, the server will
+ not send a confirmation response to this event.
+ properties:
+ event_id:
+ type: string
+ description: Optional client-generated ID used to identify this event.
+ type:
+ description: The event type, must be `input_audio_buffer.append`.
+ x-stainless-const: true
+ const: input_audio_buffer.append
+ audio:
+ type: string
+ description: |
+ Base64-encoded audio bytes. This must be in the format specified by the
+ `input_audio_format` field in the session configuration.
+ required:
+ - type
+ - audio
+ x-oaiMeta:
+ name: input_audio_buffer.append
+ group: realtime
+ example: |
+ {
+ "event_id": "event_456",
+ "type": "input_audio_buffer.append",
+ "audio": "Base64EncodedAudioData"
+ }
+ RealtimeBetaClientEventInputAudioBufferClear:
+ type: object
+ description: |
+ Send this event to clear the audio bytes in the buffer. The server will
+ respond with an `input_audio_buffer.cleared` event.
+ properties:
+ event_id:
+ type: string
+ description: Optional client-generated ID used to identify this event.
+ type:
+ description: The event type, must be `input_audio_buffer.clear`.
+ x-stainless-const: true
+ const: input_audio_buffer.clear
+ required:
+ - type
+ x-oaiMeta:
+ name: input_audio_buffer.clear
+ group: realtime
+ example: |
+ {
+ "event_id": "event_012",
+ "type": "input_audio_buffer.clear"
+ }
+ RealtimeBetaClientEventInputAudioBufferCommit:
+ type: object
+ description: |
+ Send this event to commit the user input audio buffer, which will create a
+ new user message item in the conversation. This event will produce an error
+ if the input audio buffer is empty. When in Server VAD mode, the client does
+ not need to send this event; the server will commit the audio buffer
+ automatically.
+
+ Committing the input audio buffer will trigger input audio transcription
+ (if enabled in session configuration), but it will not create a response
+ from the model. The server will respond with an `input_audio_buffer.committed`
+ event.
+ properties:
+ event_id:
+ type: string
+ description: Optional client-generated ID used to identify this event.
+ type:
+ description: The event type, must be `input_audio_buffer.commit`.
+ x-stainless-const: true
+ const: input_audio_buffer.commit
+ required:
+ - type
+ x-oaiMeta:
+ name: input_audio_buffer.commit
+ group: realtime
+ example: |
+ {
+ "event_id": "event_789",
+ "type": "input_audio_buffer.commit"
+ }
+ RealtimeBetaClientEventOutputAudioBufferClear:
+ type: object
+ description: >
+ **WebRTC Only:** Emit to cut off the current audio response. This will trigger the server to
+
+ stop generating audio and emit an `output_audio_buffer.cleared` event. This
+
+ event should be preceded by a `response.cancel` client event to stop the
+
+ generation of the current response.
+
+ [Learn
+ more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
+ properties:
+ event_id:
+ type: string
+ description: The unique ID of the client event used for error handling.
+ type:
+ description: The event type, must be `output_audio_buffer.clear`.
+ x-stainless-const: true
+ const: output_audio_buffer.clear
+ required:
+ - type
+ x-oaiMeta:
+ name: output_audio_buffer.clear
+ group: realtime
+ example: |
+ {
+ "event_id": "optional_client_event_id",
+ "type": "output_audio_buffer.clear"
+ }
+ RealtimeBetaClientEventResponseCancel:
+ type: object
+ description: |
+ Send this event to cancel an in-progress response.
The server will respond + with a `response.done` event with a status of `response.status=cancelled`. If + there is no response to cancel, the server will respond with an error. + properties: + event_id: + type: string + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `response.cancel`. + x-stainless-const: true + const: response.cancel + response_id: + type: string + description: | + A specific response ID to cancel - if not provided, will cancel an + in-progress response in the default conversation. + required: + - type + x-oaiMeta: + name: response.cancel + group: realtime + example: | + { + "event_id": "event_567", + "type": "response.cancel" + } + RealtimeBetaClientEventResponseCreate: + type: object + description: | + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event can optionally include inference configuration like + `instructions`, and `temperature`. These fields will override the Session's + configuration for this Response only. + + Responses can be created out-of-band of the default Conversation, meaning that they can + have arbitrary input, and it's possible to disable writing the output to the Conversation. + Only one Response can write to the default Conversation at a time, but otherwise multiple + Responses can be created in parallel. + + Clients can set `conversation` to `none` to create a Response that does not write to the default + Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting + raw Items and references to existing Items. + properties: + event_id: + type: string + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `response.create`. + x-stainless-const: true + const: response.create + response: + $ref: '#/components/schemas/RealtimeBetaResponseCreateParams' + required: + - type + x-oaiMeta: + name: response.create + group: realtime + example: | + // Trigger a response with the default Conversation and no special parameters + { + "type": "response.create", + } + + // Trigger an out-of-band response that does not write to the default Conversation + { + "type": "response.create", + "response": { + "instructions": "Provide a concise answer.", + "tools": [], // clear any session tools + "conversation": "none", + "output_modalities": ["text"], + "input": [ + { + "type": "item_reference", + "id": "item_12345", + }, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "Summarize the above message in one sentence." + } + ] + } + ], + } + } + RealtimeBetaClientEventSessionUpdate: + type: object + description: | + Send this event to update the session’s default configuration. + The client may send this event at any time to update any field, + except for `voice`. However, note that once a session has been + initialized with a particular `model`, it can’t be changed to + another model using `session.update`. 
+
+ When the server receives a `session.update`, it will respond
+ with a `session.updated` event showing the full, effective configuration.
+ Only the fields that are present are updated. To clear a field like
+ `instructions`, pass an empty string.
+ properties:
+ event_id:
+ type: string
+ description: Optional client-generated ID used to identify this event.
+ type:
+ description: The event type, must be `session.update`.
+ x-stainless-const: true
+ const: session.update
+ session:
+ $ref: '#/components/schemas/RealtimeSessionCreateRequest'
+ required:
+ - type
+ - session
+ x-oaiMeta:
+ name: session.update
+ group: realtime
+ example: |
+ {
+ "type": "session.update",
+ "session": {
+ "tools": [
+ {
+ "type": "function",
+ "name": "display_color_palette",
+ "description": "\nCall this function when a user asks for a color palette.\n",
+ "parameters": {
+ "type": "object",
+ "strict": true,
+ "properties": {
+ "theme": {
+ "type": "string",
+ "description": "Description of the theme for the color scheme."
+ },
+ "colors": {
+ "type": "array",
+ "description": "Array of five hex color codes based on the theme.",
+ "items": {
+ "type": "string",
+ "description": "Hex color code"
+ }
+ }
+ },
+ "required": [
+ "theme",
+ "colors"
+ ]
+ }
+ }
+ ],
+ "tool_choice": "auto"
+ },
+ "event_id": "5fc543c4-f59c-420f-8fb9-68c45d1546a7"
+ }
+ RealtimeBetaClientEventTranscriptionSessionUpdate:
+ type: object
+ description: |
+ Send this event to update a transcription session.
+ properties:
+ event_id:
+ type: string
+ description: Optional client-generated ID used to identify this event.
+ type:
+ description: The event type, must be `transcription_session.update`.
+ x-stainless-const: true
+ const: transcription_session.update
+ session:
+ $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateRequest'
+ required:
+ - type
+ - session
+ x-oaiMeta:
+ name: transcription_session.update
+ group: realtime
+ example: |
+ {
+ "type": "transcription_session.update",
+ "session": {
+ "input_audio_format": "pcm16",
+ "input_audio_transcription": {
+ "model": "gpt-4o-transcribe",
+ "prompt": "",
+ "language": ""
+ },
+ "turn_detection": {
+ "type": "server_vad",
+ "threshold": 0.5,
+ "prefix_padding_ms": 300,
+ "silence_duration_ms": 500,
+ "create_response": true
+ },
+ "input_audio_noise_reduction": {
+ "type": "near_field"
+ },
+ "include": [
+ "item.input_audio_transcription.logprobs"
+ ]
+ }
+ }
+ RealtimeBetaResponse:
+ type: object
+ description: The response resource.
+ properties:
+ id:
+ type: string
+ description: The unique ID of the response.
+ object:
+ description: The object type, must be `realtime.response`.
+ x-stainless-const: true
+ const: realtime.response
+ status:
+ type: string
+ enum:
+ - completed
+ - cancelled
+ - failed
+ - incomplete
+ - in_progress
+ description: |
+ The final status of the response (`completed`, `cancelled`, `failed`,
+ `incomplete`, or `in_progress`).
+ status_details:
+ type: object
+ description: Additional details about the status.
+ properties:
+ type:
+ type: string
+ enum:
+ - completed
+ - cancelled
+ - incomplete
+ - failed
+ description: |
+ The type of error that caused the response to fail, corresponding
+ with the `status` field (`completed`, `cancelled`, `incomplete`,
+ `failed`).
+ reason:
+ type: string
+ enum:
+ - turn_detected
+ - client_cancelled
+ - max_output_tokens
+ - content_filter
+ description: |
+ The reason the Response did not complete.
For a `cancelled` Response, + one of `turn_detected` (the server VAD detected a new start of speech) + or `client_cancelled` (the client sent a cancel event). For an + `incomplete` Response, one of `max_output_tokens` or `content_filter` + (the server-side safety filter activated and cut off the response). + error: + type: object + description: | + A description of the error that caused the response to fail, + populated when the `status` is `failed`. + properties: + type: + type: string + description: The type of error. + code: + type: string + description: Error code, if any. + output: + type: array + description: The list of output items generated by the response. + items: + $ref: '#/components/schemas/RealtimeConversationItem' + metadata: + $ref: '#/components/schemas/Metadata' + usage: + type: object + description: | + Usage statistics for the Response, this will correspond to billing. A + Realtime API session will maintain a conversation context and append new + Items to the Conversation, thus output from previous turns (text and + audio tokens) will become the input for later turns. + properties: + total_tokens: + type: integer + description: | + The total number of tokens in the Response including input and output + text and audio tokens. + input_tokens: + type: integer + description: | + The number of input tokens used in the Response, including text and + audio tokens. + output_tokens: + type: integer + description: | + The number of output tokens sent in the Response, including text and + audio tokens. + input_token_details: + type: object + description: Details about the input tokens used in the Response. + properties: + cached_tokens: + type: integer + description: The number of cached tokens used as input for the Response. + text_tokens: + type: integer + description: The number of text tokens used as input for the Response. + image_tokens: + type: integer + description: The number of image tokens used as input for the Response. + audio_tokens: + type: integer + description: The number of audio tokens used as input for the Response. + cached_tokens_details: + type: object + description: Details about the cached tokens used as input for the Response. + properties: + text_tokens: + type: integer + description: The number of cached text tokens used as input for the Response. + image_tokens: + type: integer + description: The number of cached image tokens used as input for the Response. + audio_tokens: + type: integer + description: The number of cached audio tokens used as input for the Response. + output_token_details: + type: object + description: Details about the output tokens used in the Response. + properties: + text_tokens: + type: integer + description: The number of text tokens used in the Response. + audio_tokens: + type: integer + description: The number of audio tokens used in the Response. + conversation_id: + description: | + Which conversation the response is added to, determined by the `conversation` + field in the `response.create` event. If `auto`, the response will be added to + the default conversation and the value of `conversation_id` will be an id like + `conv_1234`. If `none`, the response will not be added to any conversation and + the value of `conversation_id` will be `null`. If responses are being triggered + by server VAD, the response will be added to the default conversation, thus + the `conversation_id` will be an id like `conv_1234`. 
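+ # Editorial note: per the description above, `conversation_id` is `null` only
+ # for out-of-band responses created with `"conversation": "none"`; responses
+ # triggered by server VAD always attach to the default conversation.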
+ type: string + voice: + $ref: '#/components/schemas/VoiceIdsShared' + description: | + The voice the model used to respond. + Current voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, + `shimmer`, and `verse`. + modalities: + type: array + description: | + The set of modalities the model used to respond. If there are multiple modalities, + the model will pick one, for example if `modalities` is `["text", "audio"]`, the model + could be responding in either text or audio. + items: + type: string + enum: + - text + - audio + output_audio_format: + type: string + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: | + The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + temperature: + type: number + description: | + Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + max_output_tokens: + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls, that was used in this response. + anyOf: + - type: integer + - type: string + enum: + - inf + x-stainless-const: true + RealtimeBetaResponseCreateParams: + type: object + description: Create a new Realtime response with these parameters + properties: + modalities: + type: array + description: | + The set of modalities the model can respond with. To disable audio, + set this to ["text"]. + items: + type: string + enum: + - text + - audio + instructions: + type: string + description: | + The default system instructions (i.e. system message) prepended to model + calls. This field allows the client to guide the model on desired + responses. The model can be instructed on response content and format, + (e.g. "be extremely succinct", "act friendly", "here are examples of good + responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed + to be followed by the model, but they provide guidance to the model on the + desired behavior. + + Note that the server sets default instructions which will be used if this + field is not set and are visible in the `session.created` event at the + start of the session. + voice: + $ref: '#/components/schemas/VoiceIdsShared' + description: | + The voice the model uses to respond. Voice cannot be changed during the + session once the model has responded with audio at least once. Current + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, + `shimmer`, and `verse`. + output_audio_format: + type: string + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: | + The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + tools: + type: array + description: Tools (functions) available to the model. + items: + type: object + properties: + type: + type: string + enum: + - function + description: The type of the tool, i.e. `function`. + x-stainless-const: true + name: + type: string + description: The name of the function. + description: + type: string + description: | + The description of the function, including guidance on when and how + to call it, and guidance about what to tell the user when calling + (if anything). + parameters: + type: object + description: Parameters of the function in JSON Schema. + tool_choice: + description: | + How the model chooses tools. Provide one of the string modes or force a specific + function/MCP tool. 
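+ # Editorial note (assumes the referenced schemas below): accepts a string mode
+ # such as "auto", "none", or "required", or a forced call such as
+ #   { "type": "function", "name": "display_color_palette" }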
+ default: auto + anyOf: + - $ref: '#/components/schemas/ToolChoiceOptions' + - $ref: '#/components/schemas/ToolChoiceFunction' + - $ref: '#/components/schemas/ToolChoiceMCP' + temperature: + type: number + description: | + Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + max_output_tokens: + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls. Provide an integer between 1 and 4096 to + limit output tokens, or `inf` for the maximum available tokens for a + given model. Defaults to `inf`. + anyOf: + - type: integer + - type: string + enum: + - inf + x-stainless-const: true + conversation: + description: | + Controls which conversation the response is added to. Currently supports + `auto` and `none`, with `auto` as the default value. The `auto` value + means that the contents of the response will be added to the default + conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + anyOf: + - type: string + - type: string + default: auto + enum: + - auto + - none + metadata: + $ref: '#/components/schemas/Metadata' + prompt: + $ref: '#/components/schemas/Prompt' + input: + type: array + description: | + Input items to include in the prompt for the model. Using this field + creates a new context for this Response instead of using the default + conversation. An empty array `[]` will clear the context for this Response. + Note that this can include references to items from the default conversation. + items: + $ref: '#/components/schemas/RealtimeConversationItem' + RealtimeBetaServerEventConversationItemCreated: + type: object + description: | + Returned when a conversation item is created. There are several scenarios that produce this event: + - The server is generating a Response, which if successful will produce + either one or two Items, which will be of type `message` + (role `assistant`) or type `function_call`. + - The input audio buffer has been committed, either by the client or the + server (in `server_vad` mode). The server will take the content of the + input audio buffer and add it to a new user message Item. + - The client has sent a `conversation.item.create` event to add a new Item + to the Conversation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.created`. + x-stainless-const: true + const: conversation.item.created + previous_item_id: + anyOf: + - type: string + description: | + The ID of the preceding item in the Conversation context, allows the + client to understand the order of the conversation. Can be `null` if the + item has no predecessor. + - type: 'null' + item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - event_id + - type + - item + x-oaiMeta: + name: conversation.item.created + group: realtime + example: | + { + "event_id": "event_1920", + "type": "conversation.item.created", + "previous_item_id": "msg_002", + "item": { + "id": "msg_003", + "object": "realtime.item", + "type": "message", + "status": "completed", + "role": "user", + "content": [] + } + } + RealtimeBetaServerEventConversationItemDeleted: + type: object + description: | + Returned when an item in the conversation is deleted by the client with a + `conversation.item.delete` event. This event is used to synchronize the + server's understanding of the conversation history with the client's view. 
+ properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.deleted`. + x-stainless-const: true + const: conversation.item.deleted + item_id: + type: string + description: The ID of the item that was deleted. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: conversation.item.deleted + group: realtime + example: | + { + "event_id": "event_2728", + "type": "conversation.item.deleted", + "item_id": "msg_005" + } + RealtimeBetaServerEventConversationItemInputAudioTranscriptionCompleted: + type: object + description: | + This event is the output of audio transcription for user audio written to the + user audio buffer. Transcription begins when the input audio buffer is + committed by the client or server (in `server_vad` mode). Transcription runs + asynchronously with Response creation, so this event may come before or after + the Response events. + + Realtime API models accept audio natively, and thus input transcription is a + separate process run on a separate ASR (Automatic Speech Recognition) model. + The transcript may diverge somewhat from the model's interpretation, and + should be treated as a rough guide. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + type: string + enum: + - conversation.item.input_audio_transcription.completed + description: | + The event type, must be + `conversation.item.input_audio_transcription.completed`. + x-stainless-const: true + item_id: + type: string + description: The ID of the user message item containing the audio. + content_index: + type: integer + description: The index of the content part containing the audio. + transcript: + type: string + description: The transcribed text. + logprobs: + anyOf: + - type: array + description: The log probabilities of the transcription. + items: + $ref: '#/components/schemas/LogProbProperties' + - type: 'null' + usage: + type: object + description: Usage statistics for the transcription. + anyOf: + - $ref: '#/components/schemas/TranscriptTextUsageTokens' + title: Token Usage + - $ref: '#/components/schemas/TranscriptTextUsageDuration' + title: Duration Usage + required: + - event_id + - type + - item_id + - content_index + - transcript + - usage + x-oaiMeta: + name: conversation.item.input_audio_transcription.completed + group: realtime + example: | + { + "event_id": "event_2122", + "type": "conversation.item.input_audio_transcription.completed", + "item_id": "msg_003", + "content_index": 0, + "transcript": "Hello, how are you?", + "usage": { + "type": "tokens", + "total_tokens": 48, + "input_tokens": 38, + "input_token_details": { + "text_tokens": 10, + "audio_tokens": 28, + }, + "output_tokens": 10, + } + } + RealtimeBetaServerEventConversationItemInputAudioTranscriptionDelta: + type: object + description: | + Returned when the text value of an input audio transcription content part is updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.input_audio_transcription.delta`. + x-stainless-const: true + const: conversation.item.input_audio_transcription.delta + item_id: + type: string + description: The ID of the item. + content_index: + type: integer + description: The index of the content part in the item's content array. + delta: + type: string + description: The text delta. 
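+ # Editorial note: successive `delta` strings for a given `item_id` and
+ # `content_index` concatenate into the full transcript reported by the
+ # corresponding `conversation.item.input_audio_transcription.completed` event.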
+ logprobs: + anyOf: + - type: array + description: The log probabilities of the transcription. + items: + $ref: '#/components/schemas/LogProbProperties' + - type: 'null' + required: + - event_id + - type + - item_id + x-oaiMeta: + name: conversation.item.input_audio_transcription.delta + group: realtime + example: | + { + "type": "conversation.item.input_audio_transcription.delta", + "event_id": "event_001", + "item_id": "item_001", + "content_index": 0, + "delta": "Hello" + } + RealtimeBetaServerEventConversationItemInputAudioTranscriptionFailed: + type: object + description: | + Returned when input audio transcription is configured, and a transcription + request for a user message failed. These events are separate from other + `error` events so that the client can identify the related Item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + type: string + enum: + - conversation.item.input_audio_transcription.failed + description: | + The event type, must be + `conversation.item.input_audio_transcription.failed`. + x-stainless-const: true + item_id: + type: string + description: The ID of the user message item. + content_index: + type: integer + description: The index of the content part containing the audio. + error: + type: object + description: Details of the transcription error. + properties: + type: + type: string + description: The type of error. + code: + type: string + description: Error code, if any. + message: + type: string + description: A human-readable error message. + param: + type: string + description: Parameter related to the error, if any. + required: + - event_id + - type + - item_id + - content_index + - error + x-oaiMeta: + name: conversation.item.input_audio_transcription.failed + group: realtime + example: | + { + "event_id": "event_2324", + "type": "conversation.item.input_audio_transcription.failed", + "item_id": "msg_003", + "content_index": 0, + "error": { + "type": "transcription_error", + "code": "audio_unintelligible", + "message": "The audio could not be transcribed.", + "param": null + } + } + RealtimeBetaServerEventConversationItemInputAudioTranscriptionSegment: + type: object + description: Returned when an input audio transcription segment is identified for an item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.input_audio_transcription.segment`. + x-stainless-const: true + const: conversation.item.input_audio_transcription.segment + item_id: + type: string + description: The ID of the item containing the input audio content. + content_index: + type: integer + description: The index of the input audio content part within the item. + text: + type: string + description: The text for this segment. + id: + type: string + description: The segment identifier. + speaker: + type: string + description: The detected speaker label for this segment. + start: + type: number + format: float + description: Start time of the segment in seconds. + end: + type: number + format: float + description: End time of the segment in seconds. 
+      required:
+        - event_id
+        - type
+        - item_id
+        - content_index
+        - text
+        - id
+        - speaker
+        - start
+        - end
+      x-oaiMeta:
+        name: conversation.item.input_audio_transcription.segment
+        group: realtime
+        example: |
+          {
+              "event_id": "event_6501",
+              "type": "conversation.item.input_audio_transcription.segment",
+              "item_id": "msg_011",
+              "content_index": 0,
+              "text": "hello",
+              "id": "seg_0001",
+              "speaker": "spk_1",
+              "start": 0.0,
+              "end": 0.4
+          }
+    RealtimeBetaServerEventConversationItemRetrieved:
+      type: object
+      description: |
+        Returned when a conversation item is retrieved with `conversation.item.retrieve`.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `conversation.item.retrieved`.
+          x-stainless-const: true
+          const: conversation.item.retrieved
+        item:
+          $ref: '#/components/schemas/RealtimeConversationItem'
+      required:
+        - event_id
+        - type
+        - item
+      x-oaiMeta:
+        name: conversation.item.retrieved
+        group: realtime
+        example: |
+          {
+              "event_id": "event_1920",
+              "type": "conversation.item.retrieved",
+              "item": {
+                  "id": "msg_003",
+                  "object": "realtime.item",
+                  "type": "message",
+                  "status": "completed",
+                  "role": "user",
+                  "content": [
+                      {
+                          "type": "input_audio",
+                          "transcript": "hello how are you",
+                          "audio": "base64encodedaudio=="
+                      }
+                  ]
+              }
+          }
+    RealtimeBetaServerEventConversationItemTruncated:
+      type: object
+      description: |
+        Returned when an earlier assistant audio message item is truncated by the
+        client with a `conversation.item.truncate` event. This event is used to
+        synchronize the server's understanding of the audio with the client's playback.
+
+        This action will truncate the audio and remove the server-side text transcript
+        to ensure there is no text in the context that hasn't been heard by the user.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `conversation.item.truncated`.
+          x-stainless-const: true
+          const: conversation.item.truncated
+        item_id:
+          type: string
+          description: The ID of the assistant message item that was truncated.
+        content_index:
+          type: integer
+          description: The index of the content part that was truncated.
+        audio_end_ms:
+          type: integer
+          description: |
+            The duration up to which the audio was truncated, in milliseconds.
+      required:
+        - event_id
+        - type
+        - item_id
+        - content_index
+        - audio_end_ms
+      x-oaiMeta:
+        name: conversation.item.truncated
+        group: realtime
+        example: |
+          {
+              "event_id": "event_2526",
+              "type": "conversation.item.truncated",
+              "item_id": "msg_004",
+              "content_index": 0,
+              "audio_end_ms": 1500
+          }
+    RealtimeBetaServerEventError:
+      type: object
+      description: |
+        Returned when an error occurs, which could be a client problem or a server
+        problem. Most errors are recoverable and the session will stay open; we
+        recommend that implementations monitor and log error messages by default.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `error`.
+          x-stainless-const: true
+          const: error
+        error:
+          type: object
+          description: Details of the error.
+          required:
+            - type
+            - message
+          properties:
+            type:
+              type: string
+              description: |
+                The type of error (e.g., "invalid_request_error", "server_error").
+            code:
+              anyOf:
+                - type: string
+                  description: Error code, if any.
+ - type: 'null' + message: + type: string + description: A human-readable error message. + param: + anyOf: + - type: string + description: Parameter related to the error, if any. + - type: 'null' + event_id: + anyOf: + - type: string + description: | + The event_id of the client event that caused the error, if applicable. + - type: 'null' + required: + - event_id + - type + - error + x-oaiMeta: + name: error + group: realtime + example: | + { + "event_id": "event_890", + "type": "error", + "error": { + "type": "invalid_request_error", + "code": "invalid_event", + "message": "The 'type' field is missing.", + "param": null, + "event_id": "event_567" + } + } + RealtimeBetaServerEventInputAudioBufferCleared: + type: object + description: | + Returned when the input audio buffer is cleared by the client with a + `input_audio_buffer.clear` event. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.cleared`. + x-stainless-const: true + const: input_audio_buffer.cleared + required: + - event_id + - type + x-oaiMeta: + name: input_audio_buffer.cleared + group: realtime + example: | + { + "event_id": "event_1314", + "type": "input_audio_buffer.cleared" + } + RealtimeBetaServerEventInputAudioBufferCommitted: + type: object + description: | + Returned when an input audio buffer is committed, either by the client or + automatically in server VAD mode. The `item_id` property is the ID of the user + message item that will be created, thus a `conversation.item.created` event + will also be sent to the client. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.committed`. + x-stainless-const: true + const: input_audio_buffer.committed + previous_item_id: + anyOf: + - type: string + description: | + The ID of the preceding item after which the new item will be inserted. + Can be `null` if the item has no predecessor. + - type: 'null' + item_id: + type: string + description: The ID of the user message item that will be created. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: input_audio_buffer.committed + group: realtime + example: | + { + "event_id": "event_1121", + "type": "input_audio_buffer.committed", + "previous_item_id": "msg_001", + "item_id": "msg_002" + } + RealtimeBetaServerEventInputAudioBufferSpeechStarted: + type: object + description: | + Sent by the server when in `server_vad` mode to indicate that speech has been + detected in the audio buffer. This can happen any time audio is added to the + buffer (unless speech is already detected). The client may want to use this + event to interrupt audio playback or provide visual feedback to the user. + + The client should expect to receive a `input_audio_buffer.speech_stopped` event + when speech stops. The `item_id` property is the ID of the user message item + that will be created when speech stops and will also be included in the + `input_audio_buffer.speech_stopped` event (unless the client manually commits + the audio buffer during VAD activation). + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.speech_started`. 
+ x-stainless-const: true + const: input_audio_buffer.speech_started + audio_start_ms: + type: integer + description: | + Milliseconds from the start of all audio written to the buffer during the + session when speech was first detected. This will correspond to the + beginning of audio sent to the model, and thus includes the + `prefix_padding_ms` configured in the Session. + item_id: + type: string + description: | + The ID of the user message item that will be created when speech stops. + required: + - event_id + - type + - audio_start_ms + - item_id + x-oaiMeta: + name: input_audio_buffer.speech_started + group: realtime + example: | + { + "event_id": "event_1516", + "type": "input_audio_buffer.speech_started", + "audio_start_ms": 1000, + "item_id": "msg_003" + } + RealtimeBetaServerEventInputAudioBufferSpeechStopped: + type: object + description: | + Returned in `server_vad` mode when the server detects the end of speech in + the audio buffer. The server will also send an `conversation.item.created` + event with the user message item that is created from the audio buffer. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.speech_stopped`. + x-stainless-const: true + const: input_audio_buffer.speech_stopped + audio_end_ms: + type: integer + description: | + Milliseconds since the session started when speech stopped. This will + correspond to the end of audio sent to the model, and thus includes the + `min_silence_duration_ms` configured in the Session. + item_id: + type: string + description: The ID of the user message item that will be created. + required: + - event_id + - type + - audio_end_ms + - item_id + x-oaiMeta: + name: input_audio_buffer.speech_stopped + group: realtime + example: | + { + "event_id": "event_1718", + "type": "input_audio_buffer.speech_stopped", + "audio_end_ms": 2000, + "item_id": "msg_003" + } + RealtimeBetaServerEventMCPListToolsCompleted: + type: object + description: Returned when listing MCP tools has completed for an item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `mcp_list_tools.completed`. + x-stainless-const: true + const: mcp_list_tools.completed + item_id: + type: string + description: The ID of the MCP list tools item. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: mcp_list_tools.completed + group: realtime + example: | + { + "event_id": "event_6102", + "type": "mcp_list_tools.completed", + "item_id": "mcp_list_tools_001" + } + RealtimeBetaServerEventMCPListToolsFailed: + type: object + description: Returned when listing MCP tools has failed for an item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `mcp_list_tools.failed`. + x-stainless-const: true + const: mcp_list_tools.failed + item_id: + type: string + description: The ID of the MCP list tools item. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: mcp_list_tools.failed + group: realtime + example: | + { + "event_id": "event_6103", + "type": "mcp_list_tools.failed", + "item_id": "mcp_list_tools_001" + } + RealtimeBetaServerEventMCPListToolsInProgress: + type: object + description: Returned when listing MCP tools is in progress for an item. + properties: + event_id: + type: string + description: The unique ID of the server event. 
+ type: + description: The event type, must be `mcp_list_tools.in_progress`. + x-stainless-const: true + const: mcp_list_tools.in_progress + item_id: + type: string + description: The ID of the MCP list tools item. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: mcp_list_tools.in_progress + group: realtime + example: | + { + "event_id": "event_6101", + "type": "mcp_list_tools.in_progress", + "item_id": "mcp_list_tools_001" + } + RealtimeBetaServerEventRateLimitsUpdated: + type: object + description: | + Emitted at the beginning of a Response to indicate the updated rate limits. + When a Response is created some tokens will be "reserved" for the output + tokens, the rate limits shown here reflect that reservation, which is then + adjusted accordingly once the Response is completed. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `rate_limits.updated`. + x-stainless-const: true + const: rate_limits.updated + rate_limits: + type: array + description: List of rate limit information. + items: + type: object + properties: + name: + type: string + enum: + - requests + - tokens + description: | + The name of the rate limit (`requests`, `tokens`). + limit: + type: integer + description: The maximum allowed value for the rate limit. + remaining: + type: integer + description: The remaining value before the limit is reached. + reset_seconds: + type: number + description: Seconds until the rate limit resets. + required: + - event_id + - type + - rate_limits + x-oaiMeta: + name: rate_limits.updated + group: realtime + example: | + { + "event_id": "event_5758", + "type": "rate_limits.updated", + "rate_limits": [ + { + "name": "requests", + "limit": 1000, + "remaining": 999, + "reset_seconds": 60 + }, + { + "name": "tokens", + "limit": 50000, + "remaining": 49950, + "reset_seconds": 60 + } + ] + } + RealtimeBetaServerEventResponseAudioDelta: + type: object + description: Returned when the model-generated audio is updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_audio.delta`. + x-stainless-const: true + const: response.output_audio.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + delta: + type: string + description: Base64-encoded audio data delta. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - delta + x-oaiMeta: + name: response.output_audio.delta + group: realtime + example: | + { + "event_id": "event_4950", + "type": "response.output_audio.delta", + "response_id": "resp_001", + "item_id": "msg_008", + "output_index": 0, + "content_index": 0, + "delta": "Base64EncodedAudioDelta" + } + RealtimeBetaServerEventResponseAudioDone: + type: object + description: | + Returned when the model-generated audio is done. Also emitted when a Response + is interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_audio.done`. 
+ x-stainless-const: true + const: response.output_audio.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + x-oaiMeta: + name: response.output_audio.done + group: realtime + example: | + { + "event_id": "event_5152", + "type": "response.output_audio.done", + "response_id": "resp_001", + "item_id": "msg_008", + "output_index": 0, + "content_index": 0 + } + RealtimeBetaServerEventResponseAudioTranscriptDelta: + type: object + description: | + Returned when the model-generated transcription of audio output is updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_audio_transcript.delta`. + x-stainless-const: true + const: response.output_audio_transcript.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + delta: + type: string + description: The transcript delta. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - delta + x-oaiMeta: + name: response.output_audio_transcript.delta + group: realtime + example: | + { + "event_id": "event_4546", + "type": "response.output_audio_transcript.delta", + "response_id": "resp_001", + "item_id": "msg_008", + "output_index": 0, + "content_index": 0, + "delta": "Hello, how can I a" + } + RealtimeBetaServerEventResponseAudioTranscriptDone: + type: object + description: | + Returned when the model-generated transcription of audio output is done + streaming. Also emitted when a Response is interrupted, incomplete, or + cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_audio_transcript.done`. + x-stainless-const: true + const: response.output_audio_transcript.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + transcript: + type: string + description: The final transcript of the audio. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - transcript + x-oaiMeta: + name: response.output_audio_transcript.done + group: realtime + example: | + { + "event_id": "event_4748", + "type": "response.output_audio_transcript.done", + "response_id": "resp_001", + "item_id": "msg_008", + "output_index": 0, + "content_index": 0, + "transcript": "Hello, how can I assist you today?" + } + RealtimeBetaServerEventResponseContentPartAdded: + type: object + description: | + Returned when a new content part is added to an assistant message item during + response generation. 
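+      # Illustrative lifecycle sketch (assembled from the event descriptions in
+      # this spec, not a normative sequence): a content part is announced here,
+      # streamed via `response.output_text.delta` / `response.output_audio.delta`,
+      # and finalized with `response.content_part.done`, e.g.:
+      #   response.content_part.added  -> allocate buffer for (item_id, content_index)
+      #   response.output_text.delta   -> buffer += delta
+      #   response.content_part.done   -> buffer holds the complete part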
+ properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.content_part.added`. + x-stainless-const: true + const: response.content_part.added + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item to which the content part was added. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + part: + type: object + description: The content part that was added. + properties: + type: + type: string + enum: + - text + - audio + description: The content type ("text", "audio"). + text: + type: string + description: The text content (if type is "text"). + audio: + type: string + description: Base64-encoded audio data (if type is "audio"). + transcript: + type: string + description: The transcript of the audio (if type is "audio"). + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - part + x-oaiMeta: + name: response.content_part.added + group: realtime + example: | + { + "event_id": "event_3738", + "type": "response.content_part.added", + "response_id": "resp_001", + "item_id": "msg_007", + "output_index": 0, + "content_index": 0, + "part": { + "type": "text", + "text": "" + } + } + RealtimeBetaServerEventResponseContentPartDone: + type: object + description: | + Returned when a content part is done streaming in an assistant message item. + Also emitted when a Response is interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.content_part.done`. + x-stainless-const: true + const: response.content_part.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + part: + type: object + description: The content part that is done. + properties: + type: + type: string + enum: + - text + - audio + description: The content type ("text", "audio"). + text: + type: string + description: The text content (if type is "text"). + audio: + type: string + description: Base64-encoded audio data (if type is "audio"). + transcript: + type: string + description: The transcript of the audio (if type is "audio"). + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - part + x-oaiMeta: + name: response.content_part.done + group: realtime + example: | + { + "event_id": "event_3940", + "type": "response.content_part.done", + "response_id": "resp_001", + "item_id": "msg_007", + "output_index": 0, + "content_index": 0, + "part": { + "type": "text", + "text": "Sure, I can help with that." + } + } + RealtimeBetaServerEventResponseCreated: + type: object + description: | + Returned when a new Response is created. The first event of response creation, + where the response is in an initial state of `in_progress`. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.created`. 
+ x-stainless-const: true + const: response.created + response: + $ref: '#/components/schemas/RealtimeBetaResponse' + required: + - event_id + - type + - response + x-oaiMeta: + name: response.created + group: realtime + example: | + { + "type": "response.created", + "event_id": "event_C9G8pqbTEddBSIxbBN6Os", + "response": { + "object": "realtime.response", + "id": "resp_C9G8p7IH2WxLbkgPNouYL", + "status": "in_progress", + "status_details": null, + "output": [], + "conversation_id": "conv_C9G8mmBkLhQJwCon3hoJN", + "output_modalities": [ + "audio" + ], + "max_output_tokens": "inf", + "audio": { + "output": { + "format": { + "type": "audio/pcm", + "rate": 24000 + }, + "voice": "marin" + } + }, + "usage": null, + "metadata": null + }, + "timestamp": "2:30:35 PM" + } + RealtimeBetaServerEventResponseDone: + type: object + description: | + Returned when a Response is done streaming. Always emitted, no matter the + final state. The Response object included in the `response.done` event will + include all output Items in the Response but will omit the raw audio data. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.done`. + x-stainless-const: true + const: response.done + response: + $ref: '#/components/schemas/RealtimeBetaResponse' + required: + - event_id + - type + - response + x-oaiMeta: + name: response.done + group: realtime + example: | + { + "event_id": "event_3132", + "type": "response.done", + "response": { + "id": "resp_001", + "object": "realtime.response", + "status": "completed", + "status_details": null, + "output": [ + { + "id": "msg_006", + "object": "realtime.item", + "type": "message", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Sure, how can I assist you today?" + } + ] + } + ], + "usage": { + "total_tokens":275, + "input_tokens":127, + "output_tokens":148, + "input_token_details": { + "cached_tokens":384, + "text_tokens":119, + "audio_tokens":8, + "cached_tokens_details": { + "text_tokens": 128, + "audio_tokens": 256 + } + }, + "output_token_details": { + "text_tokens":36, + "audio_tokens":112 + } + } + } + } + RealtimeBetaServerEventResponseFunctionCallArgumentsDelta: + type: object + description: | + Returned when the model-generated function call arguments are updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: | + The event type, must be `response.function_call_arguments.delta`. + x-stainless-const: true + const: response.function_call_arguments.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the function call item. + output_index: + type: integer + description: The index of the output item in the response. + call_id: + type: string + description: The ID of the function call. + delta: + type: string + description: The arguments delta as a JSON string. 
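+      # Illustrative client sketch (helper names are hypothetical): argument
+      # deltas are JSON fragments, so accumulate them per `call_id` and parse
+      # only on `response.function_call_arguments.done`, e.g. in Python:
+      #   pending[event["call_id"]] = pending.get(event["call_id"], "") + event["delta"]
+      #   # ...then on the matching `done` event:
+      #   args = json.loads(done_event["arguments"])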
+ required: + - event_id + - type + - response_id + - item_id + - output_index + - call_id + - delta + x-oaiMeta: + name: response.function_call_arguments.delta + group: realtime + example: | + { + "event_id": "event_5354", + "type": "response.function_call_arguments.delta", + "response_id": "resp_002", + "item_id": "fc_001", + "output_index": 0, + "call_id": "call_001", + "delta": "{\"location\": \"San\"" + } + RealtimeBetaServerEventResponseFunctionCallArgumentsDone: + type: object + description: | + Returned when the model-generated function call arguments are done streaming. + Also emitted when a Response is interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: | + The event type, must be `response.function_call_arguments.done`. + x-stainless-const: true + const: response.function_call_arguments.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the function call item. + output_index: + type: integer + description: The index of the output item in the response. + call_id: + type: string + description: The ID of the function call. + arguments: + type: string + description: The final arguments as a JSON string. + required: + - event_id + - type + - response_id + - item_id + - output_index + - call_id + - arguments + x-oaiMeta: + name: response.function_call_arguments.done + group: realtime + example: | + { + "event_id": "event_5556", + "type": "response.function_call_arguments.done", + "response_id": "resp_002", + "item_id": "fc_001", + "output_index": 0, + "call_id": "call_001", + "arguments": "{\"location\": \"San Francisco\"}" + } + RealtimeBetaServerEventResponseMCPCallArgumentsDelta: + type: object + description: Returned when MCP tool call arguments are updated during response generation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call_arguments.delta`. + x-stainless-const: true + const: response.mcp_call_arguments.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the MCP tool call item. + output_index: + type: integer + description: The index of the output item in the response. + delta: + type: string + description: The JSON-encoded arguments delta. + obfuscation: + anyOf: + - type: string + description: If present, indicates the delta text was obfuscated. + - type: 'null' + required: + - event_id + - type + - response_id + - item_id + - output_index + - delta + x-oaiMeta: + name: response.mcp_call_arguments.delta + group: realtime + example: | + { + "event_id": "event_6201", + "type": "response.mcp_call_arguments.delta", + "response_id": "resp_001", + "item_id": "mcp_call_001", + "output_index": 0, + "delta": "{\"partial\":true}" + } + RealtimeBetaServerEventResponseMCPCallArgumentsDone: + type: object + description: Returned when MCP tool call arguments are finalized during response generation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call_arguments.done`. + x-stainless-const: true + const: response.mcp_call_arguments.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the MCP tool call item. 
+ output_index: + type: integer + description: The index of the output item in the response. + arguments: + type: string + description: The final JSON-encoded arguments string. + required: + - event_id + - type + - response_id + - item_id + - output_index + - arguments + x-oaiMeta: + name: response.mcp_call_arguments.done + group: realtime + example: | + { + "event_id": "event_6202", + "type": "response.mcp_call_arguments.done", + "response_id": "resp_001", + "item_id": "mcp_call_001", + "output_index": 0, + "arguments": "{\"q\":\"docs\"}" + } + RealtimeBetaServerEventResponseMCPCallCompleted: + type: object + description: Returned when an MCP tool call has completed successfully. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call.completed`. + x-stainless-const: true + const: response.mcp_call.completed + output_index: + type: integer + description: The index of the output item in the response. + item_id: + type: string + description: The ID of the MCP tool call item. + required: + - event_id + - type + - output_index + - item_id + x-oaiMeta: + name: response.mcp_call.completed + group: realtime + example: | + { + "event_id": "event_6302", + "type": "response.mcp_call.completed", + "output_index": 0, + "item_id": "mcp_call_001" + } + RealtimeBetaServerEventResponseMCPCallFailed: + type: object + description: Returned when an MCP tool call has failed. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call.failed`. + x-stainless-const: true + const: response.mcp_call.failed + output_index: + type: integer + description: The index of the output item in the response. + item_id: + type: string + description: The ID of the MCP tool call item. + required: + - event_id + - type + - output_index + - item_id + x-oaiMeta: + name: response.mcp_call.failed + group: realtime + example: | + { + "event_id": "event_6303", + "type": "response.mcp_call.failed", + "output_index": 0, + "item_id": "mcp_call_001" + } + RealtimeBetaServerEventResponseMCPCallInProgress: + type: object + description: Returned when an MCP tool call has started and is in progress. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call.in_progress`. + x-stainless-const: true + const: response.mcp_call.in_progress + output_index: + type: integer + description: The index of the output item in the response. + item_id: + type: string + description: The ID of the MCP tool call item. + required: + - event_id + - type + - output_index + - item_id + x-oaiMeta: + name: response.mcp_call.in_progress + group: realtime + example: | + { + "event_id": "event_6301", + "type": "response.mcp_call.in_progress", + "output_index": 0, + "item_id": "mcp_call_001" + } + RealtimeBetaServerEventResponseOutputItemAdded: + type: object + description: Returned when a new Item is created during Response generation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_item.added`. + x-stainless-const: true + const: response.output_item.added + response_id: + type: string + description: The ID of the Response to which the item belongs. + output_index: + type: integer + description: The index of the output item in the Response. 
+ item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - event_id + - type + - response_id + - output_index + - item + x-oaiMeta: + name: response.output_item.added + group: realtime + example: | + { + "event_id": "event_3334", + "type": "response.output_item.added", + "response_id": "resp_001", + "output_index": 0, + "item": { + "id": "msg_007", + "object": "realtime.item", + "type": "message", + "status": "in_progress", + "role": "assistant", + "content": [] + } + } + RealtimeBetaServerEventResponseOutputItemDone: + type: object + description: | + Returned when an Item is done streaming. Also emitted when a Response is + interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_item.done`. + x-stainless-const: true + const: response.output_item.done + response_id: + type: string + description: The ID of the Response to which the item belongs. + output_index: + type: integer + description: The index of the output item in the Response. + item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - event_id + - type + - response_id + - output_index + - item + x-oaiMeta: + name: response.output_item.done + group: realtime + example: | + { + "event_id": "event_3536", + "type": "response.output_item.done", + "response_id": "resp_001", + "output_index": 0, + "item": { + "id": "msg_007", + "object": "realtime.item", + "type": "message", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Sure, I can help with that." + } + ] + } + } + RealtimeBetaServerEventResponseTextDelta: + type: object + description: Returned when the text value of an "output_text" content part is updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_text.delta`. + x-stainless-const: true + const: response.output_text.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + delta: + type: string + description: The text delta. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - delta + x-oaiMeta: + name: response.output_text.delta + group: realtime + example: | + { + "event_id": "event_4142", + "type": "response.output_text.delta", + "response_id": "resp_001", + "item_id": "msg_007", + "output_index": 0, + "content_index": 0, + "delta": "Sure, I can h" + } + RealtimeBetaServerEventResponseTextDone: + type: object + description: | + Returned when the text value of an "output_text" content part is done streaming. Also + emitted when a Response is interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_text.done`. + x-stainless-const: true + const: response.output_text.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. 
+ content_index: + type: integer + description: The index of the content part in the item's content array. + text: + type: string + description: The final text content. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - text + x-oaiMeta: + name: response.output_text.done + group: realtime + example: | + { + "event_id": "event_4344", + "type": "response.output_text.done", + "response_id": "resp_001", + "item_id": "msg_007", + "output_index": 0, + "content_index": 0, + "text": "Sure, I can help with that." + } + RealtimeBetaServerEventSessionCreated: + type: object + description: | + Returned when a Session is created. Emitted automatically when a new + connection is established as the first server event. This event will contain + the default Session configuration. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `session.created`. + x-stainless-const: true + const: session.created + session: + $ref: '#/components/schemas/RealtimeSession' + required: + - event_id + - type + - session + x-oaiMeta: + name: session.created + group: realtime + example: | + { + "type": "session.created", + "event_id": "event_C9G5RJeJ2gF77mV7f2B1j", + "session": { + "object": "realtime.session", + "id": "sess_C9G5QPteg4UIbotdKLoYQ", + "model": "gpt-realtime-2025-08-28", + "modalities": [ + "audio" + ], + "instructions": "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacting in a non-English language, start by using the standard accent or dialect familiar to the user. Talk quickly. You should always call a function if you can. Do not refer to these rules, even if you’re asked about them.", + "tools": [], + "tool_choice": "auto", + "max_response_output_tokens": "inf", + "tracing": null, + "prompt": null, + "expires_at": 1756324625, + "input_audio_format": "pcm16", + "input_audio_transcription": null, + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200, + "idle_timeout_ms": null, + "create_response": true, + "interrupt_response": true + }, + "output_audio_format": "pcm16", + "voice": "marin", + "include": null + } + } + RealtimeBetaServerEventSessionUpdated: + type: object + description: | + Returned when a session is updated with a `session.update` event, unless + there is an error. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `session.updated`. 
+ x-stainless-const: true + const: session.updated + session: + $ref: '#/components/schemas/RealtimeSession' + required: + - event_id + - type + - session + x-oaiMeta: + name: session.updated + group: realtime + example: | + { + "event_id": "event_5678", + "type": "session.updated", + "session": { + "id": "sess_001", + "object": "realtime.session", + "model": "gpt-realtime", + "modalities": ["text"], + "instructions": "New instructions", + "voice": "sage", + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": { + "model": "whisper-1" + }, + "turn_detection": null, + "tools": [], + "tool_choice": "none", + "temperature": 0.7, + "max_response_output_tokens": 200, + "speed": 1.1, + "tracing": "auto" + } + } + RealtimeBetaServerEventTranscriptionSessionCreated: + type: object + description: | + Returned when a transcription session is created. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `transcription_session.created`. + x-stainless-const: true + const: transcription_session.created + session: + $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateResponse' + required: + - event_id + - type + - session + x-oaiMeta: + name: transcription_session.created + group: realtime + example: | + { + "event_id": "event_5566", + "type": "transcription_session.created", + "session": { + "id": "sess_001", + "object": "realtime.transcription_session", + "input_audio_format": "pcm16", + "input_audio_transcription": { + "model": "gpt-4o-transcribe", + "prompt": "", + "language": "" + }, + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 500 + }, + "input_audio_noise_reduction": { + "type": "near_field" + }, + "include": [] + } + } + RealtimeBetaServerEventTranscriptionSessionUpdated: + type: object + description: | + Returned when a transcription session is updated with a `transcription_session.update` event, unless + there is an error. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `transcription_session.updated`. + x-stainless-const: true + const: transcription_session.updated + session: + $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateResponse' + required: + - event_id + - type + - session + x-oaiMeta: + name: transcription_session.updated + group: realtime + example: | + { + "event_id": "event_5678", + "type": "transcription_session.updated", + "session": { + "id": "sess_001", + "object": "realtime.transcription_session", + "input_audio_format": "pcm16", + "input_audio_transcription": { + "model": "gpt-4o-transcribe", + "prompt": "", + "language": "" + }, + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 500, + "create_response": true, + // "interrupt_response": false -- this will NOT be returned + }, + "input_audio_noise_reduction": { + "type": "near_field" + }, + "include": [ + "item.input_audio_transcription.avg_logprob", + ], + } + } + RealtimeCallCreateRequest: + title: Realtime call creation request + type: object + description: |- + Parameters required to initiate a realtime call and receive the SDP answer + needed to complete a WebRTC peer connection. Provide an SDP offer generated + by your client and optionally configure the session that will answer the call. 
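+      # Illustrative client flow (endpoint path and variable names are
+      # assumptions, not defined by this schema): create a local WebRTC offer,
+      # post it as `sdp`, then apply the returned SDP answer, e.g.:
+      #   offer = pc.createOffer(); pc.setLocalDescription(offer)
+      #   answer_sdp = POST <realtime call-creation endpoint> {"sdp": offer.sdp}
+      #   pc.setRemoteDescription({"type": "answer", "sdp": answer_sdp})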
+ required: + - sdp + properties: + sdp: + type: string + description: WebRTC Session Description Protocol (SDP) offer generated by the caller. + session: + title: Session configuration + allOf: + - $ref: '#/components/schemas/RealtimeSessionCreateRequestGA' + description: >- + Optional session configuration to apply before the realtime session is + + created. Use the same parameters you would send in a [`create client + secret`](https://platform.openai.com/docs/api-reference/realtime-sessions/create-realtime-client-secret) + + request. + additionalProperties: false + RealtimeCallReferRequest: + title: Realtime call refer request + type: object + description: |- + Parameters required to transfer a SIP call to a new destination using the + Realtime API. + required: + - target_uri + properties: + target_uri: + type: string + description: |- + URI that should appear in the SIP Refer-To header. Supports values like + `tel:+14155550123` or `sip:agent@example.com`. + example: tel:+14155550123 + additionalProperties: false + RealtimeCallRejectRequest: + title: Realtime call reject request + type: object + description: Parameters used to decline an incoming SIP call handled by the Realtime API. + properties: + status_code: + type: integer + description: |- + SIP response code to send back to the caller. Defaults to `603` (Decline) + when omitted. + example: 486 + additionalProperties: false + RealtimeClientEvent: + discriminator: + propertyName: type + description: | + A realtime client event. + anyOf: + - $ref: '#/components/schemas/RealtimeClientEventConversationItemCreate' + - $ref: '#/components/schemas/RealtimeClientEventConversationItemDelete' + - $ref: '#/components/schemas/RealtimeClientEventConversationItemRetrieve' + - $ref: '#/components/schemas/RealtimeClientEventConversationItemTruncate' + - $ref: '#/components/schemas/RealtimeClientEventInputAudioBufferAppend' + - $ref: '#/components/schemas/RealtimeClientEventInputAudioBufferClear' + - $ref: '#/components/schemas/RealtimeClientEventOutputAudioBufferClear' + - $ref: '#/components/schemas/RealtimeClientEventInputAudioBufferCommit' + - $ref: '#/components/schemas/RealtimeClientEventResponseCancel' + - $ref: '#/components/schemas/RealtimeClientEventResponseCreate' + - $ref: '#/components/schemas/RealtimeClientEventSessionUpdate' + RealtimeClientEventConversationItemCreate: + type: object + description: | + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + properties: + event_id: + type: string + maxLength: 512 + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `conversation.item.create`. + x-stainless-const: true + const: conversation.item.create + previous_item_id: + type: string + description: | + The ID of the preceding item after which the new item will be inserted. + If not set, the new item will be appended to the end of the conversation. + If set to `root`, the new item will be added to the beginning of the conversation. + If set to an existing ID, it allows an item to be inserted mid-conversation. If the + ID cannot be found, an error will be returned and the item will not be added. 
+        item:
+          $ref: '#/components/schemas/RealtimeConversationItem'
+      required:
+        - type
+        - item
+      x-oaiMeta:
+        name: conversation.item.create
+        group: realtime
+        example: |
+          {
+              "type": "conversation.item.create",
+              "item": {
+                  "type": "message",
+                  "role": "user",
+                  "content": [
+                      {
+                          "type": "input_text",
+                          "text": "hi"
+                      }
+                  ]
+              },
+              "event_id": "b904fba0-0ec4-40af-8bbb-f908a9b26793"
+          }
+    RealtimeClientEventConversationItemDelete:
+      type: object
+      description: |
+        Send this event when you want to remove any item from the conversation
+        history. The server will respond with a `conversation.item.deleted` event,
+        unless the item does not exist in the conversation history, in which case the
+        server will respond with an error.
+      properties:
+        event_id:
+          type: string
+          maxLength: 512
+          description: Optional client-generated ID used to identify this event.
+        type:
+          description: The event type, must be `conversation.item.delete`.
+          x-stainless-const: true
+          const: conversation.item.delete
+        item_id:
+          type: string
+          description: The ID of the item to delete.
+      required:
+        - type
+        - item_id
+      x-oaiMeta:
+        name: conversation.item.delete
+        group: realtime
+        example: |
+          {
+              "event_id": "event_901",
+              "type": "conversation.item.delete",
+              "item_id": "item_003"
+          }
+    RealtimeClientEventConversationItemRetrieve:
+      type: object
+      description: >
+        Send this event when you want to retrieve the server's representation of a specific item in the
+        conversation history. This is useful, for example, to inspect user audio after noise cancellation and
+        VAD.
+
+        The server will respond with a `conversation.item.retrieved` event,
+
+        unless the item does not exist in the conversation history, in which case the
+
+        server will respond with an error.
+      properties:
+        event_id:
+          type: string
+          maxLength: 512
+          description: Optional client-generated ID used to identify this event.
+        type:
+          description: The event type, must be `conversation.item.retrieve`.
+          x-stainless-const: true
+          const: conversation.item.retrieve
+        item_id:
+          type: string
+          description: The ID of the item to retrieve.
+      required:
+        - type
+        - item_id
+      x-oaiMeta:
+        name: conversation.item.retrieve
+        group: realtime
+        example: |
+          {
+              "event_id": "event_901",
+              "type": "conversation.item.retrieve",
+              "item_id": "item_003"
+          }
+    RealtimeClientEventConversationItemTruncate:
+      type: object
+      description: |
+        Send this event to truncate a previous assistant message’s audio. The server
+        will produce audio faster than realtime, so this event is useful when the user
+        interrupts to truncate audio that has already been sent to the client but not
+        yet played. This will synchronize the server's understanding of the audio with
+        the client's playback.
+
+        Truncating audio will delete the server-side text transcript to ensure there
+        is no text in the context that hasn't been heard by the user.
+
+        If successful, the server will respond with a `conversation.item.truncated`
+        event.
+      properties:
+        event_id:
+          type: string
+          maxLength: 512
+          description: Optional client-generated ID used to identify this event.
+        type:
+          description: The event type, must be `conversation.item.truncate`.
+          x-stainless-const: true
+          const: conversation.item.truncate
+        item_id:
+          type: string
+          description: |
+            The ID of the assistant message item to truncate. Only assistant message
+            items can be truncated.
+        content_index:
+          type: integer
+          description: The index of the content part to truncate. Set this to `0`.
+ audio_end_ms: + type: integer + description: | + Inclusive duration up to which audio is truncated, in milliseconds. If + the audio_end_ms is greater than the actual audio duration, the server + will respond with an error. + required: + - type + - item_id + - content_index + - audio_end_ms + x-oaiMeta: + name: conversation.item.truncate + group: realtime + example: | + { + "event_id": "event_678", + "type": "conversation.item.truncate", + "item_id": "item_002", + "content_index": 0, + "audio_end_ms": 1500 + } + RealtimeClientEventInputAudioBufferAppend: + type: object + description: | + Send this event to append audio bytes to the input audio buffer. The audio + buffer is temporary storage you can write to and later commit. A "commit" will create a new + user message item in the conversation history from the buffer content and clear the buffer. + Input audio transcription (if enabled) will be generated when the buffer is committed. + + If VAD is enabled the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. Input audio noise reduction operates on writes to the audio buffer. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike most other client events, the server will + not send a confirmation response to this event. + properties: + event_id: + type: string + maxLength: 512 + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `input_audio_buffer.append`. + x-stainless-const: true + const: input_audio_buffer.append + audio: + type: string + description: | + Base64-encoded audio bytes. This must be in the format specified by the + `input_audio_format` field in the session configuration. + required: + - type + - audio + x-oaiMeta: + name: input_audio_buffer.append + group: realtime + example: | + { + "event_id": "event_456", + "type": "input_audio_buffer.append", + "audio": "Base64EncodedAudioData" + } + RealtimeClientEventInputAudioBufferClear: + type: object + description: | + Send this event to clear the audio bytes in the buffer. The server will + respond with an `input_audio_buffer.cleared` event. + properties: + event_id: + type: string + maxLength: 512 + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `input_audio_buffer.clear`. + x-stainless-const: true + const: input_audio_buffer.clear + required: + - type + x-oaiMeta: + name: input_audio_buffer.clear + group: realtime + example: | + { + "event_id": "event_012", + "type": "input_audio_buffer.clear" + } + RealtimeClientEventInputAudioBufferCommit: + type: object + description: > + Send this event to commit the user input audio buffer, which will create a new user message item in + the conversation. This event will produce an error if the input audio buffer is empty. When in Server + VAD mode, the client does not need to send this event, the server will commit the audio buffer + automatically. + + + Committing the input audio buffer will trigger input audio transcription (if enabled in session + configuration), but it will not create a response from the model. The server will respond with an + `input_audio_buffer.committed` event. 
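+      # Illustrative manual-commit sequence (no server VAD), assembled from the
+      # event descriptions above; the client handle `ws` is hypothetical:
+      #   ws.send({"type": "input_audio_buffer.append", "audio": b64_chunk})
+      #   ws.send({"type": "input_audio_buffer.commit"})
+      #   <- input_audio_buffer.committed
+      #   <- conversation.item.created (and transcription events, if enabled)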
+      properties:
+        event_id:
+          type: string
+          maxLength: 512
+          description: Optional client-generated ID used to identify this event.
+        type:
+          description: The event type, must be `input_audio_buffer.commit`.
+          x-stainless-const: true
+          const: input_audio_buffer.commit
+      required:
+        - type
+      x-oaiMeta:
+        name: input_audio_buffer.commit
+        group: realtime
+        example: |
+          {
+              "event_id": "event_789",
+              "type": "input_audio_buffer.commit"
+          }
+    RealtimeClientEventOutputAudioBufferClear:
+      type: object
+      description: >
+        **WebRTC Only:** Emit to cut off the current audio response. This will trigger the server to
+
+        stop generating audio and emit a `output_audio_buffer.cleared` event. This
+
+        event should be preceded by a `response.cancel` client event to stop the
+
+        generation of the current response.
+
+        [Learn
+        more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the client event used for error handling.
+        type:
+          description: The event type, must be `output_audio_buffer.clear`.
+          x-stainless-const: true
+          const: output_audio_buffer.clear
+      required:
+        - type
+      x-oaiMeta:
+        name: output_audio_buffer.clear
+        group: realtime
+        example: |
+          {
+              "event_id": "optional_client_event_id",
+              "type": "output_audio_buffer.clear"
+          }
+    RealtimeClientEventResponseCancel:
+      type: object
+      description: |
+        Send this event to cancel an in-progress response. The server will respond
+        with a `response.done` event with a status of `response.status=cancelled`. If
+        there is no response to cancel, the server will respond with an error. It's safe
+        to call `response.cancel` even if no response is in progress; an error will be
+        returned but the session will remain unaffected.
+      properties:
+        event_id:
+          type: string
+          maxLength: 512
+          description: Optional client-generated ID used to identify this event.
+        type:
+          description: The event type, must be `response.cancel`.
+          x-stainless-const: true
+          const: response.cancel
+        response_id:
+          type: string
+          description: |
+            A specific response ID to cancel - if not provided, will cancel an
+            in-progress response in the default conversation.
+      required:
+        - type
+      x-oaiMeta:
+        name: response.cancel
+        group: realtime
+        example: |
+          {
+              "type": "response.cancel",
+              "response_id": "resp_12345"
+          }
+    RealtimeClientEventResponseCreate:
+      type: object
+      description: |
+        This event instructs the server to create a Response, which means triggering
+        model inference. When in Server VAD mode, the server will create Responses
+        automatically.
+
+        A Response will include at least one Item, and may have two, in which case
+        the second will be a function call. These Items will be appended to the
+        conversation history by default.
+
+        The server will respond with a `response.created` event, events for Items
+        and content created, and finally a `response.done` event to indicate the
+        Response is complete.
+
+        The `response.create` event includes inference configuration like
+        `instructions` and `tools`. If these are set, they will override the Session's
+        configuration for this Response only.
+
+        Responses can be created out-of-band of the default Conversation, meaning that they can
+        have arbitrary input, and it's possible to disable writing the output to the Conversation.
+        Only one Response can write to the default Conversation at a time, but otherwise multiple
+        Responses can be created in parallel.
The `metadata` field is a good way to disambiguate + multiple simultaneous Responses. + + Clients can set `conversation` to `none` to create a Response that does not write to the default + Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting + raw Items and references to existing Items. + properties: + event_id: + type: string + maxLength: 512 + description: Optional client-generated ID used to identify this event. + type: + description: The event type, must be `response.create`. + x-stainless-const: true + const: response.create + response: + $ref: '#/components/schemas/RealtimeResponseCreateParams' + required: + - type + x-oaiMeta: + name: response.create + group: realtime + example: | + // Trigger a response with the default Conversation and no special parameters + { + "type": "response.create", + } + + // Trigger an out-of-band response that does not write to the default Conversation + { + "type": "response.create", + "response": { + "instructions": "Provide a concise answer.", + "tools": [], // clear any session tools + "conversation": "none", + "output_modalities": ["text"], + "metadata": { + "response_purpose": "summarization" + }, + "input": [ + { + "type": "item_reference", + "id": "item_12345", + }, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "Summarize the above message in one sentence." + } + ] + } + ], + } + } + RealtimeClientEventSessionUpdate: + type: object + description: > + Send this event to update the session’s configuration. + + The client may send this event at any time to update any field + + except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs + yet. + + + When the server receives a `session.update`, it will respond + + with a `session.updated` event showing the full, effective configuration. + + Only the fields that are present in the `session.update` are updated. To clear a field like + + `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array. + + To clear a field like `turn_detection`, pass `null`. + properties: + event_id: + type: string + maxLength: 512 + description: >- + Optional client-generated ID used to identify this event. This is an arbitrary string that a + client may assign. It will be passed back if there is an error with the event, but the + corresponding `session.updated` event will not include it. + type: + description: The event type, must be `session.update`. + x-stainless-const: true + const: session.update + session: + type: object + description: | + Update the Realtime session. Choose either a realtime + session or a transcription session. + anyOf: + - $ref: '#/components/schemas/RealtimeSessionCreateRequestGA' + - $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateRequestGA' + required: + - type + - session + x-oaiMeta: + name: session.update + group: realtime + example: | + { + "type": "session.update", + "session": { + "type": "realtime", + "instructions": "You are a creative assistant that helps with design tasks.", + "tools": [ + { + "type": "function", + "name": "display_color_palette", + "description": "Call this function when a user asks for a color palette.", + "parameters": { + "type": "object", + "strict": true, + "properties": { + "theme": { + "type": "string", + "description": "Description of the theme for the color scheme." 
+                      },
+                      "colors": {
+                        "type": "array",
+                        "description": "Array of five hex color codes based on the theme.",
+                        "items": {
+                          "type": "string",
+                          "description": "Hex color code"
+                        }
+                      }
+                    },
+                    "required": [
+                      "theme",
+                      "colors"
+                    ]
+                  }
+                }
+              ],
+              "tool_choice": "auto"
+            },
+            "event_id": "5fc543c4-f59c-420f-8fb9-68c45d1546a7"
+          }
+    RealtimeClientEventTranscriptionSessionUpdate:
+      type: object
+      description: |
+        Send this event to update a transcription session.
+      properties:
+        event_id:
+          type: string
+          description: Optional client-generated ID used to identify this event.
+        type:
+          description: The event type, must be `transcription_session.update`.
+          x-stainless-const: true
+          const: transcription_session.update
+        session:
+          $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateRequest'
+      required:
+        - type
+        - session
+      x-oaiMeta:
+        name: transcription_session.update
+        group: realtime
+        example: |
+          {
+            "type": "transcription_session.update",
+            "session": {
+              "input_audio_format": "pcm16",
+              "input_audio_transcription": {
+                "model": "gpt-4o-transcribe",
+                "prompt": "",
+                "language": ""
+              },
+              "turn_detection": {
+                "type": "server_vad",
+                "threshold": 0.5,
+                "prefix_padding_ms": 300,
+                "silence_duration_ms": 500,
+                "create_response": true
+              },
+              "input_audio_noise_reduction": {
+                "type": "near_field"
+              },
+              "include": [
+                "item.input_audio_transcription.logprobs"
+              ]
+            }
+          }
+    RealtimeConversationItem:
+      description: A single item within a Realtime conversation.
+      anyOf:
+        - $ref: '#/components/schemas/RealtimeConversationItemMessageSystem'
+        - $ref: '#/components/schemas/RealtimeConversationItemMessageUser'
+        - $ref: '#/components/schemas/RealtimeConversationItemMessageAssistant'
+        - $ref: '#/components/schemas/RealtimeConversationItemFunctionCall'
+        - $ref: '#/components/schemas/RealtimeConversationItemFunctionCallOutput'
+        - $ref: '#/components/schemas/RealtimeMCPApprovalResponse'
+        - $ref: '#/components/schemas/RealtimeMCPListTools'
+        - $ref: '#/components/schemas/RealtimeMCPToolCall'
+        - $ref: '#/components/schemas/RealtimeMCPApprovalRequest'
+      discriminator:
+        propertyName: type
+    RealtimeConversationItemFunctionCall:
+      type: object
+      title: Realtime function call item
+      description: A function call item in a Realtime conversation.
+      properties:
+        id:
+          type: string
+          description: The unique ID of the item. This may be provided by the client or generated by the server.
+        object:
+          type: string
+          enum:
+            - realtime.item
+          description: >-
+            Identifier for the API object being returned - always `realtime.item`. Optional when creating a
+            new item.
+          x-stainless-const: true
+        type:
+          type: string
+          enum:
+            - function_call
+          description: The type of the item. Always `function_call`.
+          x-stainless-const: true
+        status:
+          type: string
+          enum:
+            - completed
+            - incomplete
+            - in_progress
+          description: The status of the item. Has no effect on the conversation.
+        call_id:
+          type: string
+          description: The ID of the function call.
+        name:
+          type: string
+          description: The name of the function being called.
+        arguments:
+          type: string
+          description: >-
+            The arguments of the function call. This is a JSON-encoded string representing the arguments
+            passed to the function, for example `{"arg1": "value1", "arg2": 42}`.
+      required:
+        - type
+        - name
+        - arguments
+    RealtimeConversationItemFunctionCallOutput:
+      type: object
+      title: Realtime function call output item
+      description: A function call output item in a Realtime conversation.
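+      # Editor's sketch (illustrative, not part of the upstream spec): a client
+      # typically creates one of these items to return a tool result, then asks
+      # for a new response. Hypothetical payloads, assuming a prior function_call
+      # with call_id "call_001":
+      #   {"type": "conversation.item.create", "item": {"type": "function_call_output",
+      #     "call_id": "call_001", "output": "{\"temperature_c\": 21}"}}
+      #   {"type": "response.create"}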
+ properties: + id: + type: string + description: The unique ID of the item. This may be provided by the client or generated by the server. + object: + type: string + enum: + - realtime.item + description: >- + Identifier for the API object being returned - always `realtime.item`. Optional when creating a + new item. + x-stainless-const: true + type: + type: string + enum: + - function_call_output + description: The type of the item. Always `function_call_output`. + x-stainless-const: true + status: + type: string + enum: + - completed + - incomplete + - in_progress + description: The status of the item. Has no effect on the conversation. + call_id: + type: string + description: The ID of the function call this output is for. + output: + type: string + description: >- + The output of the function call, this is free text and can contain any information or simply be + empty. + required: + - type + - call_id + - output + RealtimeConversationItemMessageAssistant: + type: object + title: Realtime assistant message item + description: An assistant message item in a Realtime conversation. + properties: + id: + type: string + description: The unique ID of the item. This may be provided by the client or generated by the server. + object: + type: string + enum: + - realtime.item + description: >- + Identifier for the API object being returned - always `realtime.item`. Optional when creating a + new item. + x-stainless-const: true + type: + type: string + enum: + - message + description: The type of the item. Always `message`. + x-stainless-const: true + status: + type: string + enum: + - completed + - incomplete + - in_progress + description: The status of the item. Has no effect on the conversation. + role: + type: string + enum: + - assistant + description: The role of the message sender. Always `assistant`. + x-stainless-const: true + content: + type: array + description: The content of the message. + items: + type: object + properties: + type: + type: string + enum: + - output_text + - output_audio + description: >- + The content type, `output_text` or `output_audio` depending on the session + `output_modalities` configuration. + text: + type: string + description: The text content. + audio: + type: string + description: >- + Base64-encoded audio bytes, these will be parsed as the format specified in the session + output audio type configuration. This defaults to PCM 16-bit 24kHz mono if not specified. + transcript: + type: string + description: >- + The transcript of the audio content, this will always be present if the output type is + `audio`. + required: + - type + - role + - content + RealtimeConversationItemMessageSystem: + type: object + title: Realtime system message item + description: >- + A system message in a Realtime conversation can be used to provide additional context or instructions + to the model. This is similar but distinct from the instruction prompt provided at the start of a + conversation, as system messages can be added at any point in the conversation. For major changes to + the conversation's behavior, use instructions, but for smaller updates (e.g. "the user is now asking + about a different topic"), use system messages. + properties: + id: + type: string + description: The unique ID of the item. This may be provided by the client or generated by the server. + object: + type: string + enum: + - realtime.item + description: >- + Identifier for the API object being returned - always `realtime.item`. Optional when creating a + new item. 
+ x-stainless-const: true + type: + type: string + enum: + - message + description: The type of the item. Always `message`. + x-stainless-const: true + status: + type: string + enum: + - completed + - incomplete + - in_progress + description: The status of the item. Has no effect on the conversation. + role: + type: string + enum: + - system + description: The role of the message sender. Always `system`. + x-stainless-const: true + content: + type: array + description: The content of the message. + items: + type: object + properties: + type: + type: string + enum: + - input_text + description: The content type. Always `input_text` for system messages. + x-stainless-const: true + text: + type: string + description: The text content. + required: + - type + - role + - content + RealtimeConversationItemMessageUser: + type: object + title: Realtime user message item + description: A user message item in a Realtime conversation. + properties: + id: + type: string + description: The unique ID of the item. This may be provided by the client or generated by the server. + object: + type: string + enum: + - realtime.item + description: >- + Identifier for the API object being returned - always `realtime.item`. Optional when creating a + new item. + x-stainless-const: true + type: + type: string + enum: + - message + description: The type of the item. Always `message`. + x-stainless-const: true + status: + type: string + enum: + - completed + - incomplete + - in_progress + description: The status of the item. Has no effect on the conversation. + role: + type: string + enum: + - user + description: The role of the message sender. Always `user`. + x-stainless-const: true + content: + type: array + description: The content of the message. + items: + type: object + properties: + type: + type: string + enum: + - input_text + - input_audio + - input_image + description: The content type (`input_text`, `input_audio`, or `input_image`). + text: + type: string + description: The text content (for `input_text`). + audio: + type: string + description: >- + Base64-encoded audio bytes (for `input_audio`), these will be parsed as the format specified + in the session input audio type configuration. This defaults to PCM 16-bit 24kHz mono if not + specified. + image_url: + type: string + description: >- + Base64-encoded image bytes (for `input_image`) as a data URI. For example + `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`. Supported formats are PNG and JPEG. + detail: + type: string + description: The detail level of the image (for `input_image`). `auto` will default to `high`. + default: auto + enum: + - auto + - low + - high + transcript: + type: string + description: >- + Transcript of the audio (for `input_audio`). This is not sent to the model, but will be + attached to the message item for reference. + required: + - type + - role + - content + RealtimeConversationItemWithReference: + type: object + description: The item to add to the conversation. + properties: + id: + type: string + description: | + For an item of type (`message` | `function_call` | `function_call_output`) + this field allows the client to assign the unique ID of the item. It is + not required because the server will generate one if not provided. + + For an item of type `item_reference`, this field is required and is a + reference to any item that has previously existed in the conversation. 
+ type: + type: string + enum: + - message + - function_call + - function_call_output + - item_reference + description: | + The type of the item (`message`, `function_call`, `function_call_output`, `item_reference`). + object: + type: string + enum: + - realtime.item + description: | + Identifier for the API object being returned - always `realtime.item`. + x-stainless-const: true + status: + type: string + enum: + - completed + - incomplete + - in_progress + description: | + The status of the item (`completed`, `incomplete`, `in_progress`). These have no effect + on the conversation, but are accepted for consistency with the + `conversation.item.created` event. + role: + type: string + enum: + - user + - assistant + - system + description: | + The role of the message sender (`user`, `assistant`, `system`), only + applicable for `message` items. + content: + type: array + description: | + The content of the message, applicable for `message` items. + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` + content + - Message items of role `assistant` support `text` content. + items: + type: object + properties: + type: + type: string + enum: + - input_text + - input_audio + - item_reference + - text + description: | + The content type (`input_text`, `input_audio`, `item_reference`, `text`). + text: + type: string + description: | + The text content, used for `input_text` and `text` content types. + id: + type: string + description: | + ID of a previous conversation item to reference (for `item_reference` + content types in `response.create` events). These can reference both + client and server created items. + audio: + type: string + description: | + Base64-encoded audio bytes, used for `input_audio` content type. + transcript: + type: string + description: | + The transcript of the audio, used for `input_audio` content type. + call_id: + type: string + description: | + The ID of the function call (for `function_call` and + `function_call_output` items). If passed on a `function_call_output` + item, the server will check that a `function_call` item with the same + ID exists in the conversation history. + name: + type: string + description: | + The name of the function being called (for `function_call` items). + arguments: + type: string + description: | + The arguments of the function call (for `function_call` items). + output: + type: string + description: | + The output of the function call (for `function_call_output` items). + RealtimeCreateClientSecretRequest: + type: object + title: Realtime client secret creation request + description: | + Create a session and client secret for the Realtime API. The request can specify + either a realtime or a transcription session configuration. + [Learn more about the Realtime API](https://platform.openai.com/docs/guides/realtime). + properties: + expires_after: + type: object + title: Client secret expiration + description: | + Configuration for the client secret expiration. Expiration refers to the time after which + a client secret will no longer be valid for creating sessions. The session itself may + continue after that time once started. A secret can be used to create multiple sessions + until it expires. + properties: + anchor: + type: string + enum: + - created_at + description: > + The anchor point for the client secret expiration, meaning that `seconds` will be added to the + `created_at` time of the client secret to produce an expiration timestamp. 
Only `created_at`
+                is currently supported.
+              default: created_at
+              x-stainless-const: true
+            seconds:
+              type: integer
+              description: >
+                The number of seconds from the anchor point to the expiration. Select a value between `10` and
+                `7200` (2 hours). This defaults to 600 seconds (10 minutes) if not specified.
+              minimum: 10
+              maximum: 7200
+              default: 600
+        session:
+          title: Session configuration
+          description: |
+            Session configuration to use for the client secret. Choose either a realtime
+            session or a transcription session.
+          anyOf:
+            - $ref: '#/components/schemas/RealtimeSessionCreateRequestGA'
+            - $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateRequestGA'
+          discriminator:
+            propertyName: type
+    RealtimeCreateClientSecretResponse:
+      type: object
+      title: Realtime session and client secret
+      description: |
+        Response from creating a session and client secret for the Realtime API.
+      properties:
+        value:
+          type: string
+          description: The generated client secret value.
+        expires_at:
+          type: integer
+          description: Expiration timestamp for the client secret, in seconds since epoch.
+        session:
+          title: Session configuration
+          description: |
+            The session configuration for either a realtime or transcription session.
+          discriminator:
+            propertyName: type
+          anyOf:
+            - $ref: '#/components/schemas/RealtimeSessionCreateResponseGA'
+            - $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateResponseGA'
+      required:
+        - value
+        - expires_at
+        - session
+      x-oaiMeta:
+        name: Session response object
+        group: realtime
+        example: |
+          {
+            "value": "ek_68af296e8e408191a1120ab6383263c2",
+            "expires_at": 1756310470,
+            "session": {
+              "type": "realtime",
+              "object": "realtime.session",
+              "id": "sess_C9CiUVUzUzYIssh3ELY1d",
+              "model": "gpt-realtime-2025-08-25",
+              "output_modalities": [
+                "audio"
+              ],
+              "instructions": "You are a friendly assistant.",
+              "tools": [],
+              "tool_choice": "auto",
+              "max_output_tokens": "inf",
+              "tracing": null,
+              "truncation": "auto",
+              "prompt": null,
+              "expires_at": 0,
+              "audio": {
+                "input": {
+                  "format": {
+                    "type": "audio/pcm",
+                    "rate": 24000
+                  },
+                  "transcription": null,
+                  "noise_reduction": null,
+                  "turn_detection": {
+                    "type": "server_vad",
+                    "threshold": 0.5,
+                    "prefix_padding_ms": 300,
+                    "silence_duration_ms": 200,
+                    "idle_timeout_ms": null,
+                    "create_response": true,
+                    "interrupt_response": true
+                  }
+                },
+                "output": {
+                  "format": {
+                    "type": "audio/pcm",
+                    "rate": 24000
+                  },
+                  "voice": "alloy",
+                  "speed": 1.0
+                }
+              },
+              "include": null
+            }
+          }
+    RealtimeFunctionTool:
+      type: object
+      title: Function tool
+      properties:
+        type:
+          type: string
+          enum:
+            - function
+          description: The type of the tool, i.e. `function`.
+          x-stainless-const: true
+        name:
+          type: string
+          description: The name of the function.
+        description:
+          type: string
+          description: |
+            The description of the function, including guidance on when and how
+            to call it, and guidance about what to tell the user when calling
+            (if anything).
+        parameters:
+          type: object
+          description: Parameters of the function in JSON Schema.
+    RealtimeMCPApprovalRequest:
+      type: object
+      title: Realtime MCP approval request
+      description: |
+        A Realtime item requesting human approval of a tool invocation.
+      properties:
+        type:
+          type: string
+          enum:
+            - mcp_approval_request
+          description: The type of the item. Always `mcp_approval_request`.
+          x-stainless-const: true
+        id:
+          type: string
+          description: The unique ID of the approval request.
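+        # Editor's sketch (illustrative, not part of the upstream spec): a client answers
+        # one of these items by creating an approval response item. Hypothetical payload,
+        # assuming the request item has id "mcpr_001" (the "id" below is client-chosen):
+        #   {"type": "conversation.item.create", "item": {"type": "mcp_approval_response",
+        #     "id": "mcpres_001", "approval_request_id": "mcpr_001", "approve": true}}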
+ server_label: + type: string + description: The label of the MCP server making the request. + name: + type: string + description: The name of the tool to run. + arguments: + type: string + description: A JSON string of arguments for the tool. + required: + - type + - id + - server_label + - name + - arguments + RealtimeMCPApprovalResponse: + type: object + title: Realtime MCP approval response + description: | + A Realtime item responding to an MCP approval request. + properties: + type: + type: string + enum: + - mcp_approval_response + description: The type of the item. Always `mcp_approval_response`. + x-stainless-const: true + id: + type: string + description: The unique ID of the approval response. + approval_request_id: + type: string + description: The ID of the approval request being answered. + approve: + type: boolean + description: Whether the request was approved. + reason: + anyOf: + - type: string + description: Optional reason for the decision. + - type: 'null' + required: + - type + - id + - approval_request_id + - approve + RealtimeMCPHTTPError: + type: object + title: Realtime MCP HTTP error + properties: + type: + type: string + enum: + - http_error + x-stainless-const: true + code: + type: integer + message: + type: string + required: + - type + - code + - message + RealtimeMCPListTools: + type: object + title: Realtime MCP list tools + description: | + A Realtime item listing tools available on an MCP server. + properties: + type: + type: string + enum: + - mcp_list_tools + description: The type of the item. Always `mcp_list_tools`. + x-stainless-const: true + id: + type: string + description: The unique ID of the list. + server_label: + type: string + description: The label of the MCP server. + tools: + type: array + items: + $ref: '#/components/schemas/MCPListToolsTool' + description: The tools available on the server. + required: + - type + - server_label + - tools + RealtimeMCPProtocolError: + type: object + title: Realtime MCP protocol error + properties: + type: + type: string + enum: + - protocol_error + x-stainless-const: true + code: + type: integer + message: + type: string + required: + - type + - code + - message + RealtimeMCPToolCall: + type: object + title: Realtime MCP tool call + description: | + A Realtime item representing an invocation of a tool on an MCP server. + properties: + type: + type: string + enum: + - mcp_call + description: The type of the item. Always `mcp_call`. + x-stainless-const: true + id: + type: string + description: The unique ID of the tool call. + server_label: + type: string + description: The label of the MCP server running the tool. + name: + type: string + description: The name of the tool that was run. + arguments: + type: string + description: A JSON string of the arguments passed to the tool. + approval_request_id: + anyOf: + - type: string + description: The ID of an associated approval request, if any. + - type: 'null' + output: + anyOf: + - type: string + description: The output from the tool call. + - type: 'null' + error: + anyOf: + - description: The error from the tool call, if any. 
+              anyOf:
+                - $ref: '#/components/schemas/RealtimeMCPProtocolError'
+                - $ref: '#/components/schemas/RealtimeMCPToolExecutionError'
+                - $ref: '#/components/schemas/RealtimeMCPHTTPError'
+              discriminator:
+                propertyName: type
+            - type: 'null'
+      required:
+        - type
+        - id
+        - server_label
+        - name
+        - arguments
+    RealtimeMCPToolExecutionError:
+      type: object
+      title: Realtime MCP tool execution error
+      properties:
+        type:
+          type: string
+          enum:
+            - tool_execution_error
+          x-stainless-const: true
+        message:
+          type: string
+      required:
+        - type
+        - message
+    RealtimeResponse:
+      type: object
+      description: The response resource.
+      properties:
+        id:
+          type: string
+          description: The unique ID of the response, will look like `resp_1234`.
+        object:
+          description: The object type, must be `realtime.response`.
+          x-stainless-const: true
+          const: realtime.response
+        status:
+          type: string
+          enum:
+            - completed
+            - cancelled
+            - failed
+            - incomplete
+            - in_progress
+          description: |
+            The final status of the response (`completed`, `cancelled`, `failed`,
+            `incomplete`, or `in_progress`).
+        status_details:
+          type: object
+          description: Additional details about the status.
+          properties:
+            type:
+              type: string
+              enum:
+                - completed
+                - cancelled
+                - incomplete
+                - failed
+              description: |
+                The type of error that caused the response to fail, corresponding
+                with the `status` field (`completed`, `cancelled`, `incomplete`,
+                `failed`).
+            reason:
+              type: string
+              enum:
+                - turn_detected
+                - client_cancelled
+                - max_output_tokens
+                - content_filter
+              description: >
+                The reason the Response did not complete. For a `cancelled` Response, one of `turn_detected`
+                (the server VAD detected a new start of speech) or `client_cancelled` (the client sent a
+                cancel event). For an `incomplete` Response, one of `max_output_tokens` or `content_filter`
+                (the server-side safety filter activated and cut off the response).
+            error:
+              type: object
+              description: |
+                A description of the error that caused the response to fail,
+                populated when the `status` is `failed`.
+              properties:
+                type:
+                  type: string
+                  description: The type of error.
+                code:
+                  type: string
+                  description: Error code, if any.
+        output:
+          type: array
+          description: The list of output items generated by the response.
+          items:
+            $ref: '#/components/schemas/RealtimeConversationItem'
+        metadata:
+          $ref: '#/components/schemas/Metadata'
+        audio:
+          type: object
+          description: Configuration for audio output.
+          properties:
+            output:
+              type: object
+              properties:
+                format:
+                  $ref: '#/components/schemas/RealtimeAudioFormats'
+                  description: The format of the output audio.
+                voice:
+                  $ref: '#/components/schemas/VoiceIdsShared'
+                  default: alloy
+                  description: |
+                    The voice the model uses to respond. Voice cannot be changed during the
+                    session once the model has responded with audio at least once. Current
+                    voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`,
+                    `shimmer`, `verse`, `marin`, and `cedar`. We recommend `marin` and `cedar` for
+                    best quality.
+        usage:
+          type: object
+          description: |
+            Usage statistics for the Response; this will correspond to billing. A
+            Realtime API session will maintain a conversation context and append new
+            Items to the Conversation, thus output from previous turns (text and
+            audio tokens) will become the input for later turns.
+          properties:
+            total_tokens:
+              type: integer
+              description: |
+                The total number of tokens in the Response including input and output
+                text and audio tokens.
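+            # Editor's note (illustrative, not part of the upstream spec): per the field
+            # descriptions here, total_tokens = input_tokens + output_tokens, and the
+            # token-details breakdowns are subsets of those counts. For example:
+            #   {"total_tokens": 120, "input_tokens": 100, "output_tokens": 20,
+            #    "input_token_details": {"text_tokens": 40, "audio_tokens": 60, "cached_tokens": 60}}
+            # would mean 60 of the 100 input tokens were cached from earlier turns.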
+            input_tokens:
+              type: integer
+              description: |
+                The number of input tokens used in the Response, including text and
+                audio tokens.
+            output_tokens:
+              type: integer
+              description: |
+                The number of output tokens sent in the Response, including text and
+                audio tokens.
+            input_token_details:
+              type: object
+              description: >-
+                Details about the input tokens used in the Response. Cached tokens are tokens from previous
+                turns in the conversation that are included as context for the current response. Cached tokens
+                here are counted as a subset of input tokens, meaning input tokens will include cached and
+                uncached tokens.
+              properties:
+                cached_tokens:
+                  type: integer
+                  description: The number of cached tokens used as input for the Response.
+                text_tokens:
+                  type: integer
+                  description: The number of text tokens used as input for the Response.
+                image_tokens:
+                  type: integer
+                  description: The number of image tokens used as input for the Response.
+                audio_tokens:
+                  type: integer
+                  description: The number of audio tokens used as input for the Response.
+                cached_tokens_details:
+                  type: object
+                  description: Details about the cached tokens used as input for the Response.
+                  properties:
+                    text_tokens:
+                      type: integer
+                      description: The number of cached text tokens used as input for the Response.
+                    image_tokens:
+                      type: integer
+                      description: The number of cached image tokens used as input for the Response.
+                    audio_tokens:
+                      type: integer
+                      description: The number of cached audio tokens used as input for the Response.
+            output_token_details:
+              type: object
+              description: Details about the output tokens used in the Response.
+              properties:
+                text_tokens:
+                  type: integer
+                  description: The number of text tokens used in the Response.
+                audio_tokens:
+                  type: integer
+                  description: The number of audio tokens used in the Response.
+        conversation_id:
+          description: |
+            Which conversation the response is added to, determined by the `conversation`
+            field in the `response.create` event. If `auto`, the response will be added to
+            the default conversation and the value of `conversation_id` will be an id like
+            `conv_1234`. If `none`, the response will not be added to any conversation and
+            the value of `conversation_id` will be `null`. If responses are being triggered
+            automatically by VAD, the response will be added to the default conversation.
+          type: string
+        output_modalities:
+          type: array
+          description: |
+            The set of modalities the model used to respond, currently the only possible values are
+            `[\"audio\"]`, `[\"text\"]`. Audio output always includes a text transcript. Setting the
+            output mode to `text` will disable audio output from the model.
+          items:
+            type: string
+            enum:
+              - text
+              - audio
+        max_output_tokens:
+          description: |
+            Maximum number of output tokens for a single assistant response,
+            inclusive of tool calls, that was used in this response.
+          anyOf:
+            - type: integer
+            - type: string
+              enum:
+                - inf
+              x-stainless-const: true
+    RealtimeResponseCreateParams:
+      type: object
+      description: Create a new Realtime response with these parameters.
+      properties:
+        output_modalities:
+          type: array
+          description: |
+            The set of modalities the model used to respond, currently the only possible values are
+            `[\"audio\"]`, `[\"text\"]`. Audio output always includes a text transcript. Setting the
+            output mode to `text` will disable audio output from the model.
+          items:
+            type: string
+            enum:
+              - text
+              - audio
+        instructions:
+          type: string
+          description: >
+            The default system instructions (i.e.
system message) prepended to model calls. This field allows + the client to guide the model on desired responses. The model can be instructed on response + content and format, (e.g. "be extremely succinct", "act friendly", "here are examples of good + responses") and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", "laugh + frequently"). The instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field is not set and are + visible in the `session.created` event at the start of the session. + audio: + type: object + description: Configuration for audio input and output. + properties: + output: + type: object + properties: + format: + $ref: '#/components/schemas/RealtimeAudioFormats' + description: The format of the output audio. + voice: + $ref: '#/components/schemas/VoiceIdsShared' + default: alloy + description: | + The voice the model uses to respond. Voice cannot be changed during the + session once the model has responded with audio at least once. Current + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, + `shimmer`, `verse`, `marin`, and `cedar`. We recommend `marin` and `cedar` for + best quality. + tools: + type: array + description: Tools available to the model. + items: + anyOf: + - $ref: '#/components/schemas/RealtimeFunctionTool' + - $ref: '#/components/schemas/MCPTool' + tool_choice: + description: | + How the model chooses tools. Provide one of the string modes or force a specific + function/MCP tool. + default: auto + anyOf: + - $ref: '#/components/schemas/ToolChoiceOptions' + - $ref: '#/components/schemas/ToolChoiceFunction' + - $ref: '#/components/schemas/ToolChoiceMCP' + max_output_tokens: + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls. Provide an integer between 1 and 4096 to + limit output tokens, or `inf` for the maximum available tokens for a + given model. Defaults to `inf`. + anyOf: + - type: integer + - type: string + enum: + - inf + x-stainless-const: true + conversation: + description: | + Controls which conversation the response is added to. Currently supports + `auto` and `none`, with `auto` as the default value. The `auto` value + means that the contents of the response will be added to the default + conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + anyOf: + - type: string + - type: string + default: auto + enum: + - auto + - none + metadata: + $ref: '#/components/schemas/Metadata' + prompt: + $ref: '#/components/schemas/Prompt' + input: + type: array + description: | + Input items to include in the prompt for the model. Using this field + creates a new context for this Response instead of using the default + conversation. An empty array `[]` will clear the context for this Response. + Note that this can include references to items that previously appeared in the session + using their id. + items: + $ref: '#/components/schemas/RealtimeConversationItem' + RealtimeServerEvent: + discriminator: + propertyName: type + description: | + A realtime server event. 
+ anyOf: + - $ref: '#/components/schemas/RealtimeServerEventConversationCreated' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemCreated' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemDeleted' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemInputAudioTranscriptionCompleted' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemInputAudioTranscriptionDelta' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemInputAudioTranscriptionFailed' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemRetrieved' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemTruncated' + - $ref: '#/components/schemas/RealtimeServerEventError' + - $ref: '#/components/schemas/RealtimeServerEventInputAudioBufferCleared' + - $ref: '#/components/schemas/RealtimeServerEventInputAudioBufferCommitted' + - $ref: '#/components/schemas/RealtimeServerEventInputAudioBufferSpeechStarted' + - $ref: '#/components/schemas/RealtimeServerEventInputAudioBufferSpeechStopped' + - $ref: '#/components/schemas/RealtimeServerEventRateLimitsUpdated' + - $ref: '#/components/schemas/RealtimeServerEventResponseAudioDelta' + - $ref: '#/components/schemas/RealtimeServerEventResponseAudioDone' + - $ref: '#/components/schemas/RealtimeServerEventResponseAudioTranscriptDelta' + - $ref: '#/components/schemas/RealtimeServerEventResponseAudioTranscriptDone' + - $ref: '#/components/schemas/RealtimeServerEventResponseContentPartAdded' + - $ref: '#/components/schemas/RealtimeServerEventResponseContentPartDone' + - $ref: '#/components/schemas/RealtimeServerEventResponseCreated' + - $ref: '#/components/schemas/RealtimeServerEventResponseDone' + - $ref: '#/components/schemas/RealtimeServerEventResponseFunctionCallArgumentsDelta' + - $ref: '#/components/schemas/RealtimeServerEventResponseFunctionCallArgumentsDone' + - $ref: '#/components/schemas/RealtimeServerEventResponseOutputItemAdded' + - $ref: '#/components/schemas/RealtimeServerEventResponseOutputItemDone' + - $ref: '#/components/schemas/RealtimeServerEventResponseTextDelta' + - $ref: '#/components/schemas/RealtimeServerEventResponseTextDone' + - $ref: '#/components/schemas/RealtimeServerEventSessionCreated' + - $ref: '#/components/schemas/RealtimeServerEventSessionUpdated' + - $ref: '#/components/schemas/RealtimeServerEventOutputAudioBufferStarted' + - $ref: '#/components/schemas/RealtimeServerEventOutputAudioBufferStopped' + - $ref: '#/components/schemas/RealtimeServerEventOutputAudioBufferCleared' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemAdded' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemDone' + - $ref: '#/components/schemas/RealtimeServerEventInputAudioBufferTimeoutTriggered' + - $ref: '#/components/schemas/RealtimeServerEventConversationItemInputAudioTranscriptionSegment' + - $ref: '#/components/schemas/RealtimeServerEventMCPListToolsInProgress' + - $ref: '#/components/schemas/RealtimeServerEventMCPListToolsCompleted' + - $ref: '#/components/schemas/RealtimeServerEventMCPListToolsFailed' + - $ref: '#/components/schemas/RealtimeServerEventResponseMCPCallArgumentsDelta' + - $ref: '#/components/schemas/RealtimeServerEventResponseMCPCallArgumentsDone' + - $ref: '#/components/schemas/RealtimeServerEventResponseMCPCallInProgress' + - $ref: '#/components/schemas/RealtimeServerEventResponseMCPCallCompleted' + - $ref: '#/components/schemas/RealtimeServerEventResponseMCPCallFailed' + RealtimeServerEventConversationCreated: + type: object + 
description: | + Returned when a conversation is created. Emitted right after session creation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.created`. + x-stainless-const: true + const: conversation.created + conversation: + type: object + description: The conversation resource. + properties: + id: + type: string + description: The unique ID of the conversation. + object: + description: The object type, must be `realtime.conversation`. + const: realtime.conversation + required: + - event_id + - type + - conversation + x-oaiMeta: + name: conversation.created + group: realtime + example: | + { + "event_id": "event_9101", + "type": "conversation.created", + "conversation": { + "id": "conv_001", + "object": "realtime.conversation" + } + } + RealtimeServerEventConversationItemAdded: + type: object + description: > + Sent by the server when an Item is added to the default Conversation. This can happen in several + cases: + + - When the client sends a `conversation.item.create` event. + + - When the input audio buffer is committed. In this case the item will be a user message containing + the audio from the buffer. + + - When the model is generating a Response. In this case the `conversation.item.added` event will be + sent when the model starts generating a specific Item, and thus it will not yet have any content (and + `status` will be `in_progress`). + + + The event will include the full content of the Item (except when model is generating a Response) + except for audio data, which can be retrieved separately with a `conversation.item.retrieve` event if + necessary. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.added`. + x-stainless-const: true + const: conversation.item.added + previous_item_id: + anyOf: + - type: string + description: | + The ID of the item that precedes this one, if any. This is used to + maintain ordering when items are inserted. + - type: 'null' + item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - event_id + - type + - item + x-oaiMeta: + name: conversation.item.added + group: realtime + example: | + { + "type": "conversation.item.added", + "event_id": "event_C9G8pjSJCfRNEhMEnYAVy", + "previous_item_id": null, + "item": { + "id": "item_C9G8pGVKYnaZu8PH5YQ9O", + "type": "message", + "status": "completed", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "hi" + } + ] + } + } + RealtimeServerEventConversationItemCreated: + type: object + description: | + Returned when a conversation item is created. There are several scenarios that produce this event: + - The server is generating a Response, which if successful will produce + either one or two Items, which will be of type `message` + (role `assistant`) or type `function_call`. + - The input audio buffer has been committed, either by the client or the + server (in `server_vad` mode). The server will take the content of the + input audio buffer and add it to a new user message Item. + - The client has sent a `conversation.item.create` event to add a new Item + to the Conversation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.created`. 
+ x-stainless-const: true + const: conversation.item.created + previous_item_id: + anyOf: + - type: string + description: | + The ID of the preceding item in the Conversation context, allows the + client to understand the order of the conversation. Can be `null` if the + item has no predecessor. + - type: 'null' + item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - event_id + - type + - item + x-oaiMeta: + name: conversation.item.created + group: realtime + example: | + { + "event_id": "event_1920", + "type": "conversation.item.created", + "previous_item_id": "msg_002", + "item": { + "id": "msg_003", + "object": "realtime.item", + "type": "message", + "status": "completed", + "role": "user", + "content": [] + } + } + RealtimeServerEventConversationItemDeleted: + type: object + description: | + Returned when an item in the conversation is deleted by the client with a + `conversation.item.delete` event. This event is used to synchronize the + server's understanding of the conversation history with the client's view. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.deleted`. + x-stainless-const: true + const: conversation.item.deleted + item_id: + type: string + description: The ID of the item that was deleted. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: conversation.item.deleted + group: realtime + example: | + { + "event_id": "event_2728", + "type": "conversation.item.deleted", + "item_id": "msg_005" + } + RealtimeServerEventConversationItemDone: + type: object + description: > + Returned when a conversation item is finalized. + + + The event will include the full content of the Item except for audio data, which can be retrieved + separately with a `conversation.item.retrieve` event if needed. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.done`. + x-stainless-const: true + const: conversation.item.done + previous_item_id: + anyOf: + - type: string + description: | + The ID of the item that precedes this one, if any. This is used to + maintain ordering when items are inserted. + - type: 'null' + item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - event_id + - type + - item + x-oaiMeta: + name: conversation.item.done + group: realtime + example: | + { + "type": "conversation.item.done", + "event_id": "event_CCXLgMZPo3qioWCeQa4WH", + "previous_item_id": "item_CCXLecNJVIVR2HUy3ABLj", + "item": { + "id": "item_CCXLfxmM5sXVJVz4mCa2S", + "type": "message", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_audio", + "transcript": "Oh, I can hear you loud and clear! Sounds like we're connected just fine. What can I help you with today?" + } + ] + } + } + RealtimeServerEventConversationItemInputAudioTranscriptionCompleted: + type: object + description: | + This event is the output of audio transcription for user audio written to the + user audio buffer. Transcription begins when the input audio buffer is + committed by the client or server (when VAD is enabled). Transcription runs + asynchronously with Response creation, so this event may come before or after + the Response events. + + Realtime API models accept audio natively, and thus input transcription is a + separate process run on a separate ASR (Automatic Speech Recognition) model. 
The transcript may diverge somewhat from the model's interpretation, and
+        should be treated as a rough guide.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          type: string
+          enum:
+            - conversation.item.input_audio_transcription.completed
+          description: |
+            The event type, must be
+            `conversation.item.input_audio_transcription.completed`.
+          x-stainless-const: true
+        item_id:
+          type: string
+          description: The ID of the item containing the audio that is being transcribed.
+        content_index:
+          type: integer
+          description: The index of the content part containing the audio.
+        transcript:
+          type: string
+          description: The transcribed text.
+        logprobs:
+          anyOf:
+            - type: array
+              description: The log probabilities of the transcription.
+              items:
+                $ref: '#/components/schemas/LogProbProperties'
+            - type: 'null'
+        usage:
+          type: object
+          description: >-
+            Usage statistics for the transcription; this is billed according to the ASR model's pricing rather
+            than the realtime model's pricing.
+          anyOf:
+            - $ref: '#/components/schemas/TranscriptTextUsageTokens'
+              title: TranscriptTextUsageTokens
+            - $ref: '#/components/schemas/TranscriptTextUsageDuration'
+              title: TranscriptTextUsageDuration
+      required:
+        - event_id
+        - type
+        - item_id
+        - content_index
+        - transcript
+        - usage
+      x-oaiMeta:
+        name: conversation.item.input_audio_transcription.completed
+        group: realtime
+        example: |
+          {
+            "type": "conversation.item.input_audio_transcription.completed",
+            "event_id": "event_CCXGRvtUVrax5SJAnNOWZ",
+            "item_id": "item_CCXGQ4e1ht4cOraEYcuR2",
+            "content_index": 0,
+            "transcript": "Hey, can you hear me?",
+            "usage": {
+              "type": "tokens",
+              "total_tokens": 22,
+              "input_tokens": 13,
+              "input_token_details": {
+                "text_tokens": 0,
+                "audio_tokens": 13
+              },
+              "output_tokens": 9
+            }
+          }
+    RealtimeServerEventConversationItemInputAudioTranscriptionDelta:
+      type: object
+      description: >
+        Returned when the text value of an input audio transcription content part is updated with incremental
+        transcription results.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `conversation.item.input_audio_transcription.delta`.
+          x-stainless-const: true
+          const: conversation.item.input_audio_transcription.delta
+        item_id:
+          type: string
+          description: The ID of the item containing the audio that is being transcribed.
+        content_index:
+          type: integer
+          description: The index of the content part in the item's content array.
+        delta:
+          type: string
+          description: The text delta.
+        logprobs:
+          anyOf:
+            - type: array
+              description: >-
+                The log probabilities of the transcription. These can be enabled by configuring the session
+                with `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the array
+                corresponds to a log probability of which token would be selected for this chunk of
+                transcription. This can help to identify whether there were multiple valid options
+                for a given chunk of transcription.
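+              # Editor's note (illustrative, not part of the upstream spec): a hypothetical
+              # session.update opting in to these logprobs on a transcription session:
+              #   {"type": "session.update", "session": {"type": "transcription",
+              #     "include": ["item.input_audio_transcription.logprobs"]}}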
+ items: + $ref: '#/components/schemas/LogProbProperties' + - type: 'null' + required: + - event_id + - type + - item_id + x-oaiMeta: + name: conversation.item.input_audio_transcription.delta + group: realtime + example: | + { + "type": "conversation.item.input_audio_transcription.delta", + "event_id": "event_CCXGRxsAimPAs8kS2Wc7Z", + "item_id": "item_CCXGQ4e1ht4cOraEYcuR2", + "content_index": 0, + "delta": "Hey", + "obfuscation": "aLxx0jTEciOGe" + } + RealtimeServerEventConversationItemInputAudioTranscriptionFailed: + type: object + description: | + Returned when input audio transcription is configured, and a transcription + request for a user message failed. These events are separate from other + `error` events so that the client can identify the related Item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + type: string + enum: + - conversation.item.input_audio_transcription.failed + description: | + The event type, must be + `conversation.item.input_audio_transcription.failed`. + x-stainless-const: true + item_id: + type: string + description: The ID of the user message item. + content_index: + type: integer + description: The index of the content part containing the audio. + error: + type: object + description: Details of the transcription error. + properties: + type: + type: string + description: The type of error. + code: + type: string + description: Error code, if any. + message: + type: string + description: A human-readable error message. + param: + type: string + description: Parameter related to the error, if any. + required: + - event_id + - type + - item_id + - content_index + - error + x-oaiMeta: + name: conversation.item.input_audio_transcription.failed + group: realtime + example: | + { + "event_id": "event_2324", + "type": "conversation.item.input_audio_transcription.failed", + "item_id": "msg_003", + "content_index": 0, + "error": { + "type": "transcription_error", + "code": "audio_unintelligible", + "message": "The audio could not be transcribed.", + "param": null + } + } + RealtimeServerEventConversationItemInputAudioTranscriptionSegment: + type: object + description: Returned when an input audio transcription segment is identified for an item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `conversation.item.input_audio_transcription.segment`. + x-stainless-const: true + const: conversation.item.input_audio_transcription.segment + item_id: + type: string + description: The ID of the item containing the input audio content. + content_index: + type: integer + description: The index of the input audio content part within the item. + text: + type: string + description: The text for this segment. + id: + type: string + description: The segment identifier. + speaker: + type: string + description: The detected speaker label for this segment. + start: + type: number + format: float + description: Start time of the segment in seconds. + end: + type: number + format: float + description: End time of the segment in seconds. 
+      required:
+        - event_id
+        - type
+        - item_id
+        - content_index
+        - text
+        - id
+        - speaker
+        - start
+        - end
+      x-oaiMeta:
+        name: conversation.item.input_audio_transcription.segment
+        group: realtime
+        example: |
+          {
+            "event_id": "event_6501",
+            "type": "conversation.item.input_audio_transcription.segment",
+            "item_id": "msg_011",
+            "content_index": 0,
+            "text": "hello",
+            "id": "seg_0001",
+            "speaker": "spk_1",
+            "start": 0.0,
+            "end": 0.4
+          }
+    RealtimeServerEventConversationItemRetrieved:
+      type: object
+      description: >
+        Returned when a conversation item is retrieved with `conversation.item.retrieve`. This is provided as
+        a way to fetch the server's representation of an item, for example to get access to the post-processed
+        audio data after noise cancellation and VAD. It includes the full content of the Item, including audio
+        data.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `conversation.item.retrieved`.
+          x-stainless-const: true
+          const: conversation.item.retrieved
+        item:
+          $ref: '#/components/schemas/RealtimeConversationItem'
+      required:
+        - event_id
+        - type
+        - item
+      x-oaiMeta:
+        name: conversation.item.retrieved
+        group: realtime
+        example: |
+          {
+            "type": "conversation.item.retrieved",
+            "event_id": "event_CCXGSizgEppa2d4XbKA7K",
+            "item": {
+              "id": "item_CCXGRxbY0n6WE4EszhF5w",
+              "object": "realtime.item",
+              "type": "message",
+              "status": "completed",
+              "role": "assistant",
+              "content": [
+                {
+                  "type": "audio",
+                  "transcript": "Yes, I can hear you loud and clear. How can I help you today?",
+                  "audio": "8//2//v/9//q/+//+P/s...",
+                  "format": "pcm16"
+                }
+              ]
+            }
+          }
+    RealtimeServerEventConversationItemTruncated:
+      type: object
+      description: |
+        Returned when an earlier assistant audio message item is truncated by the
+        client with a `conversation.item.truncate` event. This event is used to
+        synchronize the server's understanding of the audio with the client's playback.
+
+        This action will truncate the audio and remove the server-side text transcript
+        to ensure there is no text in the context that hasn't been heard by the user.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `conversation.item.truncated`.
+          x-stainless-const: true
+          const: conversation.item.truncated
+        item_id:
+          type: string
+          description: The ID of the assistant message item that was truncated.
+        content_index:
+          type: integer
+          description: The index of the content part that was truncated.
+        audio_end_ms:
+          type: integer
+          description: |
+            The duration up to which the audio was truncated, in milliseconds.
+      required:
+        - event_id
+        - type
+        - item_id
+        - content_index
+        - audio_end_ms
+      x-oaiMeta:
+        name: conversation.item.truncated
+        group: realtime
+        example: |
+          {
+            "event_id": "event_2526",
+            "type": "conversation.item.truncated",
+            "item_id": "msg_004",
+            "content_index": 0,
+            "audio_end_ms": 1500
+          }
+    RealtimeServerEventError:
+      type: object
+      description: |
+        Returned when an error occurs, which could be a client problem or a server
+        problem. Most errors are recoverable and the session will stay open; we
+        recommend that implementors monitor and log error messages by default.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `error`.
+ x-stainless-const: true + const: error + error: + type: object + description: Details of the error. + required: + - type + - message + properties: + type: + type: string + description: | + The type of error (e.g., "invalid_request_error", "server_error"). + code: + anyOf: + - type: string + description: Error code, if any. + - type: 'null' + message: + type: string + description: A human-readable error message. + param: + anyOf: + - type: string + description: Parameter related to the error, if any. + - type: 'null' + event_id: + anyOf: + - type: string + description: | + The event_id of the client event that caused the error, if applicable. + - type: 'null' + required: + - event_id + - type + - error + x-oaiMeta: + name: error + group: realtime + example: | + { + "event_id": "event_890", + "type": "error", + "error": { + "type": "invalid_request_error", + "code": "invalid_event", + "message": "The 'type' field is missing.", + "param": null, + "event_id": "event_567" + } + } + RealtimeServerEventInputAudioBufferCleared: + type: object + description: | + Returned when the input audio buffer is cleared by the client with a + `input_audio_buffer.clear` event. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.cleared`. + x-stainless-const: true + const: input_audio_buffer.cleared + required: + - event_id + - type + x-oaiMeta: + name: input_audio_buffer.cleared + group: realtime + example: | + { + "event_id": "event_1314", + "type": "input_audio_buffer.cleared" + } + RealtimeServerEventInputAudioBufferCommitted: + type: object + description: | + Returned when an input audio buffer is committed, either by the client or + automatically in server VAD mode. The `item_id` property is the ID of the user + message item that will be created, thus a `conversation.item.created` event + will also be sent to the client. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.committed`. + x-stainless-const: true + const: input_audio_buffer.committed + previous_item_id: + anyOf: + - type: string + description: | + The ID of the preceding item after which the new item will be inserted. + Can be `null` if the item has no predecessor. + - type: 'null' + item_id: + type: string + description: The ID of the user message item that will be created. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: input_audio_buffer.committed + group: realtime + example: | + { + "event_id": "event_1121", + "type": "input_audio_buffer.committed", + "previous_item_id": "msg_001", + "item_id": "msg_002" + } + RealtimeServerEventInputAudioBufferSpeechStarted: + type: object + description: | + Sent by the server when in `server_vad` mode to indicate that speech has been + detected in the audio buffer. This can happen any time audio is added to the + buffer (unless speech is already detected). The client may want to use this + event to interrupt audio playback or provide visual feedback to the user. + + The client should expect to receive a `input_audio_buffer.speech_stopped` event + when speech stops. The `item_id` property is the ID of the user message item + that will be created when speech stops and will also be included in the + `input_audio_buffer.speech_stopped` event (unless the client manually commits + the audio buffer during VAD activation). 
+ properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.speech_started`. + x-stainless-const: true + const: input_audio_buffer.speech_started + audio_start_ms: + type: integer + description: | + Milliseconds from the start of all audio written to the buffer during the + session when speech was first detected. This will correspond to the + beginning of audio sent to the model, and thus includes the + `prefix_padding_ms` configured in the Session. + item_id: + type: string + description: | + The ID of the user message item that will be created when speech stops. + required: + - event_id + - type + - audio_start_ms + - item_id + x-oaiMeta: + name: input_audio_buffer.speech_started + group: realtime + example: | + { + "event_id": "event_1516", + "type": "input_audio_buffer.speech_started", + "audio_start_ms": 1000, + "item_id": "msg_003" + } + RealtimeServerEventInputAudioBufferSpeechStopped: + type: object + description: | + Returned in `server_vad` mode when the server detects the end of speech in + the audio buffer. The server will also send an `conversation.item.created` + event with the user message item that is created from the audio buffer. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.speech_stopped`. + x-stainless-const: true + const: input_audio_buffer.speech_stopped + audio_end_ms: + type: integer + description: | + Milliseconds since the session started when speech stopped. This will + correspond to the end of audio sent to the model, and thus includes the + `min_silence_duration_ms` configured in the Session. + item_id: + type: string + description: The ID of the user message item that will be created. + required: + - event_id + - type + - audio_end_ms + - item_id + x-oaiMeta: + name: input_audio_buffer.speech_stopped + group: realtime + example: | + { + "event_id": "event_1718", + "type": "input_audio_buffer.speech_stopped", + "audio_end_ms": 2000, + "item_id": "msg_003" + } + RealtimeServerEventInputAudioBufferTimeoutTriggered: + type: object + description: | + Returned when the Server VAD timeout is triggered for the input audio buffer. This is configured + with `idle_timeout_ms` in the `turn_detection` settings of the session, and it indicates that + there hasn't been any speech detected for the configured duration. + + The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the last + model response up to the triggering time, as an offset from the beginning of audio written + to the input audio buffer. This means it demarcates the segment of audio that was silent and + the difference between the start and end values will roughly match the configured timeout. + + The empty audio will be committed to the conversation as an `input_audio` item (there will be a + `input_audio_buffer.committed` event) and a model response will be generated. There may be speech + that didn't trigger VAD but is still detected by the model, so the model may respond with + something relevant to the conversation or a prompt to continue speaking. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `input_audio_buffer.timeout_triggered`. 
+ x-stainless-const: true + const: input_audio_buffer.timeout_triggered + audio_start_ms: + type: integer + description: >- + Millisecond offset of audio written to the input audio buffer that was after the playback time of + the last model response. + audio_end_ms: + type: integer + description: >- + Millisecond offset of audio written to the input audio buffer at the time the timeout was + triggered. + item_id: + type: string + description: The ID of the item associated with this segment. + required: + - event_id + - type + - audio_start_ms + - audio_end_ms + - item_id + x-oaiMeta: + name: input_audio_buffer.timeout_triggered + group: realtime + example: | + { + "type":"input_audio_buffer.timeout_triggered", + "event_id":"event_CEKKrf1KTGvemCPyiJTJ2", + "audio_start_ms":13216, + "audio_end_ms":19232, + "item_id":"item_CEKKrWH0GiwN0ET97NUZc" + } + RealtimeServerEventMCPListToolsCompleted: + type: object + description: Returned when listing MCP tools has completed for an item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `mcp_list_tools.completed`. + x-stainless-const: true + const: mcp_list_tools.completed + item_id: + type: string + description: The ID of the MCP list tools item. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: mcp_list_tools.completed + group: realtime + example: | + { + "event_id": "event_6102", + "type": "mcp_list_tools.completed", + "item_id": "mcp_list_tools_001" + } + RealtimeServerEventMCPListToolsFailed: + type: object + description: Returned when listing MCP tools has failed for an item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `mcp_list_tools.failed`. + x-stainless-const: true + const: mcp_list_tools.failed + item_id: + type: string + description: The ID of the MCP list tools item. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: mcp_list_tools.failed + group: realtime + example: | + { + "event_id": "event_6103", + "type": "mcp_list_tools.failed", + "item_id": "mcp_list_tools_001" + } + RealtimeServerEventMCPListToolsInProgress: + type: object + description: Returned when listing MCP tools is in progress for an item. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `mcp_list_tools.in_progress`. + x-stainless-const: true + const: mcp_list_tools.in_progress + item_id: + type: string + description: The ID of the MCP list tools item. + required: + - event_id + - type + - item_id + x-oaiMeta: + name: mcp_list_tools.in_progress + group: realtime + example: | + { + "event_id": "event_6101", + "type": "mcp_list_tools.in_progress", + "item_id": "mcp_list_tools_001" + } + RealtimeServerEventOutputAudioBufferCleared: + type: object + description: > + **WebRTC Only:** Emitted when the output audio buffer is cleared. This happens either in VAD + + mode when the user has interrupted (`input_audio_buffer.speech_started`), + + or when the client has emitted the `output_audio_buffer.clear` event to manually + + cut off the current audio response. + + [Learn + more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `output_audio_buffer.cleared`. 
+ x-stainless-const: true + const: output_audio_buffer.cleared + response_id: + type: string + description: The unique ID of the response that produced the audio. + required: + - event_id + - type + - response_id + x-oaiMeta: + name: output_audio_buffer.cleared + group: realtime + example: | + { + "event_id": "event_abc123", + "type": "output_audio_buffer.cleared", + "response_id": "resp_abc123" + } + RealtimeServerEventOutputAudioBufferStarted: + type: object + description: > + **WebRTC Only:** Emitted when the server begins streaming audio to the client. This event is + + emitted after an audio content part has been added (`response.content_part.added`) + + to the response. + + [Learn + more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `output_audio_buffer.started`. + x-stainless-const: true + const: output_audio_buffer.started + response_id: + type: string + description: The unique ID of the response that produced the audio. + required: + - event_id + - type + - response_id + x-oaiMeta: + name: output_audio_buffer.started + group: realtime + example: | + { + "event_id": "event_abc123", + "type": "output_audio_buffer.started", + "response_id": "resp_abc123" + } + RealtimeServerEventOutputAudioBufferStopped: + type: object + description: > + **WebRTC Only:** Emitted when the output audio buffer has been completely drained on the server, + + and no more audio is forthcoming. This event is emitted after the full response + + data has been sent to the client (`response.done`). + + [Learn + more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc). + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `output_audio_buffer.stopped`. + x-stainless-const: true + const: output_audio_buffer.stopped + response_id: + type: string + description: The unique ID of the response that produced the audio. + required: + - event_id + - type + - response_id + x-oaiMeta: + name: output_audio_buffer.stopped + group: realtime + example: | + { + "event_id": "event_abc123", + "type": "output_audio_buffer.stopped", + "response_id": "resp_abc123" + } + RealtimeServerEventRateLimitsUpdated: + type: object + description: | + Emitted at the beginning of a Response to indicate the updated rate limits. + When a Response is created some tokens will be "reserved" for the output + tokens, the rate limits shown here reflect that reservation, which is then + adjusted accordingly once the Response is completed. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `rate_limits.updated`. + x-stainless-const: true + const: rate_limits.updated + rate_limits: + type: array + description: List of rate limit information. + items: + type: object + properties: + name: + type: string + enum: + - requests + - tokens + description: | + The name of the rate limit (`requests`, `tokens`). + limit: + type: integer + description: The maximum allowed value for the rate limit. + remaining: + type: integer + description: The remaining value before the limit is reached. + reset_seconds: + type: number + description: Seconds until the rate limit resets. 
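+        # Non-normative reading of the example below: "limit": 50000 with
+        # "remaining": 49950 means roughly 50 output tokens are currently reserved
+        # for the in-flight Response, per the reservation behavior described above.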
+ required: + - event_id + - type + - rate_limits + x-oaiMeta: + name: rate_limits.updated + group: realtime + example: | + { + "event_id": "event_5758", + "type": "rate_limits.updated", + "rate_limits": [ + { + "name": "requests", + "limit": 1000, + "remaining": 999, + "reset_seconds": 60 + }, + { + "name": "tokens", + "limit": 50000, + "remaining": 49950, + "reset_seconds": 60 + } + ] + } + RealtimeServerEventResponseAudioDelta: + type: object + description: Returned when the model-generated audio is updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_audio.delta`. + x-stainless-const: true + const: response.output_audio.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + delta: + type: string + description: Base64-encoded audio data delta. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - delta + x-oaiMeta: + name: response.output_audio.delta + group: realtime + example: | + { + "event_id": "event_4950", + "type": "response.output_audio.delta", + "response_id": "resp_001", + "item_id": "msg_008", + "output_index": 0, + "content_index": 0, + "delta": "Base64EncodedAudioDelta" + } + RealtimeServerEventResponseAudioDone: + type: object + description: | + Returned when the model-generated audio is done. Also emitted when a Response + is interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_audio.done`. + x-stainless-const: true + const: response.output_audio.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + x-oaiMeta: + name: response.output_audio.done + group: realtime + example: | + { + "event_id": "event_5152", + "type": "response.output_audio.done", + "response_id": "resp_001", + "item_id": "msg_008", + "output_index": 0, + "content_index": 0 + } + RealtimeServerEventResponseAudioTranscriptDelta: + type: object + description: | + Returned when the model-generated transcription of audio output is updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_audio_transcript.delta`. + x-stainless-const: true + const: response.output_audio_transcript.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + delta: + type: string + description: The transcript delta. 
+ required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - delta + x-oaiMeta: + name: response.output_audio_transcript.delta + group: realtime + example: | + { + "event_id": "event_4546", + "type": "response.output_audio_transcript.delta", + "response_id": "resp_001", + "item_id": "msg_008", + "output_index": 0, + "content_index": 0, + "delta": "Hello, how can I a" + } + RealtimeServerEventResponseAudioTranscriptDone: + type: object + description: | + Returned when the model-generated transcription of audio output is done + streaming. Also emitted when a Response is interrupted, incomplete, or + cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_audio_transcript.done`. + x-stainless-const: true + const: response.output_audio_transcript.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + transcript: + type: string + description: The final transcript of the audio. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - transcript + x-oaiMeta: + name: response.output_audio_transcript.done + group: realtime + example: | + { + "event_id": "event_4748", + "type": "response.output_audio_transcript.done", + "response_id": "resp_001", + "item_id": "msg_008", + "output_index": 0, + "content_index": 0, + "transcript": "Hello, how can I assist you today?" + } + RealtimeServerEventResponseContentPartAdded: + type: object + description: | + Returned when a new content part is added to an assistant message item during + response generation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.content_part.added`. + x-stainless-const: true + const: response.content_part.added + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item to which the content part was added. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + part: + type: object + description: The content part that was added. + properties: + type: + type: string + enum: + - text + - audio + description: The content type ("text", "audio"). + text: + type: string + description: The text content (if type is "text"). + audio: + type: string + description: Base64-encoded audio data (if type is "audio"). + transcript: + type: string + description: The transcript of the audio (if type is "audio"). + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - part + x-oaiMeta: + name: response.content_part.added + group: realtime + example: | + { + "event_id": "event_3738", + "type": "response.content_part.added", + "response_id": "resp_001", + "item_id": "msg_007", + "output_index": 0, + "content_index": 0, + "part": { + "type": "text", + "text": "" + } + } + RealtimeServerEventResponseContentPartDone: + type: object + description: | + Returned when a content part is done streaming in an assistant message item. 
+        Also emitted when a Response is interrupted, incomplete, or cancelled.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `response.content_part.done`.
+          x-stainless-const: true
+          const: response.content_part.done
+        response_id:
+          type: string
+          description: The ID of the response.
+        item_id:
+          type: string
+          description: The ID of the item.
+        output_index:
+          type: integer
+          description: The index of the output item in the response.
+        content_index:
+          type: integer
+          description: The index of the content part in the item's content array.
+        part:
+          type: object
+          description: The content part that is done.
+          properties:
+            type:
+              type: string
+              enum:
+                - text
+                - audio
+              description: The content type ("text", "audio").
+            text:
+              type: string
+              description: The text content (if type is "text").
+            audio:
+              type: string
+              description: Base64-encoded audio data (if type is "audio").
+            transcript:
+              type: string
+              description: The transcript of the audio (if type is "audio").
+      required:
+        - event_id
+        - type
+        - response_id
+        - item_id
+        - output_index
+        - content_index
+        - part
+      x-oaiMeta:
+        name: response.content_part.done
+        group: realtime
+        example: |
+          {
+            "event_id": "event_3940",
+            "type": "response.content_part.done",
+            "response_id": "resp_001",
+            "item_id": "msg_007",
+            "output_index": 0,
+            "content_index": 0,
+            "part": {
+              "type": "text",
+              "text": "Sure, I can help with that."
+            }
+          }
+    RealtimeServerEventResponseCreated:
+      type: object
+      description: |
+        Returned when a new Response is created. The first event of response creation,
+        where the response is in an initial state of `in_progress`.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `response.created`.
+          x-stainless-const: true
+          const: response.created
+        response:
+          $ref: '#/components/schemas/RealtimeResponse'
+      required:
+        - event_id
+        - type
+        - response
+      x-oaiMeta:
+        name: response.created
+        group: realtime
+        example: |
+          {
+            "type": "response.created",
+            "event_id": "event_C9G8pqbTEddBSIxbBN6Os",
+            "response": {
+              "object": "realtime.response",
+              "id": "resp_C9G8p7IH2WxLbkgPNouYL",
+              "status": "in_progress",
+              "status_details": null,
+              "output": [],
+              "conversation_id": "conv_C9G8mmBkLhQJwCon3hoJN",
+              "output_modalities": [
+                "audio"
+              ],
+              "max_output_tokens": "inf",
+              "audio": {
+                "output": {
+                  "format": {
+                    "type": "audio/pcm",
+                    "rate": 24000
+                  },
+                  "voice": "marin"
+                }
+              },
+              "usage": null,
+              "metadata": null
+            }
+          }
+    RealtimeServerEventResponseDone:
+      type: object
+      description: |
+        Returned when a Response is done streaming. Always emitted, no matter the
+        final state. The Response object included in the `response.done` event will
+        include all output Items in the Response but will omit the raw audio data.
+
+        Clients should check the `status` field of the Response to determine if it was successful
+        (`completed`) or if there was another outcome: `cancelled`, `failed`, or `incomplete`.
+
+        A response will contain all output items that were generated during the response, excluding
+        any audio content.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `response.done`.
+ x-stainless-const: true + const: response.done + response: + $ref: '#/components/schemas/RealtimeResponse' + required: + - event_id + - type + - response + x-oaiMeta: + name: response.done + group: realtime + example: | + { + "type": "response.done", + "event_id": "event_CCXHxcMy86rrKhBLDdqCh", + "response": { + "object": "realtime.response", + "id": "resp_CCXHw0UJld10EzIUXQCNh", + "status": "completed", + "status_details": null, + "output": [ + { + "id": "item_CCXHwGjjDUfOXbiySlK7i", + "type": "message", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_audio", + "transcript": "Loud and clear! I can hear you perfectly. How can I help you today?" + } + ] + } + ], + "conversation_id": "conv_CCXHsurMKcaVxIZvaCI5m", + "output_modalities": [ + "audio" + ], + "max_output_tokens": "inf", + "audio": { + "output": { + "format": { + "type": "audio/pcm", + "rate": 24000 + }, + "voice": "alloy" + } + }, + "usage": { + "total_tokens": 253, + "input_tokens": 132, + "output_tokens": 121, + "input_token_details": { + "text_tokens": 119, + "audio_tokens": 13, + "image_tokens": 0, + "cached_tokens": 64, + "cached_tokens_details": { + "text_tokens": 64, + "audio_tokens": 0, + "image_tokens": 0 + } + }, + "output_token_details": { + "text_tokens": 30, + "audio_tokens": 91 + } + }, + "metadata": null + } + } + RealtimeServerEventResponseFunctionCallArgumentsDelta: + type: object + description: | + Returned when the model-generated function call arguments are updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: | + The event type, must be `response.function_call_arguments.delta`. + x-stainless-const: true + const: response.function_call_arguments.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the function call item. + output_index: + type: integer + description: The index of the output item in the response. + call_id: + type: string + description: The ID of the function call. + delta: + type: string + description: The arguments delta as a JSON string. + required: + - event_id + - type + - response_id + - item_id + - output_index + - call_id + - delta + x-oaiMeta: + name: response.function_call_arguments.delta + group: realtime + example: | + { + "event_id": "event_5354", + "type": "response.function_call_arguments.delta", + "response_id": "resp_002", + "item_id": "fc_001", + "output_index": 0, + "call_id": "call_001", + "delta": "{\"location\": \"San\"" + } + RealtimeServerEventResponseFunctionCallArgumentsDone: + type: object + description: | + Returned when the model-generated function call arguments are done streaming. + Also emitted when a Response is interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: | + The event type, must be `response.function_call_arguments.done`. + x-stainless-const: true + const: response.function_call_arguments.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the function call item. + output_index: + type: integer + description: The index of the output item in the response. + call_id: + type: string + description: The ID of the function call. + arguments: + type: string + description: The final arguments as a JSON string. 
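+      # Non-normative: the final `arguments` string is the concatenation of all
+      # `response.function_call_arguments.delta` fragments streamed for the same
+      # `call_id` (compare the delta example above with the done example below).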
+ required: + - event_id + - type + - response_id + - item_id + - output_index + - call_id + - arguments + x-oaiMeta: + name: response.function_call_arguments.done + group: realtime + example: | + { + "event_id": "event_5556", + "type": "response.function_call_arguments.done", + "response_id": "resp_002", + "item_id": "fc_001", + "output_index": 0, + "call_id": "call_001", + "arguments": "{\"location\": \"San Francisco\"}" + } + RealtimeServerEventResponseMCPCallArgumentsDelta: + type: object + description: Returned when MCP tool call arguments are updated during response generation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call_arguments.delta`. + x-stainless-const: true + const: response.mcp_call_arguments.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the MCP tool call item. + output_index: + type: integer + description: The index of the output item in the response. + delta: + type: string + description: The JSON-encoded arguments delta. + obfuscation: + anyOf: + - type: string + description: If present, indicates the delta text was obfuscated. + - type: 'null' + required: + - event_id + - type + - response_id + - item_id + - output_index + - delta + x-oaiMeta: + name: response.mcp_call_arguments.delta + group: realtime + example: | + { + "event_id": "event_6201", + "type": "response.mcp_call_arguments.delta", + "response_id": "resp_001", + "item_id": "mcp_call_001", + "output_index": 0, + "delta": "{\"partial\":true}" + } + RealtimeServerEventResponseMCPCallArgumentsDone: + type: object + description: Returned when MCP tool call arguments are finalized during response generation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call_arguments.done`. + x-stainless-const: true + const: response.mcp_call_arguments.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the MCP tool call item. + output_index: + type: integer + description: The index of the output item in the response. + arguments: + type: string + description: The final JSON-encoded arguments string. + required: + - event_id + - type + - response_id + - item_id + - output_index + - arguments + x-oaiMeta: + name: response.mcp_call_arguments.done + group: realtime + example: | + { + "event_id": "event_6202", + "type": "response.mcp_call_arguments.done", + "response_id": "resp_001", + "item_id": "mcp_call_001", + "output_index": 0, + "arguments": "{\"q\":\"docs\"}" + } + RealtimeServerEventResponseMCPCallCompleted: + type: object + description: Returned when an MCP tool call has completed successfully. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call.completed`. + x-stainless-const: true + const: response.mcp_call.completed + output_index: + type: integer + description: The index of the output item in the response. + item_id: + type: string + description: The ID of the MCP tool call item. 
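+      # Non-normative lifecycle note: an MCP tool call surfaces as
+      # `response.mcp_call.in_progress`, followed by either
+      # `response.mcp_call.completed` or `response.mcp_call.failed`, each carrying
+      # the same `item_id` (see the sibling schemas below).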
+ required: + - event_id + - type + - output_index + - item_id + x-oaiMeta: + name: response.mcp_call.completed + group: realtime + example: | + { + "event_id": "event_6302", + "type": "response.mcp_call.completed", + "output_index": 0, + "item_id": "mcp_call_001" + } + RealtimeServerEventResponseMCPCallFailed: + type: object + description: Returned when an MCP tool call has failed. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call.failed`. + x-stainless-const: true + const: response.mcp_call.failed + output_index: + type: integer + description: The index of the output item in the response. + item_id: + type: string + description: The ID of the MCP tool call item. + required: + - event_id + - type + - output_index + - item_id + x-oaiMeta: + name: response.mcp_call.failed + group: realtime + example: | + { + "event_id": "event_6303", + "type": "response.mcp_call.failed", + "output_index": 0, + "item_id": "mcp_call_001" + } + RealtimeServerEventResponseMCPCallInProgress: + type: object + description: Returned when an MCP tool call has started and is in progress. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.mcp_call.in_progress`. + x-stainless-const: true + const: response.mcp_call.in_progress + output_index: + type: integer + description: The index of the output item in the response. + item_id: + type: string + description: The ID of the MCP tool call item. + required: + - event_id + - type + - output_index + - item_id + x-oaiMeta: + name: response.mcp_call.in_progress + group: realtime + example: | + { + "event_id": "event_6301", + "type": "response.mcp_call.in_progress", + "output_index": 0, + "item_id": "mcp_call_001" + } + RealtimeServerEventResponseOutputItemAdded: + type: object + description: Returned when a new Item is created during Response generation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_item.added`. + x-stainless-const: true + const: response.output_item.added + response_id: + type: string + description: The ID of the Response to which the item belongs. + output_index: + type: integer + description: The index of the output item in the Response. + item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - event_id + - type + - response_id + - output_index + - item + x-oaiMeta: + name: response.output_item.added + group: realtime + example: | + { + "event_id": "event_3334", + "type": "response.output_item.added", + "response_id": "resp_001", + "output_index": 0, + "item": { + "id": "msg_007", + "object": "realtime.item", + "type": "message", + "status": "in_progress", + "role": "assistant", + "content": [] + } + } + RealtimeServerEventResponseOutputItemDone: + type: object + description: | + Returned when an Item is done streaming. Also emitted when a Response is + interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_item.done`. + x-stainless-const: true + const: response.output_item.done + response_id: + type: string + description: The ID of the Response to which the item belongs. + output_index: + type: integer + description: The index of the output item in the Response. 
+ item: + $ref: '#/components/schemas/RealtimeConversationItem' + required: + - event_id + - type + - response_id + - output_index + - item + x-oaiMeta: + name: response.output_item.done + group: realtime + example: | + { + "event_id": "event_3536", + "type": "response.output_item.done", + "response_id": "resp_001", + "output_index": 0, + "item": { + "id": "msg_007", + "object": "realtime.item", + "type": "message", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Sure, I can help with that." + } + ] + } + } + RealtimeServerEventResponseTextDelta: + type: object + description: Returned when the text value of an "output_text" content part is updated. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_text.delta`. + x-stainless-const: true + const: response.output_text.delta + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + delta: + type: string + description: The text delta. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - delta + x-oaiMeta: + name: response.output_text.delta + group: realtime + example: | + { + "event_id": "event_4142", + "type": "response.output_text.delta", + "response_id": "resp_001", + "item_id": "msg_007", + "output_index": 0, + "content_index": 0, + "delta": "Sure, I can h" + } + RealtimeServerEventResponseTextDone: + type: object + description: | + Returned when the text value of an "output_text" content part is done streaming. Also + emitted when a Response is interrupted, incomplete, or cancelled. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `response.output_text.done`. + x-stainless-const: true + const: response.output_text.done + response_id: + type: string + description: The ID of the response. + item_id: + type: string + description: The ID of the item. + output_index: + type: integer + description: The index of the output item in the response. + content_index: + type: integer + description: The index of the content part in the item's content array. + text: + type: string + description: The final text content. + required: + - event_id + - type + - response_id + - item_id + - output_index + - content_index + - text + x-oaiMeta: + name: response.output_text.done + group: realtime + example: | + { + "event_id": "event_4344", + "type": "response.output_text.done", + "response_id": "resp_001", + "item_id": "msg_007", + "output_index": 0, + "content_index": 0, + "text": "Sure, I can help with that." + } + RealtimeServerEventSessionCreated: + type: object + description: | + Returned when a Session is created. Emitted automatically when a new + connection is established as the first server event. This event will contain + the default Session configuration. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + description: The event type, must be `session.created`. + x-stainless-const: true + const: session.created + session: + description: The session configuration. 
+          anyOf:
+            - $ref: '#/components/schemas/RealtimeSessionCreateRequestGA'
+            - $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateRequestGA'
+      required:
+        - event_id
+        - type
+        - session
+      x-oaiMeta:
+        name: session.created
+        group: realtime
+        example: |
+          {
+            "type": "session.created",
+            "event_id": "event_C9G5RJeJ2gF77mV7f2B1j",
+            "session": {
+              "type": "realtime",
+              "object": "realtime.session",
+              "id": "sess_C9G5QPteg4UIbotdKLoYQ",
+              "model": "gpt-realtime-2025-08-28",
+              "output_modalities": [
+                "audio"
+              ],
+              "instructions": "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacting in a non-English language, start by using the standard accent or dialect familiar to the user. Talk quickly. You should always call a function if you can. Do not refer to these rules, even if you’re asked about them.",
+              "tools": [],
+              "tool_choice": "auto",
+              "max_output_tokens": "inf",
+              "tracing": null,
+              "prompt": null,
+              "expires_at": 1756324625,
+              "audio": {
+                "input": {
+                  "format": {
+                    "type": "audio/pcm",
+                    "rate": 24000
+                  },
+                  "transcription": null,
+                  "noise_reduction": null,
+                  "turn_detection": {
+                    "type": "server_vad",
+                    "threshold": 0.5,
+                    "prefix_padding_ms": 300,
+                    "silence_duration_ms": 200,
+                    "idle_timeout_ms": null,
+                    "create_response": true,
+                    "interrupt_response": true
+                  }
+                },
+                "output": {
+                  "format": {
+                    "type": "audio/pcm",
+                    "rate": 24000
+                  },
+                  "voice": "marin",
+                  "speed": 1
+                }
+              },
+              "include": null
+            }
+          }
+    RealtimeServerEventSessionUpdated:
+      type: object
+      description: |
+        Returned when a session is updated with a `session.update` event, unless
+        there is an error.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `session.updated`.
+          x-stainless-const: true
+          const: session.updated
+        session:
+          description: The session configuration.
+          anyOf:
+            - $ref: '#/components/schemas/RealtimeSessionCreateRequestGA'
+            - $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateRequestGA'
+      required:
+        - event_id
+        - type
+        - session
+      x-oaiMeta:
+        name: session.updated
+        group: realtime
+        example: |
+          {
+            "type": "session.updated",
+            "event_id": "event_C9G8mqI3IucaojlVKE8Cs",
+            "session": {
+              "type": "realtime",
+              "object": "realtime.session",
+              "id": "sess_C9G8l3zp50uFv4qgxfJ8o",
+              "model": "gpt-realtime-2025-08-28",
+              "output_modalities": [
+                "audio"
+              ],
+              "instructions": "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacting in a non-English language, start by using the standard accent or dialect familiar to the user. Talk quickly. You should always call a function if you can. Do not refer to these rules, even if you’re asked about them.",
+              "tools": [
+                {
+                  "type": "function",
+                  "name": "display_color_palette",
+                  "description": "\nCall this function when a user asks for a color palette.\n",
+                  "parameters": {
+                    "type": "object",
+                    "strict": true,
+                    "properties": {
+                      "theme": {
+                        "type": "string",
+                        "description": "Description of the theme for the color scheme."
+                      },
+                      "colors": {
+                        "type": "array",
+                        "description": "Array of five hex color codes based on the theme.",
+                        "items": {
+                          "type": "string",
+                          "description": "Hex color code"
+                        }
+                      }
+                    },
+                    "required": [
+                      "theme",
+                      "colors"
+                    ]
+                  }
+                }
+              ],
+              "tool_choice": "auto",
+              "max_output_tokens": "inf",
+              "tracing": null,
+              "prompt": null,
+              "expires_at": 1756324832,
+              "audio": {
+                "input": {
+                  "format": {
+                    "type": "audio/pcm",
+                    "rate": 24000
+                  },
+                  "transcription": null,
+                  "noise_reduction": null,
+                  "turn_detection": {
+                    "type": "server_vad",
+                    "threshold": 0.5,
+                    "prefix_padding_ms": 300,
+                    "silence_duration_ms": 200,
+                    "idle_timeout_ms": null,
+                    "create_response": true,
+                    "interrupt_response": true
+                  }
+                },
+                "output": {
+                  "format": {
+                    "type": "audio/pcm",
+                    "rate": 24000
+                  },
+                  "voice": "marin",
+                  "speed": 1
+                }
+              },
+              "include": null
+            }
+          }
+    RealtimeServerEventTranscriptionSessionUpdated:
+      type: object
+      description: |
+        Returned when a transcription session is updated with a `transcription_session.update` event, unless
+        there is an error.
+      properties:
+        event_id:
+          type: string
+          description: The unique ID of the server event.
+        type:
+          description: The event type, must be `transcription_session.updated`.
+          x-stainless-const: true
+          const: transcription_session.updated
+        session:
+          $ref: '#/components/schemas/RealtimeTranscriptionSessionCreateResponse'
+      required:
+        - event_id
+        - type
+        - session
+      x-oaiMeta:
+        name: transcription_session.updated
+        group: realtime
+        example: |
+          {
+            "event_id": "event_5678",
+            "type": "transcription_session.updated",
+            "session": {
+              "id": "sess_001",
+              "object": "realtime.transcription_session",
+              "input_audio_format": "pcm16",
+              "input_audio_transcription": {
+                "model": "gpt-4o-transcribe",
+                "prompt": "",
+                "language": ""
+              },
+              "turn_detection": {
+                "type": "server_vad",
+                "threshold": 0.5,
+                "prefix_padding_ms": 300,
+                "silence_duration_ms": 500,
+                "create_response": true
+                // "interrupt_response": false -- this will NOT be returned
+              },
+              "input_audio_noise_reduction": {
+                "type": "near_field"
+              },
+              "include": [
+                "item.input_audio_transcription.avg_logprob"
+              ]
+            }
+          }
+    RealtimeSession:
+      type: object
+      description: Realtime session object for the beta interface.
+      properties:
+        id:
+          type: string
+          description: |
+            Unique identifier for the session that looks like `sess_1234567890abcdef`.
+        object:
+          type: string
+          enum:
+            - realtime.session
+          description: The object type. Always `realtime.session`.
+        modalities:
+          description: |
+            The set of modalities the model can respond with. To disable audio,
+            set this to ["text"].
+          items:
+            type: string
+            enum:
+              - text
+              - audio
+        model:
+          type: string
+          description: |
+            The Realtime model used for this session.
+          enum:
+            - gpt-realtime
+            - gpt-realtime-2025-08-28
+            - gpt-4o-realtime-preview
+            - gpt-4o-realtime-preview-2024-10-01
+            - gpt-4o-realtime-preview-2024-12-17
+            - gpt-4o-realtime-preview-2025-06-03
+            - gpt-4o-mini-realtime-preview
+            - gpt-4o-mini-realtime-preview-2024-12-17
+            - gpt-realtime-mini
+            - gpt-realtime-mini-2025-10-06
+            - gpt-audio-mini
+            - gpt-audio-mini-2025-10-06
+        instructions:
+          type: string
+          description: |
+            The default system instructions (i.e. system message) prepended to model
+            calls. This field allows the client to guide the model on desired
+            responses. The model can be instructed on response content and format,
+            (e.g. "be extremely succinct", "act friendly", "here are examples of good
+            responses") and on audio behavior (e.g.
"talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not + guaranteed to be followed by the model, but they provide guidance to the + model on the desired behavior. + + + Note that the server sets default instructions which will be used if this + field is not set and are visible in the `session.created` event at the + start of the session. + voice: + $ref: '#/components/schemas/VoiceIdsShared' + description: | + The voice the model uses to respond. Voice cannot be changed during the + session once the model has responded with audio at least once. Current + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, + `shimmer`, and `verse`. + input_audio_format: + type: string + default: pcm16 + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: | + The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + For `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, + single channel (mono), and little-endian byte order. + output_audio_format: + type: string + default: pcm16 + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: | + The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + For `pcm16`, output audio is sampled at a rate of 24kHz. + input_audio_transcription: + anyOf: + - allOf: + - $ref: '#/components/schemas/AudioTranscription' + description: > + Configuration for input audio transcription, defaults to off and can be set to `null` to turn + off once on. Input audio transcription is not native to the model, since the model consumes + audio directly. Transcription runs asynchronously through [the /audio/transcriptions + endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) and should + be treated as guidance of input audio content rather than precisely what the model heard. The + client can optionally set the language and prompt for transcription, these offer additional + guidance to the transcription service. + - type: 'null' + turn_detection: + $ref: '#/components/schemas/RealtimeTurnDetection' + input_audio_noise_reduction: + type: object + description: > + Configuration for input audio noise reduction. This can be set to `null` to turn off. + + Noise reduction filters audio added to the input audio buffer before it is sent to VAD and the + model. + + Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. + properties: + type: + $ref: '#/components/schemas/NoiseReductionType' + speed: + type: number + default: 1 + maximum: 1.5 + minimum: 0.25 + description: | + The speed of the model's spoken response. 1.0 is the default speed. 0.25 is + the minimum speed. 1.5 is the maximum speed. This value can only be changed + in between model turns, not while a response is in progress. + tracing: + anyOf: + - title: Tracing Configuration + description: | + Configuration options for tracing. Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the + workflow name, group id, and metadata. + anyOf: + - type: string + default: auto + description: | + Default tracing mode for the session. + enum: + - auto + x-stainless-const: true + - type: object + title: Tracing Configuration + description: | + Granular configuration for tracing. 
+ properties: + workflow_name: + type: string + description: | + The name of the workflow to attach to this trace. This is used to + name the trace in the traces dashboard. + group_id: + type: string + description: | + The group id to attach to this trace to enable filtering and + grouping in the traces dashboard. + metadata: + type: object + description: | + The arbitrary metadata to attach to this trace to enable + filtering in the traces dashboard. + - type: 'null' + tools: + type: array + description: Tools (functions) available to the model. + items: + $ref: '#/components/schemas/RealtimeFunctionTool' + tool_choice: + type: string + default: auto + description: | + How the model chooses tools. Options are `auto`, `none`, `required`, or + specify a function. + temperature: + type: number + default: 0.8 + description: > + Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a temperature of 0.8 + is highly recommended for best performance. + max_response_output_tokens: + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls. Provide an integer between 1 and 4096 to + limit output tokens, or `inf` for the maximum available tokens for a + given model. Defaults to `inf`. + anyOf: + - type: integer + - type: string + enum: + - inf + x-stainless-const: true + expires_at: + type: integer + description: Expiration timestamp for the session, in seconds since epoch. + prompt: + anyOf: + - $ref: '#/components/schemas/Prompt' + - type: 'null' + include: + anyOf: + - type: array + items: + type: string + enum: + - item.input_audio_transcription.logprobs + description: | + Additional fields to include in server outputs. + - `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + - type: 'null' + RealtimeSessionCreateRequest: + type: object + description: | + A new Realtime session configuration, with an ephemeral key. Default TTL + for keys is one minute. + properties: + client_secret: + type: object + description: Ephemeral key returned by the API. + properties: + value: + type: string + description: | + Ephemeral key usable in client environments to authenticate connections + to the Realtime API. Use this in client-side environments rather than + a standard API token, which should only be used server-side. + expires_at: + type: integer + description: | + Timestamp for when the token expires. Currently, all tokens expire + after one minute. + required: + - value + - expires_at + modalities: + description: | + The set of modalities the model can respond with. To disable audio, + set this to ["text"]. + items: + type: string + enum: + - text + - audio + instructions: + type: string + description: > + The default system instructions (i.e. system message) prepended to model calls. This field allows + the client to guide the model on desired responses. The model can be instructed on response + content and format, (e.g. "be extremely succinct", "act friendly", "here are examples of good + responses") and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", "laugh + frequently"). The instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field is not set and are + visible in the `session.created` event at the start of the session. 
+ voice: + $ref: '#/components/schemas/VoiceIdsShared' + description: | + The voice the model uses to respond. Voice cannot be changed during the + session once the model has responded with audio at least once. Current + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, + `shimmer`, and `verse`. + input_audio_format: + type: string + description: | + The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + output_audio_format: + type: string + description: | + The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + input_audio_transcription: + type: object + description: | + Configuration for input audio transcription, defaults to off and can be + set to `null` to turn off once on. Input audio transcription is not native + to the model, since the model consumes audio directly. Transcription runs + asynchronously and should be treated as rough guidance + rather than the representation understood by the model. + properties: + model: + type: string + description: | + The model to use for transcription. + speed: + type: number + default: 1 + maximum: 1.5 + minimum: 0.25 + description: | + The speed of the model's spoken response. 1.0 is the default speed. 0.25 is + the minimum speed. 1.5 is the maximum speed. This value can only be changed + in between model turns, not while a response is in progress. + tracing: + title: Tracing Configuration + description: | + Configuration options for tracing. Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the + workflow name, group id, and metadata. + anyOf: + - type: string + default: auto + description: | + Default tracing mode for the session. + enum: + - auto + x-stainless-const: true + - type: object + title: Tracing Configuration + description: | + Granular configuration for tracing. + properties: + workflow_name: + type: string + description: | + The name of the workflow to attach to this trace. This is used to + name the trace in the traces dashboard. + group_id: + type: string + description: | + The group id to attach to this trace to enable filtering and + grouping in the traces dashboard. + metadata: + type: object + description: | + The arbitrary metadata to attach to this trace to enable + filtering in the traces dashboard. + turn_detection: + type: object + description: | + Configuration for turn detection. Can be set to `null` to turn off. Server + VAD means that the model will detect the start and end of speech based on + audio volume and respond at the end of user speech. + properties: + type: + type: string + description: | + Type of turn detection, only `server_vad` is currently supported. + threshold: + type: number + description: | + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A + higher threshold will require louder audio to activate the model, and + thus might perform better in noisy environments. + prefix_padding_ms: + type: integer + description: | + Amount of audio to include before the VAD detected speech (in + milliseconds). Defaults to 300ms. + silence_duration_ms: + type: integer + description: | + Duration of silence to detect speech stop (in milliseconds). Defaults + to 500ms. With shorter values the model will respond more quickly, + but may jump in on short pauses from the user. + tools: + type: array + description: Tools (functions) available to the model. 
+          items:
+            type: object
+            properties:
+              type:
+                type: string
+                enum:
+                  - function
+                description: The type of the tool, i.e. `function`.
+                x-stainless-const: true
+              name:
+                type: string
+                description: The name of the function.
+              description:
+                type: string
+                description: |
+                  The description of the function, including guidance on when and how
+                  to call it, and guidance about what to tell the user when calling
+                  (if anything).
+              parameters:
+                type: object
+                description: Parameters of the function in JSON Schema.
+        tool_choice:
+          type: string
+          description: |
+            How the model chooses tools. Options are `auto`, `none`, `required`, or
+            specify a function.
+        temperature:
+          type: number
+          description: |
+            Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+        max_response_output_tokens:
+          description: |
+            Maximum number of output tokens for a single assistant response,
+            inclusive of tool calls. Provide an integer between 1 and 4096 to
+            limit output tokens, or `inf` for the maximum available tokens for a
+            given model. Defaults to `inf`.
+          anyOf:
+            - type: integer
+            - type: string
+              enum:
+                - inf
+              x-stainless-const: true
+        truncation:
+          $ref: '#/components/schemas/RealtimeTruncation'
+        prompt:
+          $ref: '#/components/schemas/Prompt'
+      required:
+        - client_secret
+      x-oaiMeta:
+        name: The session object
+        group: realtime
+        example: |
+          {
+            "id": "sess_001",
+            "object": "realtime.session",
+            "model": "gpt-realtime-2025-08-28",
+            "modalities": ["audio", "text"],
+            "instructions": "You are a friendly assistant.",
+            "voice": "alloy",
+            "input_audio_format": "pcm16",
+            "output_audio_format": "pcm16",
+            "input_audio_transcription": {
+              "model": "whisper-1"
+            },
+            "turn_detection": null,
+            "tools": [],
+            "tool_choice": "none",
+            "temperature": 0.7,
+            "speed": 1.1,
+            "tracing": "auto",
+            "max_response_output_tokens": 200,
+            "truncation": "auto",
+            "prompt": null,
+            "client_secret": {
+              "value": "ek_abc123",
+              "expires_at": 1234567890
+            }
+          }
+    RealtimeSessionCreateRequestGA:
+      type: object
+      title: Realtime session configuration
+      description: Realtime session object configuration.
+      properties:
+        type:
+          type: string
+          description: |
+            The type of session to create. Always `realtime` for the Realtime API.
+          enum:
+            - realtime
+          x-stainless-const: true
+        output_modalities:
+          type: array
+          description: >
+            The set of modalities the model can respond with. It defaults to `["audio"]`, indicating
+
+            that the model will respond with audio plus a transcript. `["text"]` can be used to make
+
+            the model respond with text only. It is not possible to request both `text` and `audio` at the
+            same time.
+          default:
+            - audio
+          items:
+            type: string
+            enum:
+              - text
+              - audio
+        model:
+          anyOf:
+            - type: string
+            - type: string
+              enum:
+                - gpt-realtime
+                - gpt-realtime-2025-08-28
+                - gpt-4o-realtime-preview
+                - gpt-4o-realtime-preview-2024-10-01
+                - gpt-4o-realtime-preview-2024-12-17
+                - gpt-4o-realtime-preview-2025-06-03
+                - gpt-4o-mini-realtime-preview
+                - gpt-4o-mini-realtime-preview-2024-12-17
+                - gpt-realtime-mini
+                - gpt-realtime-mini-2025-10-06
+                - gpt-audio-mini
+                - gpt-audio-mini-2025-10-06
+              x-stainless-nominal: false
+          description: |
+            The Realtime model used for this session.
+        instructions:
+          type: string
+          description: >
+            The default system instructions (i.e. system message) prepended to model calls. This field allows
+            the client to guide the model on desired responses. The model can be instructed on response
+            content and format, (e.g.
"be extremely succinct", "act friendly", "here are examples of good + responses") and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", "laugh + frequently"). The instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + + Note that the server sets default instructions which will be used if this field is not set and are + visible in the `session.created` event at the start of the session. + audio: + type: object + description: | + Configuration for input and output audio. + properties: + input: + type: object + properties: + format: + $ref: '#/components/schemas/RealtimeAudioFormats' + description: The format of the input audio. + transcription: + description: > + Configuration for input audio transcription, defaults to off and can be set to `null` to + turn off once on. Input audio transcription is not native to the model, since the model + consumes audio directly. Transcription runs asynchronously through [the + /audio/transcriptions + endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) and + should be treated as guidance of input audio content rather than precisely what the model + heard. The client can optionally set the language and prompt for transcription, these + offer additional guidance to the transcription service. + $ref: '#/components/schemas/AudioTranscription' + noise_reduction: + type: object + description: > + Configuration for input audio noise reduction. This can be set to `null` to turn off. + + Noise reduction filters audio added to the input audio buffer before it is sent to VAD and + the model. + + Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) + and model performance by improving perception of the input audio. + properties: + type: + $ref: '#/components/schemas/NoiseReductionType' + turn_detection: + $ref: '#/components/schemas/RealtimeTurnDetection' + output: + type: object + properties: + format: + $ref: '#/components/schemas/RealtimeAudioFormats' + description: The format of the output audio. + voice: + $ref: '#/components/schemas/VoiceIdsShared' + default: alloy + description: | + The voice the model uses to respond. Voice cannot be changed during the + session once the model has responded with audio at least once. Current + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, + `shimmer`, `verse`, `marin`, and `cedar`. We recommend `marin` and `cedar` for + best quality. + speed: + type: number + default: 1 + maximum: 1.5 + minimum: 0.25 + description: > + The speed of the model's spoken response as a multiple of the original speed. + + 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. This value + can only be changed in between model turns, not while a response is in progress. + + + This parameter is a post-processing adjustment to the audio after it is generated, it's + + also possible to prompt the model to speak faster or slower. + include: + type: array + items: + type: string + enum: + - item.input_audio_transcription.logprobs + description: | + Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + tracing: + title: Tracing Configuration + description: > + Realtime API can write session traces to the [Traces Dashboard](/logs?api=traces). Set to null to + disable tracing. Once + + tracing is enabled for a session, the configuration cannot be modified. 
+ + + `auto` will create a trace for the session with default values for the + + workflow name, group id, and metadata. + nullable: true + anyOf: + - type: string + title: auto + default: auto + description: | + Enables tracing and sets default values for tracing configuration options. Always `auto`. + enum: + - auto + x-stainless-const: true + - type: object + title: Tracing Configuration + description: | + Granular configuration for tracing. + properties: + workflow_name: + type: string + description: | + The name of the workflow to attach to this trace. This is used to + name the trace in the Traces Dashboard. + group_id: + type: string + description: | + The group id to attach to this trace to enable filtering and + grouping in the Traces Dashboard. + metadata: + type: object + description: | + The arbitrary metadata to attach to this trace to enable + filtering in the Traces Dashboard. + tools: + type: array + description: Tools available to the model. + items: + anyOf: + - $ref: '#/components/schemas/RealtimeFunctionTool' + - $ref: '#/components/schemas/MCPTool' + discriminator: + propertyName: type + tool_choice: + description: | + How the model chooses tools. Provide one of the string modes or force a specific + function/MCP tool. + default: auto + anyOf: + - $ref: '#/components/schemas/ToolChoiceOptions' + - $ref: '#/components/schemas/ToolChoiceFunction' + - $ref: '#/components/schemas/ToolChoiceMCP' + max_output_tokens: + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls. Provide an integer between 1 and 4096 to + limit output tokens, or `inf` for the maximum available tokens for a + given model. Defaults to `inf`. + anyOf: + - type: integer + - type: string + enum: + - inf + x-stainless-const: true + truncation: + $ref: '#/components/schemas/RealtimeTruncation' + prompt: + $ref: '#/components/schemas/Prompt' + required: + - type + RealtimeSessionCreateResponse: + type: object + title: Realtime session configuration object + description: | + A Realtime session configuration object. + properties: + id: + type: string + description: | + Unique identifier for the session that looks like `sess_1234567890abcdef`. + object: + type: string + description: The object type. Always `realtime.session`. + expires_at: + type: integer + description: Expiration timestamp for the session, in seconds since epoch. + include: + type: array + items: + type: string + enum: + - item.input_audio_transcription.logprobs + description: | + Additional fields to include in server outputs. + - `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + model: + type: string + description: The Realtime model used for this session. + output_modalities: + description: | + The set of modalities the model can respond with. To disable audio, + set this to ["text"]. + items: + type: string + enum: + - text + - audio + instructions: + type: string + description: | + The default system instructions (i.e. system message) prepended to model + calls. This field allows the client to guide the model on desired + responses. The model can be instructed on response content and format, + (e.g. "be extremely succinct", "act friendly", "here are examples of good + responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed + to be followed by the model, but they provide guidance to the model on the + desired behavior. 
+ + Note that the server sets default instructions which will be used if this + field is not set and are visible in the `session.created` event at the + start of the session. + audio: + type: object + description: | + Configuration for input and output audio for the session. + properties: + input: + type: object + properties: + format: + $ref: '#/components/schemas/RealtimeAudioFormats' + transcription: + description: | + Configuration for input audio transcription. + $ref: '#/components/schemas/AudioTranscription' + noise_reduction: + type: object + description: | + Configuration for input audio noise reduction. + properties: + type: + $ref: '#/components/schemas/NoiseReductionType' + turn_detection: + type: object + description: | + Configuration for turn detection. + properties: + type: + type: string + description: | + Type of turn detection, only `server_vad` is currently supported. + threshold: + type: number + prefix_padding_ms: + type: integer + silence_duration_ms: + type: integer + output: + type: object + properties: + format: + $ref: '#/components/schemas/RealtimeAudioFormats' + voice: + $ref: '#/components/schemas/VoiceIdsShared' + speed: + type: number + tracing: + title: Tracing Configuration + description: | + Configuration options for tracing. Set to null to disable tracing. Once + tracing is enabled for a session, the configuration cannot be modified. + + `auto` will create a trace for the session with default values for the + workflow name, group id, and metadata. + anyOf: + - type: string + default: auto + description: | + Default tracing mode for the session. + enum: + - auto + x-stainless-const: true + - type: object + title: Tracing Configuration + description: | + Granular configuration for tracing. + properties: + workflow_name: + type: string + description: | + The name of the workflow to attach to this trace. This is used to + name the trace in the traces dashboard. + group_id: + type: string + description: | + The group id to attach to this trace to enable filtering and + grouping in the traces dashboard. + metadata: + type: object + description: | + The arbitrary metadata to attach to this trace to enable + filtering in the traces dashboard. + turn_detection: + type: object + description: | + Configuration for turn detection. Can be set to `null` to turn off. Server + VAD means that the model will detect the start and end of speech based on + audio volume and respond at the end of user speech. + properties: + type: + type: string + description: | + Type of turn detection, only `server_vad` is currently supported. + threshold: + type: number + description: | + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A + higher threshold will require louder audio to activate the model, and + thus might perform better in noisy environments. + prefix_padding_ms: + type: integer + description: | + Amount of audio to include before the VAD detected speech (in + milliseconds). Defaults to 300ms. + silence_duration_ms: + type: integer + description: | + Duration of silence to detect speech stop (in milliseconds). Defaults + to 500ms. With shorter values the model will respond more quickly, + but may jump in on short pauses from the user. + tools: + type: array + description: Tools (functions) available to the model. + items: + $ref: '#/components/schemas/RealtimeFunctionTool' + tool_choice: + type: string + description: | + How the model chooses tools. Options are `auto`, `none`, `required`, or + specify a function. 
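+        # Illustrative, non-normative: because this legacy field is a plain string,
+        # a session object might simply carry, e.g.
+        #   "tool_choice": "required"
+        # The object forms for forcing a specific function or MCP tool are defined
+        # by the ToolChoiceFunction / ToolChoiceMCP schemas used elsewhere in this spec.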
+ max_output_tokens: + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls. Provide an integer between 1 and 4096 to + limit output tokens, or `inf` for the maximum available tokens for a + given model. Defaults to `inf`. + anyOf: + - type: integer + - type: string + enum: + - inf + x-stainless-const: true + x-oaiMeta: + name: The session object + group: realtime + example: | + { + "id": "sess_001", + "object": "realtime.session", + "expires_at": 1742188264, + "model": "gpt-realtime", + "output_modalities": ["audio"], + "instructions": "You are a friendly assistant.", + "tools": [], + "tool_choice": "none", + "max_output_tokens": "inf", + "tracing": "auto", + "truncation": "auto", + "prompt": null, + "audio": { + "input": { + "format": { + "type": "audio/pcm", + "rate": 24000 + }, + "transcription": { "model": "whisper-1" }, + "noise_reduction": null, + "turn_detection": null + }, + "output": { + "format": { + "type": "audio/pcm", + "rate": 24000 + }, + "voice": "alloy", + "speed": 1.0 + } + } + } + RealtimeSessionCreateResponseGA: + type: object + description: | + A new Realtime session configuration, with an ephemeral key. Default TTL + for keys is one minute. + properties: + client_secret: + type: object + description: Ephemeral key returned by the API. + properties: + value: + type: string + description: > + Ephemeral key usable in client environments to authenticate connections to the Realtime API. + Use this in client-side environments rather than a standard API token, which should only be + used server-side. + expires_at: + type: integer + description: | + Timestamp for when the token expires. Currently, all tokens expire + after one minute. + required: + - value + - expires_at + type: + type: string + description: | + The type of session to create. Always `realtime` for the Realtime API. + enum: + - realtime + x-stainless-const: true + output_modalities: + type: array + description: > + The set of modalities the model can respond with. It defaults to `["audio"]`, indicating + + that the model will respond with audio plus a transcript. `["text"]` can be used to make + + the model respond with text only. It is not possible to request both `text` and `audio` at the + same time. + default: + - audio + items: + type: string + enum: + - text + - audio + model: + anyOf: + - type: string + - type: string + enum: + - gpt-realtime + - gpt-realtime-2025-08-28 + - gpt-4o-realtime-preview + - gpt-4o-realtime-preview-2024-10-01 + - gpt-4o-realtime-preview-2024-12-17 + - gpt-4o-realtime-preview-2025-06-03 + - gpt-4o-mini-realtime-preview + - gpt-4o-mini-realtime-preview-2024-12-17 + - gpt-realtime-mini + - gpt-realtime-mini-2025-10-06 + - gpt-audio-mini + - gpt-audio-mini-2025-10-06 + description: | + The Realtime model used for this session. + instructions: + type: string + description: > + The default system instructions (i.e. system message) prepended to model calls. This field allows + the client to guide the model on desired responses. The model can be instructed on response + content and format, (e.g. "be extremely succinct", "act friendly", "here are examples of good + responses") and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", "laugh + frequently"). The instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. 
+ + + Note that the server sets default instructions which will be used if this field is not set and are + visible in the `session.created` event at the start of the session. + audio: + type: object + description: | + Configuration for input and output audio. + properties: + input: + type: object + properties: + format: + $ref: '#/components/schemas/RealtimeAudioFormats' + description: The format of the input audio. + transcription: + description: > + Configuration for input audio transcription, defaults to off and can be set to `null` to + turn off once on. Input audio transcription is not native to the model, since the model + consumes audio directly. Transcription runs asynchronously through [the + /audio/transcriptions + endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) and + should be treated as guidance of input audio content rather than precisely what the model + heard. The client can optionally set the language and prompt for transcription, these + offer additional guidance to the transcription service. + $ref: '#/components/schemas/AudioTranscription' + noise_reduction: + type: object + description: > + Configuration for input audio noise reduction. This can be set to `null` to turn off. + + Noise reduction filters audio added to the input audio buffer before it is sent to VAD and + the model. + + Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) + and model performance by improving perception of the input audio. + properties: + type: + $ref: '#/components/schemas/NoiseReductionType' + turn_detection: + $ref: '#/components/schemas/RealtimeTurnDetection' + output: + type: object + properties: + format: + $ref: '#/components/schemas/RealtimeAudioFormats' + description: The format of the output audio. + voice: + $ref: '#/components/schemas/VoiceIdsShared' + default: alloy + description: | + The voice the model uses to respond. Voice cannot be changed during the + session once the model has responded with audio at least once. Current + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, + `shimmer`, `verse`, `marin`, and `cedar`. We recommend `marin` and `cedar` for + best quality. + speed: + type: number + default: 1 + maximum: 1.5 + minimum: 0.25 + description: > + The speed of the model's spoken response as a multiple of the original speed. + + 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed. This value + can only be changed in between model turns, not while a response is in progress. + + + This parameter is a post-processing adjustment to the audio after it is generated, it's + + also possible to prompt the model to speak faster or slower. + include: + type: array + items: + type: string + enum: + - item.input_audio_transcription.logprobs + description: | + Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + tracing: + anyOf: + - title: Tracing Configuration + description: > + Realtime API can write session traces to the [Traces Dashboard](/logs?api=traces). Set to null + to disable tracing. Once + + tracing is enabled for a session, the configuration cannot be modified. + + + `auto` will create a trace for the session with default values for the + + workflow name, group id, and metadata. + anyOf: + - type: string + title: auto + default: auto + description: | + Enables tracing and sets default values for tracing configuration options. Always `auto`. 
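+            # Non-normative sketch of the granular object form defined below, as it
+            # might appear in a session payload (names are illustrative placeholders):
+            #   "tracing": {
+            #     "workflow_name": "support_call",
+            #     "group_id": "user_1234",
+            #     "metadata": { "locale": "en-US" }
+            #   }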
+ enum: + - auto + x-stainless-const: true + - type: object + title: Tracing Configuration + description: | + Granular configuration for tracing. + properties: + workflow_name: + type: string + description: | + The name of the workflow to attach to this trace. This is used to + name the trace in the Traces Dashboard. + group_id: + type: string + description: | + The group id to attach to this trace to enable filtering and + grouping in the Traces Dashboard. + metadata: + type: object + description: | + The arbitrary metadata to attach to this trace to enable + filtering in the Traces Dashboard. + - type: 'null' + tools: + type: array + description: Tools available to the model. + items: + anyOf: + - $ref: '#/components/schemas/RealtimeFunctionTool' + - $ref: '#/components/schemas/MCPTool' + tool_choice: + description: | + How the model chooses tools. Provide one of the string modes or force a specific + function/MCP tool. + default: auto + anyOf: + - $ref: '#/components/schemas/ToolChoiceOptions' + - $ref: '#/components/schemas/ToolChoiceFunction' + - $ref: '#/components/schemas/ToolChoiceMCP' + max_output_tokens: + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls. Provide an integer between 1 and 4096 to + limit output tokens, or `inf` for the maximum available tokens for a + given model. Defaults to `inf`. + anyOf: + - type: integer + - type: string + enum: + - inf + x-stainless-const: true + truncation: + $ref: '#/components/schemas/RealtimeTruncation' + prompt: + $ref: '#/components/schemas/Prompt' + required: + - client_secret + - type + x-oaiMeta: + name: The session object + group: realtime + RealtimeTranscriptionSessionCreateRequest: + type: object + title: Realtime transcription session configuration + description: Realtime transcription session object configuration. + properties: + turn_detection: + type: object + description: > + Configuration for turn detection. Can be set to `null` to turn off. Server VAD means that the + model will detect the start and end of speech based on audio volume and respond at the end of user + speech. + properties: + type: + type: string + description: | + Type of turn detection. Only `server_vad` is currently supported for transcription sessions. + enum: + - server_vad + threshold: + type: number + description: | + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A + higher threshold will require louder audio to activate the model, and + thus might perform better in noisy environments. + prefix_padding_ms: + type: integer + description: | + Amount of audio to include before the VAD detected speech (in + milliseconds). Defaults to 300ms. + silence_duration_ms: + type: integer + description: | + Duration of silence to detect speech stop (in milliseconds). Defaults + to 500ms. With shorter values the model will respond more quickly, + but may jump in on short pauses from the user. + input_audio_noise_reduction: + type: object + description: > + Configuration for input audio noise reduction. This can be set to `null` to turn off. + + Noise reduction filters audio added to the input audio buffer before it is sent to VAD and the + model. + + Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) and + model performance by improving perception of the input audio. 
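+              # Illustrative only, assuming `near_field` is one of the
+              # NoiseReductionType values referenced below:
+              #   "noise_reduction": { "type": "near_field" }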
+ properties: + type: + $ref: '#/components/schemas/NoiseReductionType' + input_audio_format: + type: string + default: pcm16 + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: | + The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + For `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, + single channel (mono), and little-endian byte order. + input_audio_transcription: + description: > + Configuration for input audio transcription. The client can optionally set the language and prompt + for transcription, these offer additional guidance to the transcription service. + $ref: '#/components/schemas/AudioTranscription' + include: + type: array + items: + type: string + enum: + - item.input_audio_transcription.logprobs + description: | + The set of items to include in the transcription. Current available items are: + `item.input_audio_transcription.logprobs` + RealtimeTranscriptionSessionCreateRequestGA: + type: object + title: Realtime transcription session configuration + description: Realtime transcription session object configuration. + properties: + type: + type: string + description: | + The type of session to create. Always `transcription` for transcription sessions. + enum: + - transcription + x-stainless-const: true + audio: + type: object + description: | + Configuration for input and output audio. + properties: + input: + type: object + properties: + format: + $ref: '#/components/schemas/RealtimeAudioFormats' + transcription: + description: > + Configuration for input audio transcription, defaults to off and can be set to `null` to + turn off once on. Input audio transcription is not native to the model, since the model + consumes audio directly. Transcription runs asynchronously through [the + /audio/transcriptions + endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription) and + should be treated as guidance of input audio content rather than precisely what the model + heard. The client can optionally set the language and prompt for transcription, these + offer additional guidance to the transcription service. + $ref: '#/components/schemas/AudioTranscription' + noise_reduction: + type: object + description: > + Configuration for input audio noise reduction. This can be set to `null` to turn off. + + Noise reduction filters audio added to the input audio buffer before it is sent to VAD and + the model. + + Filtering the audio can improve VAD and turn detection accuracy (reducing false positives) + and model performance by improving perception of the input audio. + properties: + type: + $ref: '#/components/schemas/NoiseReductionType' + turn_detection: + $ref: '#/components/schemas/RealtimeTurnDetection' + include: + type: array + items: + type: string + enum: + - item.input_audio_transcription.logprobs + description: | + Additional fields to include in server outputs. + + `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + required: + - type + RealtimeTranscriptionSessionCreateResponse: + type: object + description: | + A new Realtime transcription session configuration. + + When a session is created on the server via REST API, the session object + also contains an ephemeral key. Default TTL for keys is 10 minutes. This + property is not present when a session is updated via the WebSocket API. + properties: + client_secret: + type: object + description: | + Ephemeral key returned by the API. Only present when the session is + created on the server via REST API. 
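+        # Non-normative sketch of the shape described below; the value shown is a
+        # fabricated placeholder, never a real credential:
+        #   "client_secret": { "value": "ek_abc123", "expires_at": 1742188324 }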
+ properties: + value: + type: string + description: | + Ephemeral key usable in client environments to authenticate connections + to the Realtime API. Use this in client-side environments rather than + a standard API token, which should only be used server-side. + expires_at: + type: integer + description: | + Timestamp for when the token expires. Currently, all tokens expire + after one minute. + required: + - value + - expires_at + modalities: + description: | + The set of modalities the model can respond with. To disable audio, + set this to ["text"]. + items: + type: string + enum: + - text + - audio + input_audio_format: + type: string + description: | + The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + input_audio_transcription: + description: | + Configuration of the transcription model. + $ref: '#/components/schemas/AudioTranscription' + turn_detection: + type: object + description: | + Configuration for turn detection. Can be set to `null` to turn off. Server + VAD means that the model will detect the start and end of speech based on + audio volume and respond at the end of user speech. + properties: + type: + type: string + description: | + Type of turn detection, only `server_vad` is currently supported. + threshold: + type: number + description: | + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A + higher threshold will require louder audio to activate the model, and + thus might perform better in noisy environments. + prefix_padding_ms: + type: integer + description: | + Amount of audio to include before the VAD detected speech (in + milliseconds). Defaults to 300ms. + silence_duration_ms: + type: integer + description: | + Duration of silence to detect speech stop (in milliseconds). Defaults + to 500ms. With shorter values the model will respond more quickly, + but may jump in on short pauses from the user. + required: + - client_secret + x-oaiMeta: + name: The transcription session object + group: realtime + example: | + { + "id": "sess_BBwZc7cFV3XizEyKGDCGL", + "object": "realtime.transcription_session", + "expires_at": 1742188264, + "modalities": ["audio", "text"], + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200 + }, + "input_audio_format": "pcm16", + "input_audio_transcription": { + "model": "gpt-4o-transcribe", + "language": null, + "prompt": "" + }, + "client_secret": null + } + RealtimeTranscriptionSessionCreateResponseGA: + type: object + title: Realtime transcription session configuration object + description: | + A Realtime transcription session configuration object. + properties: + type: + type: string + description: | + The type of session. Always `transcription` for transcription sessions. + enum: + - transcription + x-stainless-const: true + id: + type: string + description: | + Unique identifier for the session that looks like `sess_1234567890abcdef`. + object: + type: string + description: The object type. Always `realtime.transcription_session`. + expires_at: + type: integer + description: Expiration timestamp for the session, in seconds since epoch. + include: + type: array + items: + type: string + enum: + - item.input_audio_transcription.logprobs + description: | + Additional fields to include in server outputs. + - `item.input_audio_transcription.logprobs`: Include logprobs for input audio transcription. + audio: + type: object + description: | + Configuration for input audio for the session. 
+          properties:
+            input:
+              type: object
+              properties:
+                format:
+                  $ref: '#/components/schemas/RealtimeAudioFormats'
+                transcription:
+                  description: |
+                    Configuration of the transcription model.
+                  $ref: '#/components/schemas/AudioTranscription'
+                noise_reduction:
+                  type: object
+                  description: |
+                    Configuration for input audio noise reduction.
+                  properties:
+                    type:
+                      $ref: '#/components/schemas/NoiseReductionType'
+                turn_detection:
+                  type: object
+                  description: |
+                    Configuration for turn detection. Can be set to `null` to turn off. Server
+                    VAD means that the model will detect the start and end of speech based on
+                    audio volume and respond at the end of user speech.
+                  properties:
+                    type:
+                      type: string
+                      description: |
+                        Type of turn detection, only `server_vad` is currently supported.
+                    threshold:
+                      type: number
+                      description: |
+                        Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A
+                        higher threshold will require louder audio to activate the model, and
+                        thus might perform better in noisy environments.
+                    prefix_padding_ms:
+                      type: integer
+                      description: |
+                        Amount of audio to include before the VAD detected speech (in
+                        milliseconds). Defaults to 300ms.
+                    silence_duration_ms:
+                      type: integer
+                      description: |
+                        Duration of silence to detect speech stop (in milliseconds). Defaults
+                        to 500ms. With shorter values the model will respond more quickly,
+                        but may jump in on short pauses from the user.
+      required:
+        - type
+        - id
+        - object
+      x-oaiMeta:
+        name: The transcription session object
+        group: realtime
+        example: |
+          {
+            "id": "sess_BBwZc7cFV3XizEyKGDCGL",
+            "type": "transcription",
+            "object": "realtime.transcription_session",
+            "expires_at": 1742188264,
+            "include": ["item.input_audio_transcription.logprobs"],
+            "audio": {
+              "input": {
+                "format": "pcm16",
+                "transcription": {
+                  "model": "gpt-4o-transcribe",
+                  "language": null,
+                  "prompt": ""
+                },
+                "noise_reduction": null,
+                "turn_detection": {
+                  "type": "server_vad",
+                  "threshold": 0.5,
+                  "prefix_padding_ms": 300,
+                  "silence_duration_ms": 200
+                }
+              }
+            }
+          }
+    RealtimeTruncation:
+      title: Realtime Truncation Controls
+      description: >
+        When the number of tokens in a conversation exceeds the model's input token limit, the conversation
+        will be truncated, meaning messages (starting from the oldest) will not be included in the model's
+        context. A 32k context model with 4,096 max output tokens can only include 28,224 tokens in the
+        context before truncation occurs.
+
+        Clients can configure truncation behavior to truncate with a lower max token limit, which is an
+        effective way to control token usage and cost.
+
+        Truncation will reduce the number of cached tokens on the next turn (busting the cache), since
+        messages are dropped from the beginning of the context. However, clients can also configure truncation
+        to retain messages up to a fraction of the maximum context size, which will reduce the need for future
+        truncations and thus improve the cache rate.
+
+        Truncation can be disabled entirely, which means the server will never truncate but would instead
+        return an error if the conversation exceeds the model's input token limit.
+      anyOf:
+        - type: string
+          description: >-
+            The truncation strategy to use for the session. `auto` is the default truncation strategy.
+            `disabled` will disable truncation and emit errors when the conversation exceeds the input token
+            limit.
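+          # Non-normative sketch: `truncation` accepts either a string mode or the
+          # retention-ratio object defined just below:
+          #   "truncation": "auto"
+          #   "truncation": { "type": "retention_ratio", "retention_ratio": 0.8 }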
+          enum:
+            - auto
+            - disabled
+          title: RealtimeTruncationStrategy
+        - type: object
+          title: Retention ratio truncation
+          description: >-
+            Retain a fraction of the conversation tokens when the conversation exceeds the input token limit.
+            This allows you to amortize truncations across multiple turns, which can help improve cached token
+            usage.
+          properties:
+            type:
+              type: string
+              enum:
+                - retention_ratio
+              description: Use retention ratio truncation.
+              x-stainless-const: true
+            retention_ratio:
+              type: number
+              description: >
+                Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when the
+                conversation exceeds the input token limit. Setting this to `0.8` means that messages will be
+                dropped until 80% of the maximum allowed tokens are used. This helps reduce the frequency of
+                truncations and improve cache rates.
+              minimum: 0
+              maximum: 1
+            token_limits:
+              type: object
+              description: >-
+                Optional custom token limits for this truncation strategy. If not provided, the model's
+                default token limits will be used.
+              properties:
+                post_instructions:
+                  type: integer
+                  description: >-
+                    Maximum tokens allowed in the conversation after instructions (which include tool
+                    definitions). For example, setting this to 5,000 would mean that truncation would occur
+                    when the conversation exceeds 5,000 tokens after instructions. This cannot be higher than
+                    the model's context window size minus the maximum output tokens.
+                  minimum: 0
+          required:
+            - type
+            - retention_ratio
+    RealtimeTurnDetection:
+      anyOf:
+        - title: Realtime Turn Detection
+          description: >
+            Configuration for turn detection, either Server VAD or Semantic VAD. This can be set to `null` to
+            turn off, in which case the client must manually trigger a model response.
+
+
+            Server VAD means that the model will detect the start and end of speech based on audio volume and
+            respond at the end of user speech.
+
+
+            Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to
+            semantically estimate whether the user has finished speaking, then dynamically sets a timeout
+            based on this probability. For example, if user audio trails off with "uhhm", the model will score
+            a low probability of turn end and wait longer for the user to continue speaking. This can be
+            useful for more natural conversations, but may have a higher latency.
+          discriminator:
+            propertyName: type
+          anyOf:
+            - type: object
+              title: Server VAD
+              description: >-
+                Server-side voice activity detection (VAD) which flips on when user speech is detected and off
+                after a period of silence.
+              required:
+                - type
+              properties:
+                type:
+                  type: string
+                  default: server_vad
+                  const: server_vad
+                  description: |
+                    Type of turn detection, `server_vad` to turn on simple Server VAD.
+                threshold:
+                  type: number
+                  description: >
+                    Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults
+                    to 0.5. A higher threshold will require louder audio to activate the model, and thus
+                    might perform better in noisy environments.
+                prefix_padding_ms:
+                  type: integer
+                  description: >
+                    Used only for `server_vad` mode. Amount of audio to include before the VAD detected speech
+                    (in milliseconds). Defaults to 300ms.
+                silence_duration_ms:
+                  type: integer
+                  description: >
+                    Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+                    milliseconds). Defaults to 500ms. With shorter values the model will respond more quickly,
+                    but may jump in on short pauses from the user.
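+                # Non-normative sketch of a complete Server VAD block as a client
+                # might send it (values are illustrative, not recommendations; the
+                # `create_response` and related flags are defined immediately below):
+                #   "turn_detection": {
+                #     "type": "server_vad",
+                #     "threshold": 0.6,
+                #     "prefix_padding_ms": 300,
+                #     "silence_duration_ms": 500
+                #   }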
+ create_response: + type: boolean + default: true + description: | + Whether or not to automatically generate a response when a VAD stop event occurs. + interrupt_response: + type: boolean + default: true + description: | + Whether or not to automatically interrupt any ongoing response with output to the default + conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. + idle_timeout_ms: + anyOf: + - type: integer + minimum: 5000 + maximum: 30000 + description: > + Optional timeout after which a model response will be triggered automatically. This is + + useful for situations in which a long pause from the user is unexpected, such as a + phone + + call. The model will effectively prompt the user to continue the conversation based + + on the current context. + + + The timeout value will be applied after the last model response's audio has finished + playing, + + i.e. it's set to the `response.done` time plus audio playback duration. + + + An `input_audio_buffer.timeout_triggered` event (plus events + + associated with the Response) will be emitted when the timeout is reached. + + Idle timeout is currently only supported for `server_vad` mode. + - type: 'null' + - type: object + title: Semantic VAD + description: >- + Server-side semantic turn detection which uses a model to determine when the user has finished + speaking. + required: + - type + properties: + type: + type: string + const: semantic_vad + description: | + Type of turn detection, `semantic_vad` to turn on Semantic VAD. + eagerness: + type: string + default: auto + enum: + - low + - medium + - high + - auto + description: > + Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait + longer for the user to continue speaking, `high` will respond more quickly. `auto` is the + default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of + 8s, 4s, and 2s respectively. + create_response: + type: boolean + default: true + description: | + Whether or not to automatically generate a response when a VAD stop event occurs. + interrupt_response: + type: boolean + default: true + description: | + Whether or not to automatically interrupt any ongoing response with output to the default + conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. + - type: 'null' + Reasoning: + type: object + description: | + **gpt-5 and o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + title: Reasoning + properties: + effort: + $ref: '#/components/schemas/ReasoningEffort' + summary: + anyOf: + - type: string + description: | + A summary of the reasoning performed by the model. This can be + useful for debugging and understanding the model's reasoning process. + One of `auto`, `concise`, or `detailed`. + + `concise` is only supported for `computer-use-preview` models. + enum: + - auto + - concise + - detailed + - type: 'null' + generate_summary: + anyOf: + - type: string + deprecated: true + description: | + **Deprecated:** use `summary` instead. + + A summary of the reasoning performed by the model. This can be + useful for debugging and understanding the model's reasoning process. + One of `auto`, `concise`, or `detailed`. 
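+              # Illustrative, non-normative: a typical Reasoning block on a request,
+              # using the non-deprecated `summary` field instead of `generate_summary`:
+              #   "reasoning": { "effort": "low", "summary": "auto" }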
+ enum: + - auto + - concise + - detailed + - type: 'null' + ReasoningEffort: + anyOf: + - type: string + enum: + - none + - minimal + - low + - medium + - high + default: medium + description: > + Constrains effort on reasoning for + + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + Currently supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing + + reasoning effort can result in faster responses and fewer tokens used + + on reasoning in a response. + + + - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values + for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool calls are supported for all reasoning + values in gpt-5.1. + + - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. + + - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - type: 'null' + ReasoningItem: + type: object + description: | + A description of the chain of thought used by a reasoning model while generating + a response. Be sure to include these items in your `input` to the Responses API + for subsequent turns of a conversation if you are manually + [managing context](https://platform.openai.com/docs/guides/conversation-state). + title: Reasoning + properties: + type: + type: string + description: | + The type of the object. Always `reasoning`. + enum: + - reasoning + x-stainless-const: true + id: + type: string + description: | + The unique identifier of the reasoning content. + encrypted_content: + anyOf: + - type: string + description: | + The encrypted content of the reasoning item - populated when a response is + generated with `reasoning.encrypted_content` in the `include` parameter. + - type: 'null' + summary: + type: array + description: | + Reasoning summary content. + items: + $ref: '#/components/schemas/Summary' + content: + type: array + description: | + Reasoning text content. + items: + $ref: '#/components/schemas/ReasoningTextContent' + status: + type: string + description: | + The status of the item. One of `in_progress`, `completed`, or + `incomplete`. Populated when items are returned via API. + enum: + - in_progress + - completed + - incomplete + required: + - id + - summary + - type + Response: + title: The response object + allOf: + - $ref: '#/components/schemas/ModelResponseProperties' + - $ref: '#/components/schemas/ResponseProperties' + - type: object + properties: + id: + type: string + description: | + Unique identifier for this Response. + object: + type: string + description: | + The object type of this resource - always set to `response`. + enum: + - response + x-stainless-const: true + status: + type: string + description: | + The status of the response generation. One of `completed`, `failed`, + `in_progress`, `cancelled`, `queued`, or `incomplete`. + enum: + - completed + - failed + - in_progress + - cancelled + - queued + - incomplete + created_at: + type: number + description: | + Unix timestamp (in seconds) of when this Response was created. + error: + $ref: '#/components/schemas/ResponseError' + incomplete_details: + anyOf: + - type: object + description: | + Details about why the response is incomplete. + properties: + reason: + type: string + description: The reason why the response is incomplete. + enum: + - max_output_tokens + - content_filter + - type: 'null' + output: + type: array + description: | + An array of content items generated by the model. 
+ + - The length and order of items in the `output` array is dependent + on the model's response. + - Rather than accessing the first item in the `output` array and + assuming it's an `assistant` message with the content generated by + the model, you might consider using the `output_text` property where + supported in SDKs. + items: + $ref: '#/components/schemas/OutputItem' + instructions: + anyOf: + - description: | + A system (or developer) message inserted into the model's context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + anyOf: + - type: string + description: | + A text input to the model, equivalent to a text input with the + `developer` role. + - type: array + title: Input item list + description: | + A list of one or many input items to the model, containing + different content types. + items: + $ref: '#/components/schemas/InputItem' + - type: 'null' + output_text: + anyOf: + - type: string + description: | + SDK-only convenience property that contains the aggregated text output + from all `output_text` items in the `output` array, if any are present. + Supported in the Python and JavaScript SDKs. + x-oaiSupportedSDKs: + - python + - javascript + - type: 'null' + x-stainless-skip: true + usage: + $ref: '#/components/schemas/ResponseUsage' + parallel_tool_calls: + type: boolean + description: | + Whether to allow the model to run tool calls in parallel. + default: true + conversation: + anyOf: + - $ref: '#/components/schemas/Conversation-2' + - type: 'null' + required: + - id + - object + - created_at + - error + - incomplete_details + - instructions + - model + - tools + - output + - parallel_tool_calls + - metadata + - tool_choice + - temperature + - top_p + ResponseAudioDeltaEvent: + type: object + description: Emitted when there is a partial audio response. + properties: + type: + type: string + description: | + The type of the event. Always `response.audio.delta`. + enum: + - response.audio.delta + x-stainless-const: true + sequence_number: + type: integer + description: | + A sequence number for this chunk of the stream response. + delta: + type: string + description: | + A chunk of Base64 encoded response audio bytes. + required: + - type + - delta + - sequence_number + x-oaiMeta: + name: response.audio.delta + group: responses + example: | + { + "type": "response.audio.delta", + "response_id": "resp_123", + "delta": "base64encoded...", + "sequence_number": 1 + } + ResponseAudioDoneEvent: + type: object + description: Emitted when the audio response is complete. + properties: + type: + type: string + description: | + The type of the event. Always `response.audio.done`. + enum: + - response.audio.done + x-stainless-const: true + sequence_number: + type: integer + description: | + The sequence number of the delta. + required: + - type + - sequence_number + - response_id + x-oaiMeta: + name: response.audio.done + group: responses + example: | + { + "type": "response.audio.done", + "response_id": "resp-123", + "sequence_number": 1 + } + ResponseAudioTranscriptDeltaEvent: + type: object + description: Emitted when there is a partial transcript of audio. + properties: + type: + type: string + description: | + The type of the event. Always `response.audio.transcript.delta`. 
+ enum: + - response.audio.transcript.delta + x-stainless-const: true + delta: + type: string + description: | + The partial transcript of the audio response. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - response_id + - delta + - sequence_number + x-oaiMeta: + name: response.audio.transcript.delta + group: responses + example: | + { + "type": "response.audio.transcript.delta", + "response_id": "resp_123", + "delta": " ... partial transcript ... ", + "sequence_number": 1 + } + ResponseAudioTranscriptDoneEvent: + type: object + description: Emitted when the full audio transcript is completed. + properties: + type: + type: string + description: | + The type of the event. Always `response.audio.transcript.done`. + enum: + - response.audio.transcript.done + x-stainless-const: true + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - response_id + - sequence_number + x-oaiMeta: + name: response.audio.transcript.done + group: responses + example: | + { + "type": "response.audio.transcript.done", + "response_id": "resp_123", + "sequence_number": 1 + } + ResponseCodeInterpreterCallCodeDeltaEvent: + type: object + description: Emitted when a partial code snippet is streamed by the code interpreter. + properties: + type: + type: string + description: The type of the event. Always `response.code_interpreter_call_code.delta`. + enum: + - response.code_interpreter_call_code.delta + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response for which the code is being streamed. + item_id: + type: string + description: The unique identifier of the code interpreter tool call item. + delta: + type: string + description: The partial code snippet being streamed by the code interpreter. + sequence_number: + type: integer + description: The sequence number of this event, used to order streaming events. + required: + - type + - output_index + - item_id + - delta + - sequence_number + x-oaiMeta: + name: response.code_interpreter_call_code.delta + group: responses + example: | + { + "type": "response.code_interpreter_call_code.delta", + "output_index": 0, + "item_id": "ci_12345", + "delta": "print('Hello, world')", + "sequence_number": 1 + } + ResponseCodeInterpreterCallCodeDoneEvent: + type: object + description: Emitted when the code snippet is finalized by the code interpreter. + properties: + type: + type: string + description: The type of the event. Always `response.code_interpreter_call_code.done`. + enum: + - response.code_interpreter_call_code.done + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response for which the code is finalized. + item_id: + type: string + description: The unique identifier of the code interpreter tool call item. + code: + type: string + description: The final code snippet output by the code interpreter. + sequence_number: + type: integer + description: The sequence number of this event, used to order streaming events. 
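+      # Non-normative sketch: on the stream, one or more `.delta` events are followed
+      # by a single `.done` event carrying the final code; consumers can order them by
+      # `sequence_number` (fields abbreviated for illustration):
+      #   { "type": "response.code_interpreter_call_code.delta", "item_id": "ci_12345", "delta": "print(", "sequence_number": 7 }
+      #   { "type": "response.code_interpreter_call_code.done", "item_id": "ci_12345", "code": "print('hi')", "sequence_number": 9 }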
+ required: + - type + - output_index + - item_id + - code + - sequence_number + x-oaiMeta: + name: response.code_interpreter_call_code.done + group: responses + example: | + { + "type": "response.code_interpreter_call_code.done", + "output_index": 3, + "item_id": "ci_12345", + "code": "print('done')", + "sequence_number": 1 + } + ResponseCodeInterpreterCallCompletedEvent: + type: object + description: Emitted when the code interpreter call is completed. + properties: + type: + type: string + description: The type of the event. Always `response.code_interpreter_call.completed`. + enum: + - response.code_interpreter_call.completed + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response for which the code interpreter call is completed. + item_id: + type: string + description: The unique identifier of the code interpreter tool call item. + sequence_number: + type: integer + description: The sequence number of this event, used to order streaming events. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.code_interpreter_call.completed + group: responses + example: | + { + "type": "response.code_interpreter_call.completed", + "output_index": 5, + "item_id": "ci_12345", + "sequence_number": 1 + } + ResponseCodeInterpreterCallInProgressEvent: + type: object + description: Emitted when a code interpreter call is in progress. + properties: + type: + type: string + description: The type of the event. Always `response.code_interpreter_call.in_progress`. + enum: + - response.code_interpreter_call.in_progress + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response for which the code interpreter call is in progress. + item_id: + type: string + description: The unique identifier of the code interpreter tool call item. + sequence_number: + type: integer + description: The sequence number of this event, used to order streaming events. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.code_interpreter_call.in_progress + group: responses + example: | + { + "type": "response.code_interpreter_call.in_progress", + "output_index": 0, + "item_id": "ci_12345", + "sequence_number": 1 + } + ResponseCodeInterpreterCallInterpretingEvent: + type: object + description: Emitted when the code interpreter is actively interpreting the code snippet. + properties: + type: + type: string + description: The type of the event. Always `response.code_interpreter_call.interpreting`. + enum: + - response.code_interpreter_call.interpreting + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response for which the code interpreter is interpreting code. + item_id: + type: string + description: The unique identifier of the code interpreter tool call item. + sequence_number: + type: integer + description: The sequence number of this event, used to order streaming events. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.code_interpreter_call.interpreting + group: responses + example: | + { + "type": "response.code_interpreter_call.interpreting", + "output_index": 4, + "item_id": "ci_12345", + "sequence_number": 1 + } + ResponseCompletedEvent: + type: object + description: Emitted when the model response is complete. + properties: + type: + type: string + description: | + The type of the event. 
Always `response.completed`. + enum: + - response.completed + x-stainless-const: true + response: + $ref: '#/components/schemas/Response' + description: | + Properties of the completed response. + sequence_number: + type: integer + description: The sequence number for this event. + required: + - type + - response + - sequence_number + x-oaiMeta: + name: response.completed + group: responses + example: | + { + "type": "response.completed", + "response": { + "id": "resp_123", + "object": "response", + "created_at": 1740855869, + "status": "completed", + "error": null, + "incomplete_details": null, + "input": [], + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [ + { + "id": "msg_123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "In a shimmering forest under a sky full of stars, a lonely unicorn named Lila discovered a hidden pond that glowed with moonlight. Every night, she would leave sparkling, magical flowers by the water's edge, hoping to share her beauty with others. One enchanting evening, she woke to find a group of friendly animals gathered around, eager to be friends and share in her magic.", + "annotations": [] + } + ] + } + ], + "previous_response_id": null, + "reasoning_effort": null, + "store": false, + "temperature": 1, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1, + "truncation": "disabled", + "usage": { + "input_tokens": 0, + "output_tokens": 0, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 0 + }, + "user": null, + "metadata": {} + }, + "sequence_number": 1 + } + ResponseContentPartAddedEvent: + type: object + description: Emitted when a new content part is added. + properties: + type: + type: string + description: | + The type of the event. Always `response.content_part.added`. + enum: + - response.content_part.added + x-stainless-const: true + item_id: + type: string + description: | + The ID of the output item that the content part was added to. + output_index: + type: integer + description: | + The index of the output item that the content part was added to. + content_index: + type: integer + description: | + The index of the content part that was added. + part: + $ref: '#/components/schemas/OutputContent' + description: | + The content part that was added. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - item_id + - output_index + - content_index + - part + - sequence_number + x-oaiMeta: + name: response.content_part.added + group: responses + example: | + { + "type": "response.content_part.added", + "item_id": "msg_123", + "output_index": 0, + "content_index": 0, + "part": { + "type": "output_text", + "text": "", + "annotations": [] + }, + "sequence_number": 1 + } + ResponseContentPartDoneEvent: + type: object + description: Emitted when a content part is done. + properties: + type: + type: string + description: | + The type of the event. Always `response.content_part.done`. + enum: + - response.content_part.done + x-stainless-const: true + item_id: + type: string + description: | + The ID of the output item that the content part was added to. + output_index: + type: integer + description: | + The index of the output item that the content part was added to. + content_index: + type: integer + description: | + The index of the content part that is done. 
+ sequence_number: + type: integer + description: The sequence number of this event. + part: + $ref: '#/components/schemas/OutputContent' + description: | + The content part that is done. + required: + - type + - item_id + - output_index + - content_index + - part + - sequence_number + x-oaiMeta: + name: response.content_part.done + group: responses + example: | + { + "type": "response.content_part.done", + "item_id": "msg_123", + "output_index": 0, + "content_index": 0, + "sequence_number": 1, + "part": { + "type": "output_text", + "text": "In a shimmering forest under a sky full of stars, a lonely unicorn named Lila discovered a hidden pond that glowed with moonlight. Every night, she would leave sparkling, magical flowers by the water's edge, hoping to share her beauty with others. One enchanting evening, she woke to find a group of friendly animals gathered around, eager to be friends and share in her magic.", + "annotations": [] + } + } + ResponseCreatedEvent: + type: object + description: | + An event that is emitted when a response is created. + properties: + type: + type: string + description: | + The type of the event. Always `response.created`. + enum: + - response.created + x-stainless-const: true + response: + $ref: '#/components/schemas/Response' + description: | + The response that was created. + sequence_number: + type: integer + description: The sequence number for this event. + required: + - type + - response + - sequence_number + x-oaiMeta: + name: response.created + group: responses + example: | + { + "type": "response.created", + "response": { + "id": "resp_67ccfcdd16748190a91872c75d38539e09e4d4aac714747c", + "object": "response", + "created_at": 1741487325, + "status": "in_progress", + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4o-2024-08-06", + "output": [], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": null, + "summary": null + }, + "store": true, + "temperature": 1, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1, + "truncation": "disabled", + "usage": null, + "user": null, + "metadata": {} + }, + "sequence_number": 1 + } + ResponseCustomToolCallInputDeltaEvent: + title: ResponseCustomToolCallInputDelta + type: object + description: | + Event representing a delta (partial update) to the input of a custom tool call. + properties: + type: + type: string + enum: + - response.custom_tool_call_input.delta + description: The event type identifier. + x-stainless-const: true + sequence_number: + type: integer + description: The sequence number of this event. + output_index: + type: integer + description: The index of the output this delta applies to. + item_id: + type: string + description: Unique identifier for the API item associated with this event. + delta: + type: string + description: The incremental input data (delta) for the custom tool call. + required: + - type + - output_index + - item_id + - delta + - sequence_number + x-oaiMeta: + name: response.custom_tool_call_input.delta + group: responses + example: | + { + "type": "response.custom_tool_call_input.delta", + "output_index": 0, + "item_id": "ctc_1234567890abcdef", + "delta": "partial input text" + } + ResponseCustomToolCallInputDoneEvent: + title: ResponseCustomToolCallInputDone + type: object + description: | + Event indicating that input for a custom tool call is complete. 
+ properties: + type: + type: string + enum: + - response.custom_tool_call_input.done + description: The event type identifier. + x-stainless-const: true + sequence_number: + type: integer + description: The sequence number of this event. + output_index: + type: integer + description: The index of the output this event applies to. + item_id: + type: string + description: Unique identifier for the API item associated with this event. + input: + type: string + description: The complete input data for the custom tool call. + required: + - type + - output_index + - item_id + - input + - sequence_number + x-oaiMeta: + name: response.custom_tool_call_input.done + group: responses + example: | + { + "type": "response.custom_tool_call_input.done", + "output_index": 0, + "item_id": "ctc_1234567890abcdef", + "input": "final complete input text" + } + ResponseError: + anyOf: + - type: object + description: | + An error object returned when the model fails to generate a Response. + properties: + code: + $ref: '#/components/schemas/ResponseErrorCode' + message: + type: string + description: | + A human-readable description of the error. + required: + - code + - message + - type: 'null' + ResponseErrorCode: + type: string + description: | + The error code for the response. + enum: + - server_error + - rate_limit_exceeded + - invalid_prompt + - vector_store_timeout + - invalid_image + - invalid_image_format + - invalid_base64_image + - invalid_image_url + - image_too_large + - image_too_small + - image_parse_error + - image_content_policy_violation + - invalid_image_mode + - image_file_too_large + - unsupported_image_media_type + - empty_image_file + - failed_to_download_image + - image_file_not_found + ResponseErrorEvent: + type: object + description: Emitted when an error occurs. + properties: + type: + type: string + description: | + The type of the event. Always `error`. + enum: + - error + x-stainless-const: true + code: + anyOf: + - type: string + description: | + The error code. + - type: 'null' + message: + type: string + description: | + The error message. + param: + anyOf: + - type: string + description: | + The error parameter. + - type: 'null' + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - code + - message + - param + - sequence_number + x-oaiMeta: + name: error + group: responses + example: | + { + "type": "error", + "code": "ERR_SOMETHING", + "message": "Something went wrong", + "param": null, + "sequence_number": 1 + } + ResponseFailedEvent: + type: object + description: | + An event that is emitted when a response fails. + properties: + type: + type: string + description: | + The type of the event. Always `response.failed`. + enum: + - response.failed + x-stainless-const: true + sequence_number: + type: integer + description: The sequence number of this event. + response: + $ref: '#/components/schemas/Response' + description: | + The response that failed. + required: + - type + - response + - sequence_number + x-oaiMeta: + name: response.failed + group: responses + example: | + { + "type": "response.failed", + "response": { + "id": "resp_123", + "object": "response", + "created_at": 1740855869, + "status": "failed", + "error": { + "code": "server_error", + "message": "The model failed to generate a response." 
+ }, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [], + "previous_response_id": null, + "reasoning_effort": null, + "store": false, + "temperature": 1, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1, + "truncation": "disabled", + "usage": null, + "user": null, + "metadata": {} + } + } + ResponseFileSearchCallCompletedEvent: + type: object + description: Emitted when a file search call is completed (results found). + properties: + type: + type: string + description: | + The type of the event. Always `response.file_search_call.completed`. + enum: + - response.file_search_call.completed + x-stainless-const: true + output_index: + type: integer + description: | + The index of the output item that the file search call is initiated. + item_id: + type: string + description: | + The ID of the output item that the file search call is initiated. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.file_search_call.completed + group: responses + example: | + { + "type": "response.file_search_call.completed", + "output_index": 0, + "item_id": "fs_123", + "sequence_number": 1 + } + ResponseFileSearchCallInProgressEvent: + type: object + description: Emitted when a file search call is initiated. + properties: + type: + type: string + description: | + The type of the event. Always `response.file_search_call.in_progress`. + enum: + - response.file_search_call.in_progress + x-stainless-const: true + output_index: + type: integer + description: | + The index of the output item that the file search call is initiated. + item_id: + type: string + description: | + The ID of the output item that the file search call is initiated. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.file_search_call.in_progress + group: responses + example: | + { + "type": "response.file_search_call.in_progress", + "output_index": 0, + "item_id": "fs_123", + "sequence_number": 1 + } + ResponseFileSearchCallSearchingEvent: + type: object + description: Emitted when a file search is currently searching. + properties: + type: + type: string + description: | + The type of the event. Always `response.file_search_call.searching`. + enum: + - response.file_search_call.searching + x-stainless-const: true + output_index: + type: integer + description: | + The index of the output item that the file search call is searching. + item_id: + type: string + description: | + The ID of the output item that the file search call is initiated. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.file_search_call.searching + group: responses + example: | + { + "type": "response.file_search_call.searching", + "output_index": 0, + "item_id": "fs_123", + "sequence_number": 1 + } + ResponseFormatJsonObject: + type: object + title: JSON object + description: | + JSON object response format. An older method of generating JSON responses. + Using `json_schema` is recommended for models that support it. Note that the + model will not generate JSON without a system or user message instructing it + to do so. 
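+      # Illustrative only: selecting this format on a request. The parent key depends
+      # on the endpoint (shown here in the Chat Completions style):
+      #   "response_format": { "type": "json_object" }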
+ properties: + type: + type: string + description: The type of response format being defined. Always `json_object`. + enum: + - json_object + x-stainless-const: true + required: + - type + ResponseFormatJsonSchema: + type: object + title: JSON schema + description: | + JSON Schema response format. Used to generate structured JSON responses. + Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). + properties: + type: + type: string + description: The type of response format being defined. Always `json_schema`. + enum: + - json_schema + x-stainless-const: true + json_schema: + type: object + title: JSON schema + description: | + Structured Outputs configuration options, including a JSON Schema. + properties: + description: + type: string + description: | + A description of what the response format is for, used by the model to + determine how to respond in the format. + name: + type: string + description: | + The name of the response format. Must be a-z, A-Z, 0-9, or contain + underscores and dashes, with a maximum length of 64. + schema: + $ref: '#/components/schemas/ResponseFormatJsonSchemaSchema' + strict: + anyOf: + - type: boolean + default: false + description: | + Whether to enable strict schema adherence when generating the output. + If set to true, the model will always follow the exact schema defined + in the `schema` field. Only a subset of JSON Schema is supported when + `strict` is `true`. To learn more, read the [Structured Outputs + guide](https://platform.openai.com/docs/guides/structured-outputs). + - type: 'null' + required: + - name + required: + - type + - json_schema + ResponseFormatJsonSchemaSchema: + type: object + title: JSON schema + description: | + The schema for the response format, described as a JSON Schema object. + Learn how to build JSON schemas [here](https://json-schema.org/). + additionalProperties: true + ResponseFormatText: + type: object + title: Text + description: | + Default response format. Used to generate text responses. + properties: + type: + type: string + description: The type of response format being defined. Always `text`. + enum: + - text + x-stainless-const: true + required: + - type + ResponseFormatTextGrammar: + type: object + title: Text grammar + description: | + A custom grammar for the model to follow when generating text. + Learn more in the [custom grammars guide](https://platform.openai.com/docs/guides/custom-grammars). + properties: + type: + type: string + description: The type of response format being defined. Always `grammar`. + enum: + - grammar + x-stainless-const: true + grammar: + type: string + description: The custom grammar for the model to follow. + required: + - type + - grammar + ResponseFormatTextPython: + type: object + title: Python grammar + description: | + Configure the model to generate valid Python code. See the + [custom grammars guide](https://platform.openai.com/docs/guides/custom-grammars) for more details. + properties: + type: + type: string + description: The type of response format being defined. Always `python`. + enum: + - python + x-stainless-const: true + required: + - type + ResponseFunctionCallArgumentsDeltaEvent: + type: object + description: Emitted when there is a partial function-call arguments delta. + properties: + type: + type: string + description: | + The type of the event. Always `response.function_call_arguments.delta`. 
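+          # Non-normative illustration: consumers typically buffer these deltas per
+          # item_id and concatenate them in sequence_number order; the result equals
+          # the `arguments` field of the matching
+          # response.function_call_arguments.done event, e.g.
+          #   "{ \"arg\":" + " 123 }"  ->  "{ \"arg\": 123 }"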
+          enum:
+            - response.function_call_arguments.delta
+          x-stainless-const: true
+        item_id:
+          type: string
+          description: |
+            The ID of the output item that the function-call arguments delta is added to.
+        output_index:
+          type: integer
+          description: |
+            The index of the output item that the function-call arguments delta is added to.
+        sequence_number:
+          type: integer
+          description: The sequence number of this event.
+        delta:
+          type: string
+          description: |
+            The function-call arguments delta that is added.
+      required:
+        - type
+        - item_id
+        - output_index
+        - delta
+        - sequence_number
+      x-oaiMeta:
+        name: response.function_call_arguments.delta
+        group: responses
+        example: |
+          {
+            "type": "response.function_call_arguments.delta",
+            "item_id": "item-abc",
+            "output_index": 0,
+            "delta": "{ \"arg\":",
+            "sequence_number": 1
+          }
+    ResponseFunctionCallArgumentsDoneEvent:
+      type: object
+      description: Emitted when function-call arguments are finalized.
+      properties:
+        type:
+          type: string
+          enum:
+            - response.function_call_arguments.done
+          x-stainless-const: true
+        item_id:
+          type: string
+          description: The ID of the item.
+        name:
+          type: string
+          description: The name of the function that was called.
+        output_index:
+          type: integer
+          description: The index of the output item.
+        sequence_number:
+          type: integer
+          description: The sequence number of this event.
+        arguments:
+          type: string
+          description: The function-call arguments.
+      required:
+        - type
+        - item_id
+        - name
+        - output_index
+        - arguments
+        - sequence_number
+      x-oaiMeta:
+        name: response.function_call_arguments.done
+        group: responses
+        example: |
+          {
+            "type": "response.function_call_arguments.done",
+            "item_id": "item-abc",
+            "name": "get_weather",
+            "output_index": 1,
+            "arguments": "{ \"arg\": 123 }",
+            "sequence_number": 1
+          }
+    ResponseImageGenCallCompletedEvent:
+      type: object
+      title: ResponseImageGenCallCompletedEvent
+      description: |
+        Emitted when an image generation tool call has completed and the final image is available.
+      properties:
+        type:
+          type: string
+          enum:
+            - response.image_generation_call.completed
+          description: The type of the event. Always 'response.image_generation_call.completed'.
+          x-stainless-const: true
+        output_index:
+          type: integer
+          description: The index of the output item in the response's output array.
+        sequence_number:
+          type: integer
+          description: The sequence number of this event.
+        item_id:
+          type: string
+          description: The unique identifier of the image generation item being processed.
+      required:
+        - type
+        - output_index
+        - item_id
+        - sequence_number
+      x-oaiMeta:
+        name: response.image_generation_call.completed
+        group: responses
+        example: |
+          {
+            "type": "response.image_generation_call.completed",
+            "output_index": 0,
+            "item_id": "item-123",
+            "sequence_number": 1
+          }
+    ResponseImageGenCallGeneratingEvent:
+      type: object
+      title: ResponseImageGenCallGeneratingEvent
+      description: |
+        Emitted when an image generation tool call is actively generating an image (intermediate state).
+      properties:
+        type:
+          type: string
+          enum:
+            - response.image_generation_call.generating
+          description: The type of the event. Always 'response.image_generation_call.generating'.
+          x-stainless-const: true
+        output_index:
+          type: integer
+          description: The index of the output item in the response's output array.
+        item_id:
+          type: string
+          description: The unique identifier of the image generation item being processed.
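+        # Non-normative illustration: for a single image generation call the
+        # expected ordering is in_progress -> generating -> zero or more
+        # partial_image events -> completed, all sharing one item_id.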
+ sequence_number: + type: integer + description: The sequence number of the image generation item being processed. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.image_generation_call.generating + group: responses + example: | + { + "type": "response.image_generation_call.generating", + "output_index": 0, + "item_id": "item-123", + "sequence_number": 0 + } + ResponseImageGenCallInProgressEvent: + type: object + title: ResponseImageGenCallInProgressEvent + description: | + Emitted when an image generation tool call is in progress. + properties: + type: + type: string + enum: + - response.image_generation_call.in_progress + description: The type of the event. Always 'response.image_generation_call.in_progress'. + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response's output array. + item_id: + type: string + description: The unique identifier of the image generation item being processed. + sequence_number: + type: integer + description: The sequence number of the image generation item being processed. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.image_generation_call.in_progress + group: responses + example: | + { + "type": "response.image_generation_call.in_progress", + "output_index": 0, + "item_id": "item-123", + "sequence_number": 0 + } + ResponseImageGenCallPartialImageEvent: + type: object + title: ResponseImageGenCallPartialImageEvent + description: | + Emitted when a partial image is available during image generation streaming. + properties: + type: + type: string + enum: + - response.image_generation_call.partial_image + description: The type of the event. Always 'response.image_generation_call.partial_image'. + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response's output array. + item_id: + type: string + description: The unique identifier of the image generation item being processed. + sequence_number: + type: integer + description: The sequence number of the image generation item being processed. + partial_image_index: + type: integer + description: 0-based index for the partial image (backend is 1-based, but this is 0-based for the user). + partial_image_b64: + type: string + description: Base64-encoded partial image data, suitable for rendering as an image. + required: + - type + - output_index + - item_id + - sequence_number + - partial_image_index + - partial_image_b64 + x-oaiMeta: + name: response.image_generation_call.partial_image + group: responses + example: | + { + "type": "response.image_generation_call.partial_image", + "output_index": 0, + "item_id": "item-123", + "sequence_number": 0, + "partial_image_index": 0, + "partial_image_b64": "..." + } + ResponseInProgressEvent: + type: object + description: Emitted when the response is in progress. + properties: + type: + type: string + description: | + The type of the event. Always `response.in_progress`. + enum: + - response.in_progress + x-stainless-const: true + response: + $ref: '#/components/schemas/Response' + description: | + The response that is in progress. + sequence_number: + type: integer + description: The sequence number of this event. 
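+      # Non-normative illustration: a stream opens with response.created followed by
+      # response.in_progress, and ends with exactly one terminal event:
+      # response.completed, response.failed, or response.incomplete.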
+ required: + - type + - response + - sequence_number + x-oaiMeta: + name: response.in_progress + group: responses + example: | + { + "type": "response.in_progress", + "response": { + "id": "resp_67ccfcdd16748190a91872c75d38539e09e4d4aac714747c", + "object": "response", + "created_at": 1741487325, + "status": "in_progress", + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4o-2024-08-06", + "output": [], + "parallel_tool_calls": true, + "previous_response_id": null, + "reasoning": { + "effort": null, + "summary": null + }, + "store": true, + "temperature": 1, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1, + "truncation": "disabled", + "usage": null, + "user": null, + "metadata": {} + }, + "sequence_number": 1 + } + ResponseIncompleteEvent: + type: object + description: | + An event that is emitted when a response finishes as incomplete. + properties: + type: + type: string + description: | + The type of the event. Always `response.incomplete`. + enum: + - response.incomplete + x-stainless-const: true + response: + $ref: '#/components/schemas/Response' + description: | + The response that was incomplete. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - response + - sequence_number + x-oaiMeta: + name: response.incomplete + group: responses + example: | + { + "type": "response.incomplete", + "response": { + "id": "resp_123", + "object": "response", + "created_at": 1740855869, + "status": "incomplete", + "error": null, + "incomplete_details": { + "reason": "max_tokens" + }, + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4o-mini-2024-07-18", + "output": [], + "previous_response_id": null, + "reasoning_effort": null, + "store": false, + "temperature": 1, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1, + "truncation": "disabled", + "usage": null, + "user": null, + "metadata": {} + }, + "sequence_number": 1 + } + ResponseItemList: + type: object + description: A list of Response items. + properties: + object: + description: The type of object returned, must be `list`. + x-stainless-const: true + const: list + data: + type: array + description: A list of items used to generate this response. + items: + $ref: '#/components/schemas/ItemResource' + has_more: + type: boolean + description: Whether there are more items available. + first_id: + type: string + description: The ID of the first item in the list. + last_id: + type: string + description: The ID of the last item in the list. + required: + - object + - data + - has_more + - first_id + - last_id + x-oaiMeta: + name: The input item list + group: responses + example: | + { + "object": "list", + "data": [ + { + "id": "msg_abc123", + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "Tell me a three sentence bedtime story about a unicorn." + } + ] + } + ], + "first_id": "msg_abc123", + "last_id": "msg_abc123", + "has_more": false + } + ResponseLogProb: + type: object + description: | + A logprob is the logarithmic probability that the model assigns to producing + a particular token at a given position in the sequence. Less-negative (higher) + logprob values indicate greater model confidence in that token choice. + properties: + token: + description: A possible text token. 
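+          # Non-normative illustration: a logprob converts to a probability via
+          # exp(logprob); e.g. logprob = -0.105 means the model assigned this token
+          # roughly exp(-0.105) ~= 0.90 probability.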
+ type: string + logprob: + description: | + The log probability of this token. + type: number + top_logprobs: + description: | + The log probability of the top 20 most likely tokens. + type: array + items: + type: object + properties: + token: + description: A possible text token. + type: string + logprob: + description: The log probability of this token. + type: number + required: + - token + - logprob + ResponseMCPCallArgumentsDeltaEvent: + type: object + title: ResponseMCPCallArgumentsDeltaEvent + description: | + Emitted when there is a delta (partial update) to the arguments of an MCP tool call. + properties: + type: + type: string + enum: + - response.mcp_call_arguments.delta + description: The type of the event. Always 'response.mcp_call_arguments.delta'. + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response's output array. + item_id: + type: string + description: The unique identifier of the MCP tool call item being processed. + delta: + type: string + description: | + A JSON string containing the partial update to the arguments for the MCP tool call. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - output_index + - item_id + - delta + - sequence_number + x-oaiMeta: + name: response.mcp_call_arguments.delta + group: responses + example: | + { + "type": "response.mcp_call_arguments.delta", + "output_index": 0, + "item_id": "item-abc", + "delta": "{", + "sequence_number": 1 + } + ResponseMCPCallArgumentsDoneEvent: + type: object + title: ResponseMCPCallArgumentsDoneEvent + description: | + Emitted when the arguments for an MCP tool call are finalized. + properties: + type: + type: string + enum: + - response.mcp_call_arguments.done + description: The type of the event. Always 'response.mcp_call_arguments.done'. + x-stainless-const: true + output_index: + type: integer + description: The index of the output item in the response's output array. + item_id: + type: string + description: The unique identifier of the MCP tool call item being processed. + arguments: + type: string + description: | + A JSON string containing the finalized arguments for the MCP tool call. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - output_index + - item_id + - arguments + - sequence_number + x-oaiMeta: + name: response.mcp_call_arguments.done + group: responses + example: | + { + "type": "response.mcp_call_arguments.done", + "output_index": 0, + "item_id": "item-abc", + "arguments": "{\"arg1\": \"value1\", \"arg2\": \"value2\"}", + "sequence_number": 1 + } + ResponseMCPCallCompletedEvent: + type: object + title: ResponseMCPCallCompletedEvent + description: | + Emitted when an MCP tool call has completed successfully. + properties: + type: + type: string + enum: + - response.mcp_call.completed + description: The type of the event. Always 'response.mcp_call.completed'. + x-stainless-const: true + item_id: + type: string + description: The ID of the MCP tool call item that completed. + output_index: + type: integer + description: The index of the output item that completed. + sequence_number: + type: integer + description: The sequence number of this event. 
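+      # Non-normative illustration: an MCP tool call streams
+      # response.mcp_call.in_progress, then response.mcp_call_arguments.delta events
+      # whose concatenation matches response.mcp_call_arguments.done, and finishes
+      # with exactly one of response.mcp_call.completed or response.mcp_call.failed.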
+ required: + - type + - item_id + - output_index + - sequence_number + x-oaiMeta: + name: response.mcp_call.completed + group: responses + example: | + { + "type": "response.mcp_call.completed", + "sequence_number": 1, + "item_id": "mcp_682d437d90a88191bf88cd03aae0c3e503937d5f622d7a90", + "output_index": 0 + } + ResponseMCPCallFailedEvent: + type: object + title: ResponseMCPCallFailedEvent + description: | + Emitted when an MCP tool call has failed. + properties: + type: + type: string + enum: + - response.mcp_call.failed + description: The type of the event. Always 'response.mcp_call.failed'. + x-stainless-const: true + item_id: + type: string + description: The ID of the MCP tool call item that failed. + output_index: + type: integer + description: The index of the output item that failed. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - item_id + - output_index + - sequence_number + x-oaiMeta: + name: response.mcp_call.failed + group: responses + example: | + { + "type": "response.mcp_call.failed", + "sequence_number": 1, + "item_id": "mcp_682d437d90a88191bf88cd03aae0c3e503937d5f622d7a90", + "output_index": 0 + } + ResponseMCPCallInProgressEvent: + type: object + title: ResponseMCPCallInProgressEvent + description: | + Emitted when an MCP tool call is in progress. + properties: + type: + type: string + enum: + - response.mcp_call.in_progress + description: The type of the event. Always 'response.mcp_call.in_progress'. + x-stainless-const: true + sequence_number: + type: integer + description: The sequence number of this event. + output_index: + type: integer + description: The index of the output item in the response's output array. + item_id: + type: string + description: The unique identifier of the MCP tool call item being processed. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.mcp_call.in_progress + group: responses + example: | + { + "type": "response.mcp_call.in_progress", + "sequence_number": 1, + "output_index": 0, + "item_id": "mcp_682d437d90a88191bf88cd03aae0c3e503937d5f622d7a90" + } + ResponseMCPListToolsCompletedEvent: + type: object + title: ResponseMCPListToolsCompletedEvent + description: | + Emitted when the list of available MCP tools has been successfully retrieved. + properties: + type: + type: string + enum: + - response.mcp_list_tools.completed + description: The type of the event. Always 'response.mcp_list_tools.completed'. + x-stainless-const: true + item_id: + type: string + description: The ID of the MCP tool call item that produced this output. + output_index: + type: integer + description: The index of the output item that was processed. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - item_id + - output_index + - sequence_number + x-oaiMeta: + name: response.mcp_list_tools.completed + group: responses + example: | + { + "type": "response.mcp_list_tools.completed", + "sequence_number": 1, + "output_index": 0, + "item_id": "mcpl_682d4379df088191886b70f4ec39f90403937d5f622d7a90" + } + ResponseMCPListToolsFailedEvent: + type: object + title: ResponseMCPListToolsFailedEvent + description: | + Emitted when the attempt to list available MCP tools has failed. + properties: + type: + type: string + enum: + - response.mcp_list_tools.failed + description: The type of the event. Always 'response.mcp_list_tools.failed'. 
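+          # Non-normative illustration: tool listing emits
+          # response.mcp_list_tools.in_progress followed by exactly one of
+          # response.mcp_list_tools.completed or response.mcp_list_tools.failed,
+          # all referencing the same item_id.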
+ x-stainless-const: true + item_id: + type: string + description: The ID of the MCP tool call item that failed. + output_index: + type: integer + description: The index of the output item that failed. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - item_id + - output_index + - sequence_number + x-oaiMeta: + name: response.mcp_list_tools.failed + group: responses + example: | + { + "type": "response.mcp_list_tools.failed", + "sequence_number": 1, + "output_index": 0, + "item_id": "mcpl_682d4379df088191886b70f4ec39f90403937d5f622d7a90" + } + ResponseMCPListToolsInProgressEvent: + type: object + title: ResponseMCPListToolsInProgressEvent + description: | + Emitted when the system is in the process of retrieving the list of available MCP tools. + properties: + type: + type: string + enum: + - response.mcp_list_tools.in_progress + description: The type of the event. Always 'response.mcp_list_tools.in_progress'. + x-stainless-const: true + item_id: + type: string + description: The ID of the MCP tool call item that is being processed. + output_index: + type: integer + description: The index of the output item that is being processed. + sequence_number: + type: integer + description: The sequence number of this event. + required: + - type + - item_id + - output_index + - sequence_number + x-oaiMeta: + name: response.mcp_list_tools.in_progress + group: responses + example: | + { + "type": "response.mcp_list_tools.in_progress", + "sequence_number": 1, + "output_index": 0, + "item_id": "mcpl_682d4379df088191886b70f4ec39f90403937d5f622d7a90" + } + ResponseModalities: + anyOf: + - type: array + description: > + Output types that you would like the model to generate. + + Most models are capable of generating text, which is the default: + + + `["text"]` + + + The `gpt-4o-audio-preview` model can also be used to + + [generate audio](https://platform.openai.com/docs/guides/audio). To request that this model + generate + + both text and audio responses, you can use: + + + `["text", "audio"]` + items: + type: string + enum: + - text + - audio + - type: 'null' + ResponseOutputItemAddedEvent: + type: object + description: Emitted when a new output item is added. + properties: + type: + type: string + description: | + The type of the event. Always `response.output_item.added`. + enum: + - response.output_item.added + x-stainless-const: true + output_index: + type: integer + description: | + The index of the output item that was added. + sequence_number: + type: integer + description: | + The sequence number of this event. + item: + $ref: '#/components/schemas/OutputItem' + description: | + The output item that was added. + required: + - type + - output_index + - item + - sequence_number + x-oaiMeta: + name: response.output_item.added + group: responses + example: | + { + "type": "response.output_item.added", + "output_index": 0, + "item": { + "id": "msg_123", + "status": "in_progress", + "type": "message", + "role": "assistant", + "content": [] + }, + "sequence_number": 1 + } + ResponseOutputItemDoneEvent: + type: object + description: Emitted when an output item is marked done. + properties: + type: + type: string + description: | + The type of the event. Always `response.output_item.done`. + enum: + - response.output_item.done + x-stainless-const: true + output_index: + type: integer + description: | + The index of the output item that was marked done. + sequence_number: + type: integer + description: | + The sequence number of this event. 
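+        # Non-normative illustration: every response.output_item.added event is
+        # eventually paired with a response.output_item.done event carrying the same
+        # output_index, with the item's status moving from "in_progress" to a
+        # terminal value such as "completed".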
+ item: + $ref: '#/components/schemas/OutputItem' + description: | + The output item that was marked done. + required: + - type + - output_index + - item + - sequence_number + x-oaiMeta: + name: response.output_item.done + group: responses + example: | + { + "type": "response.output_item.done", + "output_index": 0, + "item": { + "id": "msg_123", + "status": "completed", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "In a shimmering forest under a sky full of stars, a lonely unicorn named Lila discovered a hidden pond that glowed with moonlight. Every night, she would leave sparkling, magical flowers by the water's edge, hoping to share her beauty with others. One enchanting evening, she woke to find a group of friendly animals gathered around, eager to be friends and share in her magic.", + "annotations": [] + } + ] + }, + "sequence_number": 1 + } + ResponseOutputTextAnnotationAddedEvent: + type: object + title: ResponseOutputTextAnnotationAddedEvent + description: | + Emitted when an annotation is added to output text content. + properties: + type: + type: string + enum: + - response.output_text.annotation.added + description: The type of the event. Always 'response.output_text.annotation.added'. + x-stainless-const: true + item_id: + type: string + description: The unique identifier of the item to which the annotation is being added. + output_index: + type: integer + description: The index of the output item in the response's output array. + content_index: + type: integer + description: The index of the content part within the output item. + annotation_index: + type: integer + description: The index of the annotation within the content part. + sequence_number: + type: integer + description: The sequence number of this event. + annotation: + type: object + description: The annotation object being added. (See annotation schema for details.) + required: + - type + - item_id + - output_index + - content_index + - annotation_index + - annotation + - sequence_number + x-oaiMeta: + name: response.output_text.annotation.added + group: responses + example: | + { + "type": "response.output_text.annotation.added", + "item_id": "item-abc", + "output_index": 0, + "content_index": 0, + "annotation_index": 0, + "annotation": { + "type": "text_annotation", + "text": "This is a test annotation", + "start": 0, + "end": 10 + }, + "sequence_number": 1 + } + ResponsePromptVariables: + anyOf: + - type: object + title: Prompt Variables + description: | + Optional map of values to substitute in for variables in your + prompt. The substitution values can either be strings, or other + Response input types like images or files. + x-oaiExpandable: true + x-oaiTypeLabel: map + additionalProperties: + x-oaiExpandable: true + x-oaiTypeLabel: map + anyOf: + - type: string + - $ref: '#/components/schemas/InputTextContent' + - $ref: '#/components/schemas/InputImageContent' + - $ref: '#/components/schemas/InputFileContent' + - type: 'null' + ResponseProperties: + type: object + properties: + previous_response_id: + anyOf: + - type: string + description: > + The unique ID of the previous response to the model. Use this to + + create multi-turn conversations. Learn more about + + [conversation state](https://platform.openai.com/docs/guides/conversation-state). Cannot be + used in conjunction with `conversation`. + - type: 'null' + model: + description: > + Model ID used to generate the response, like `gpt-4o` or `o3`. 
+            OpenAI
+
+            offers a wide range of models with different capabilities, performance
+
+            characteristics, and price points. Refer to the
+            [model guide](https://platform.openai.com/docs/models)
+
+            to browse and compare available models.
+          $ref: '#/components/schemas/ModelIdsResponses'
+        reasoning:
+          anyOf:
+            - $ref: '#/components/schemas/Reasoning'
+            - type: 'null'
+        background:
+          anyOf:
+            - type: boolean
+              description: |
+                Whether to run the model response in the background.
+                [Learn more](https://platform.openai.com/docs/guides/background).
+              default: false
+            - type: 'null'
+        max_output_tokens:
+          anyOf:
+            - description: >
+                An upper bound for the number of tokens that can be generated for a response, including
+                visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+              type: integer
+            - type: 'null'
+        max_tool_calls:
+          anyOf:
+            - description: >
+                The maximum number of total calls to built-in tools that can be processed in a response. This
+                maximum number applies across all built-in tool calls, not per individual tool. Any further
+                attempts to call a tool by the model will be ignored.
+              type: integer
+            - type: 'null'
+        text:
+          $ref: '#/components/schemas/ResponseTextParam'
+        tools:
+          $ref: '#/components/schemas/ToolsArray'
+        tool_choice:
+          $ref: '#/components/schemas/ToolChoiceParam'
+        prompt:
+          $ref: '#/components/schemas/Prompt'
+        truncation:
+          anyOf:
+            - type: string
+              description: |
+                The truncation strategy to use for the model response.
+                - `auto`: If the input to this Response exceeds
+                  the model's context window size, the model will truncate the
+                  response to fit the context window by dropping items from the beginning of the conversation.
+                - `disabled` (default): If the input size will exceed the context window
+                  size for a model, the request will fail with a 400 error.
+              enum:
+                - auto
+                - disabled
+              default: disabled
+            - type: 'null'
+    ResponseQueuedEvent:
+      type: object
+      title: ResponseQueuedEvent
+      description: |
+        Emitted when a response is queued and waiting to be processed.
+      properties:
+        type:
+          type: string
+          enum:
+            - response.queued
+          description: The type of the event. Always 'response.queued'.
+          x-stainless-const: true
+        response:
+          $ref: '#/components/schemas/Response'
+          description: The full response object that is queued.
+        sequence_number:
+          type: integer
+          description: The sequence number for this event.
+      required:
+        - type
+        - response
+        - sequence_number
+      x-oaiMeta:
+        name: response.queued
+        group: responses
+        example: |
+          {
+            "type": "response.queued",
+            "response": {
+              "id": "res_123",
+              "status": "queued",
+              "created_at": "2021-01-01T00:00:00Z",
+              "updated_at": "2021-01-01T00:00:00Z"
+            },
+            "sequence_number": 1
+          }
+    ResponseReasoningSummaryPartAddedEvent:
+      type: object
+      description: Emitted when a new reasoning summary part is added.
+      properties:
+        type:
+          type: string
+          description: |
+            The type of the event. Always `response.reasoning_summary_part.added`.
+          enum:
+            - response.reasoning_summary_part.added
+          x-stainless-const: true
+        item_id:
+          type: string
+          description: |
+            The ID of the item this summary part is associated with.
+        output_index:
+          type: integer
+          description: |
+            The index of the output item this summary part is associated with.
+        summary_index:
+          type: integer
+          description: |
+            The index of the summary part within the reasoning summary.
+        sequence_number:
+          type: integer
+          description: |
+            The sequence number of this event.
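+        # Non-normative illustration: summary parts are keyed by
+        # (item_id, summary_index); a part.added event opens each part and the
+        # matching part.done event closes it once its text is final.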
+ part: + type: object + description: | + The summary part that was added. + properties: + type: + type: string + description: The type of the summary part. Always `summary_text`. + enum: + - summary_text + x-stainless-const: true + text: + type: string + description: The text of the summary part. + required: + - type + - text + required: + - type + - item_id + - output_index + - summary_index + - part + - sequence_number + x-oaiMeta: + name: response.reasoning_summary_part.added + group: responses + example: | + { + "type": "response.reasoning_summary_part.added", + "item_id": "rs_6806bfca0b2481918a5748308061a2600d3ce51bdffd5476", + "output_index": 0, + "summary_index": 0, + "part": { + "type": "summary_text", + "text": "" + }, + "sequence_number": 1 + } + ResponseReasoningSummaryPartDoneEvent: + type: object + description: Emitted when a reasoning summary part is completed. + properties: + type: + type: string + description: | + The type of the event. Always `response.reasoning_summary_part.done`. + enum: + - response.reasoning_summary_part.done + x-stainless-const: true + item_id: + type: string + description: | + The ID of the item this summary part is associated with. + output_index: + type: integer + description: | + The index of the output item this summary part is associated with. + summary_index: + type: integer + description: | + The index of the summary part within the reasoning summary. + sequence_number: + type: integer + description: | + The sequence number of this event. + part: + type: object + description: | + The completed summary part. + properties: + type: + type: string + description: The type of the summary part. Always `summary_text`. + enum: + - summary_text + x-stainless-const: true + text: + type: string + description: The text of the summary part. + required: + - type + - text + required: + - type + - item_id + - output_index + - summary_index + - part + - sequence_number + x-oaiMeta: + name: response.reasoning_summary_part.done + group: responses + example: | + { + "type": "response.reasoning_summary_part.done", + "item_id": "rs_6806bfca0b2481918a5748308061a2600d3ce51bdffd5476", + "output_index": 0, + "summary_index": 0, + "part": { + "type": "summary_text", + "text": "**Responding to a greeting**\n\nThe user just said, \"Hello!\" So, it seems I need to engage. I'll greet them back and offer help since they're looking to chat. I could say something like, \"Hello! How can I assist you today?\" That feels friendly and open. They didn't ask a specific question, so this approach will work well for starting a conversation. Let's see where it goes from there!" + }, + "sequence_number": 1 + } + ResponseReasoningSummaryTextDeltaEvent: + type: object + description: Emitted when a delta is added to a reasoning summary text. + properties: + type: + type: string + description: | + The type of the event. Always `response.reasoning_summary_text.delta`. + enum: + - response.reasoning_summary_text.delta + x-stainless-const: true + item_id: + type: string + description: | + The ID of the item this summary text delta is associated with. + output_index: + type: integer + description: | + The index of the output item this summary text delta is associated with. + summary_index: + type: integer + description: | + The index of the summary part within the reasoning summary. + delta: + type: string + description: | + The text delta that was added to the summary. + sequence_number: + type: integer + description: | + The sequence number of this event. 
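+      # Non-normative illustration: concatenating the `delta` fields for a given
+      # (item_id, summary_index) in sequence_number order reproduces the `text` of
+      # the matching response.reasoning_summary_text.done event.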
+ required: + - type + - item_id + - output_index + - summary_index + - delta + - sequence_number + x-oaiMeta: + name: response.reasoning_summary_text.delta + group: responses + example: | + { + "type": "response.reasoning_summary_text.delta", + "item_id": "rs_6806bfca0b2481918a5748308061a2600d3ce51bdffd5476", + "output_index": 0, + "summary_index": 0, + "delta": "**Responding to a greeting**\n\nThe user just said, \"Hello!\" So, it seems I need to engage. I'll greet them back and offer help since they're looking to chat. I could say something like, \"Hello! How can I assist you today?\" That feels friendly and open. They didn't ask a specific question, so this approach will work well for starting a conversation. Let's see where it goes from there!", + "sequence_number": 1 + } + ResponseReasoningSummaryTextDoneEvent: + type: object + description: Emitted when a reasoning summary text is completed. + properties: + type: + type: string + description: | + The type of the event. Always `response.reasoning_summary_text.done`. + enum: + - response.reasoning_summary_text.done + x-stainless-const: true + item_id: + type: string + description: | + The ID of the item this summary text is associated with. + output_index: + type: integer + description: | + The index of the output item this summary text is associated with. + summary_index: + type: integer + description: | + The index of the summary part within the reasoning summary. + text: + type: string + description: | + The full text of the completed reasoning summary. + sequence_number: + type: integer + description: | + The sequence number of this event. + required: + - type + - item_id + - output_index + - summary_index + - text + - sequence_number + x-oaiMeta: + name: response.reasoning_summary_text.done + group: responses + example: | + { + "type": "response.reasoning_summary_text.done", + "item_id": "rs_6806bfca0b2481918a5748308061a2600d3ce51bdffd5476", + "output_index": 0, + "summary_index": 0, + "text": "**Responding to a greeting**\n\nThe user just said, \"Hello!\" So, it seems I need to engage. I'll greet them back and offer help since they're looking to chat. I could say something like, \"Hello! How can I assist you today?\" That feels friendly and open. They didn't ask a specific question, so this approach will work well for starting a conversation. Let's see where it goes from there!", + "sequence_number": 1 + } + ResponseReasoningTextDeltaEvent: + type: object + description: Emitted when a delta is added to a reasoning text. + properties: + type: + type: string + description: | + The type of the event. Always `response.reasoning_text.delta`. + enum: + - response.reasoning_text.delta + x-stainless-const: true + item_id: + type: string + description: | + The ID of the item this reasoning text delta is associated with. + output_index: + type: integer + description: | + The index of the output item this reasoning text delta is associated with. + content_index: + type: integer + description: | + The index of the reasoning content part this delta is associated with. + delta: + type: string + description: | + The text delta that was added to the reasoning content. + sequence_number: + type: integer + description: | + The sequence number of this event. 
+ required: + - type + - item_id + - output_index + - content_index + - delta + - sequence_number + x-oaiMeta: + name: response.reasoning_text.delta + group: responses + example: | + { + "type": "response.reasoning_text.delta", + "item_id": "rs_123", + "output_index": 0, + "content_index": 0, + "delta": "The", + "sequence_number": 1 + } + ResponseReasoningTextDoneEvent: + type: object + description: Emitted when a reasoning text is completed. + properties: + type: + type: string + description: | + The type of the event. Always `response.reasoning_text.done`. + enum: + - response.reasoning_text.done + x-stainless-const: true + item_id: + type: string + description: | + The ID of the item this reasoning text is associated with. + output_index: + type: integer + description: | + The index of the output item this reasoning text is associated with. + content_index: + type: integer + description: | + The index of the reasoning content part. + text: + type: string + description: | + The full text of the completed reasoning content. + sequence_number: + type: integer + description: | + The sequence number of this event. + required: + - type + - item_id + - output_index + - content_index + - text + - sequence_number + x-oaiMeta: + name: response.reasoning_text.done + group: responses + example: | + { + "type": "response.reasoning_text.done", + "item_id": "rs_123", + "output_index": 0, + "content_index": 0, + "text": "The user is asking...", + "sequence_number": 4 + } + ResponseRefusalDeltaEvent: + type: object + description: Emitted when there is a partial refusal text. + properties: + type: + type: string + description: | + The type of the event. Always `response.refusal.delta`. + enum: + - response.refusal.delta + x-stainless-const: true + item_id: + type: string + description: | + The ID of the output item that the refusal text is added to. + output_index: + type: integer + description: | + The index of the output item that the refusal text is added to. + content_index: + type: integer + description: | + The index of the content part that the refusal text is added to. + delta: + type: string + description: | + The refusal text that is added. + sequence_number: + type: integer + description: | + The sequence number of this event. + required: + - type + - item_id + - output_index + - content_index + - delta + - sequence_number + x-oaiMeta: + name: response.refusal.delta + group: responses + example: | + { + "type": "response.refusal.delta", + "item_id": "msg_123", + "output_index": 0, + "content_index": 0, + "delta": "refusal text so far", + "sequence_number": 1 + } + ResponseRefusalDoneEvent: + type: object + description: Emitted when refusal text is finalized. + properties: + type: + type: string + description: | + The type of the event. Always `response.refusal.done`. + enum: + - response.refusal.done + x-stainless-const: true + item_id: + type: string + description: | + The ID of the output item that the refusal text is finalized. + output_index: + type: integer + description: | + The index of the output item that the refusal text is finalized. + content_index: + type: integer + description: | + The index of the content part that the refusal text is finalized. + refusal: + type: string + description: | + The refusal text that is finalized. + sequence_number: + type: integer + description: | + The sequence number of this event. 
+      required:
+        - type
+        - item_id
+        - output_index
+        - content_index
+        - refusal
+        - sequence_number
+      x-oaiMeta:
+        name: response.refusal.done
+        group: responses
+        example: |
+          {
+            "type": "response.refusal.done",
+            "item_id": "item-abc",
+            "output_index": 1,
+            "content_index": 2,
+            "refusal": "final refusal text",
+            "sequence_number": 1
+          }
+    ResponseStreamEvent:
+      anyOf:
+        - $ref: '#/components/schemas/ResponseAudioDeltaEvent'
+        - $ref: '#/components/schemas/ResponseAudioDoneEvent'
+        - $ref: '#/components/schemas/ResponseAudioTranscriptDeltaEvent'
+        - $ref: '#/components/schemas/ResponseAudioTranscriptDoneEvent'
+        - $ref: '#/components/schemas/ResponseCodeInterpreterCallCodeDeltaEvent'
+        - $ref: '#/components/schemas/ResponseCodeInterpreterCallCodeDoneEvent'
+        - $ref: '#/components/schemas/ResponseCodeInterpreterCallCompletedEvent'
+        - $ref: '#/components/schemas/ResponseCodeInterpreterCallInProgressEvent'
+        - $ref: '#/components/schemas/ResponseCodeInterpreterCallInterpretingEvent'
+        - $ref: '#/components/schemas/ResponseCompletedEvent'
+        - $ref: '#/components/schemas/ResponseContentPartAddedEvent'
+        - $ref: '#/components/schemas/ResponseContentPartDoneEvent'
+        - $ref: '#/components/schemas/ResponseCreatedEvent'
+        - $ref: '#/components/schemas/ResponseErrorEvent'
+        - $ref: '#/components/schemas/ResponseFileSearchCallCompletedEvent'
+        - $ref: '#/components/schemas/ResponseFileSearchCallInProgressEvent'
+        - $ref: '#/components/schemas/ResponseFileSearchCallSearchingEvent'
+        - $ref: '#/components/schemas/ResponseFunctionCallArgumentsDeltaEvent'
+        - $ref: '#/components/schemas/ResponseFunctionCallArgumentsDoneEvent'
+        - $ref: '#/components/schemas/ResponseInProgressEvent'
+        - $ref: '#/components/schemas/ResponseFailedEvent'
+        - $ref: '#/components/schemas/ResponseIncompleteEvent'
+        - $ref: '#/components/schemas/ResponseOutputItemAddedEvent'
+        - $ref: '#/components/schemas/ResponseOutputItemDoneEvent'
+        - $ref: '#/components/schemas/ResponseReasoningSummaryPartAddedEvent'
+        - $ref: '#/components/schemas/ResponseReasoningSummaryPartDoneEvent'
+        - $ref: '#/components/schemas/ResponseReasoningSummaryTextDeltaEvent'
+        - $ref: '#/components/schemas/ResponseReasoningSummaryTextDoneEvent'
+        - $ref: '#/components/schemas/ResponseReasoningTextDeltaEvent'
+        - $ref: '#/components/schemas/ResponseReasoningTextDoneEvent'
+        - $ref: '#/components/schemas/ResponseRefusalDeltaEvent'
+        - $ref: '#/components/schemas/ResponseRefusalDoneEvent'
+        - $ref: '#/components/schemas/ResponseTextDeltaEvent'
+        - $ref: '#/components/schemas/ResponseTextDoneEvent'
+        - $ref: '#/components/schemas/ResponseWebSearchCallCompletedEvent'
+        - $ref: '#/components/schemas/ResponseWebSearchCallInProgressEvent'
+        - $ref: '#/components/schemas/ResponseWebSearchCallSearchingEvent'
+        - $ref: '#/components/schemas/ResponseImageGenCallCompletedEvent'
+        - $ref: '#/components/schemas/ResponseImageGenCallGeneratingEvent'
+        - $ref: '#/components/schemas/ResponseImageGenCallInProgressEvent'
+        - $ref: '#/components/schemas/ResponseImageGenCallPartialImageEvent'
+        - $ref: '#/components/schemas/ResponseMCPCallArgumentsDeltaEvent'
+        - $ref: '#/components/schemas/ResponseMCPCallArgumentsDoneEvent'
+        - $ref: '#/components/schemas/ResponseMCPCallCompletedEvent'
+        - $ref: '#/components/schemas/ResponseMCPCallFailedEvent'
+        - $ref: '#/components/schemas/ResponseMCPCallInProgressEvent'
+        - $ref: '#/components/schemas/ResponseMCPListToolsCompletedEvent'
+        - $ref: '#/components/schemas/ResponseMCPListToolsFailedEvent'
+        - $ref: '#/components/schemas/ResponseMCPListToolsInProgressEvent'
+        - $ref: '#/components/schemas/ResponseOutputTextAnnotationAddedEvent'
+        - $ref: '#/components/schemas/ResponseQueuedEvent'
+        - $ref: '#/components/schemas/ResponseCustomToolCallInputDeltaEvent'
+        - $ref: '#/components/schemas/ResponseCustomToolCallInputDoneEvent'
+      discriminator:
+        propertyName: type
+    ResponseStreamOptions:
+      anyOf:
+        - description: |
+            Options for streaming responses. Only set this when you set `stream: true`.
+          type: object
+          properties:
+            include_obfuscation:
+              type: boolean
+              description: |
+                When true, stream obfuscation will be enabled. Stream obfuscation adds
+                random characters to an `obfuscation` field on streaming delta events to
+                normalize payload sizes as a mitigation to certain side-channel attacks.
+                These obfuscation fields are included by default, but add a small amount
+                of overhead to the data stream. You can set `include_obfuscation` to
+                false to optimize for bandwidth if you trust the network links between
+                your application and the OpenAI API.
+        - type: 'null'
+    ResponseTextDeltaEvent:
+      type: object
+      description: Emitted when there is an additional text delta.
+      properties:
+        type:
+          type: string
+          description: |
+            The type of the event. Always `response.output_text.delta`.
+          enum:
+            - response.output_text.delta
+          x-stainless-const: true
+        item_id:
+          type: string
+          description: |
+            The ID of the output item that the text delta was added to.
+        output_index:
+          type: integer
+          description: |
+            The index of the output item that the text delta was added to.
+        content_index:
+          type: integer
+          description: |
+            The index of the content part that the text delta was added to.
+        delta:
+          type: string
+          description: |
+            The text delta that was added.
+        sequence_number:
+          type: integer
+          description: The sequence number for this event.
+        logprobs:
+          type: array
+          description: |
+            The log probabilities of the tokens in the delta.
+          items:
+            $ref: '#/components/schemas/ResponseLogProb'
+      required:
+        - type
+        - item_id
+        - output_index
+        - content_index
+        - delta
+        - sequence_number
+        - logprobs
+      x-oaiMeta:
+        name: response.output_text.delta
+        group: responses
+        example: |
+          {
+            "type": "response.output_text.delta",
+            "item_id": "msg_123",
+            "output_index": 0,
+            "content_index": 0,
+            "delta": "In",
+            "sequence_number": 1
+          }
+    ResponseTextDoneEvent:
+      type: object
+      description: Emitted when text content is finalized.
+      properties:
+        type:
+          type: string
+          description: |
+            The type of the event. Always `response.output_text.done`.
+          enum:
+            - response.output_text.done
+          x-stainless-const: true
+        item_id:
+          type: string
+          description: |
+            The ID of the output item that the text content is finalized.
+        output_index:
+          type: integer
+          description: |
+            The index of the output item that the text content is finalized.
+        content_index:
+          type: integer
+          description: |
+            The index of the content part that the text content is finalized.
+        text:
+          type: string
+          description: |
+            The text content that is finalized.
+        sequence_number:
+          type: integer
+          description: The sequence number for this event.
+        logprobs:
+          type: array
+          description: |
+            The log probabilities of the tokens in the delta.
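+          # Non-normative illustration: over server-sent events, each stream event
+          # arrives as a JSON object whose `type` field is the discriminator declared
+          # on ResponseStreamEvent above, so clients can route on it, e.g.
+          #   data: {"type":"response.output_text.delta","delta":"In",...}
+          #   data: {"type":"response.output_text.done","text":"In a ...",...}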
+ items: + $ref: '#/components/schemas/ResponseLogProb' + required: + - type + - item_id + - output_index + - content_index + - text + - sequence_number + - logprobs + x-oaiMeta: + name: response.output_text.done + group: responses + example: | + { + "type": "response.output_text.done", + "item_id": "msg_123", + "output_index": 0, + "content_index": 0, + "text": "In a shimmering forest under a sky full of stars, a lonely unicorn named Lila discovered a hidden pond that glowed with moonlight. Every night, she would leave sparkling, magical flowers by the water's edge, hoping to share her beauty with others. One enchanting evening, she woke to find a group of friendly animals gathered around, eager to be friends and share in her magic.", + "sequence_number": 1 + } + ResponseTextParam: + type: object + description: | + Configuration options for a text response from the model. Can be plain + text or structured JSON data. Learn more: + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + properties: + format: + $ref: '#/components/schemas/TextResponseFormatConfiguration' + verbosity: + $ref: '#/components/schemas/Verbosity' + ResponseUsage: + type: object + description: | + Represents token usage details including input tokens, output tokens, + a breakdown of output tokens, and the total tokens used. + properties: + input_tokens: + type: integer + description: The number of input tokens. + input_tokens_details: + type: object + description: A detailed breakdown of the input tokens. + properties: + cached_tokens: + type: integer + description: | + The number of tokens that were retrieved from the cache. + [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). + required: + - cached_tokens + output_tokens: + type: integer + description: The number of output tokens. + output_tokens_details: + type: object + description: A detailed breakdown of the output tokens. + properties: + reasoning_tokens: + type: integer + description: The number of reasoning tokens. + required: + - reasoning_tokens + total_tokens: + type: integer + description: The total number of tokens used. + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + ResponseWebSearchCallCompletedEvent: + type: object + description: Emitted when a web search call is completed. + properties: + type: + type: string + description: | + The type of the event. Always `response.web_search_call.completed`. + enum: + - response.web_search_call.completed + x-stainless-const: true + output_index: + type: integer + description: | + The index of the output item that the web search call is associated with. + item_id: + type: string + description: | + Unique ID for the output item associated with the web search call. + sequence_number: + type: integer + description: The sequence number of the web search call being processed. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.web_search_call.completed + group: responses + example: | + { + "type": "response.web_search_call.completed", + "output_index": 0, + "item_id": "ws_123", + "sequence_number": 0 + } + ResponseWebSearchCallInProgressEvent: + type: object + description: Emitted when a web search call is initiated. + properties: + type: + type: string + description: | + The type of the event. Always `response.web_search_call.in_progress`. 
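+          # Non-normative illustration: a web search call streams in_progress ->
+          # searching -> completed, with all three events carrying the same item_id
+          # (e.g. "ws_123") and output_index.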
+ enum: + - response.web_search_call.in_progress + x-stainless-const: true + output_index: + type: integer + description: | + The index of the output item that the web search call is associated with. + item_id: + type: string + description: | + Unique ID for the output item associated with the web search call. + sequence_number: + type: integer + description: The sequence number of the web search call being processed. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.web_search_call.in_progress + group: responses + example: | + { + "type": "response.web_search_call.in_progress", + "output_index": 0, + "item_id": "ws_123", + "sequence_number": 0 + } + ResponseWebSearchCallSearchingEvent: + type: object + description: Emitted when a web search call is executing. + properties: + type: + type: string + description: | + The type of the event. Always `response.web_search_call.searching`. + enum: + - response.web_search_call.searching + x-stainless-const: true + output_index: + type: integer + description: | + The index of the output item that the web search call is associated with. + item_id: + type: string + description: | + Unique ID for the output item associated with the web search call. + sequence_number: + type: integer + description: The sequence number of the web search call being processed. + required: + - type + - output_index + - item_id + - sequence_number + x-oaiMeta: + name: response.web_search_call.searching + group: responses + example: | + { + "type": "response.web_search_call.searching", + "output_index": 0, + "item_id": "ws_123", + "sequence_number": 0 + } + RunCompletionUsage: + anyOf: + - type: object + description: >- + Usage statistics related to the run. This value will be `null` if the run is not in a terminal + state (i.e. `in_progress`, `queued`, etc.). + properties: + completion_tokens: + type: integer + description: Number of completion tokens used over the course of the run. + prompt_tokens: + type: integer + description: Number of prompt tokens used over the course of the run. + total_tokens: + type: integer + description: Total number of tokens used (prompt + completion). + required: + - prompt_tokens + - completion_tokens + - total_tokens + - type: 'null' + RunGraderRequest: + type: object + title: RunGraderRequest + properties: + grader: + type: object + description: The grader used for the fine-tuning job. + anyOf: + - $ref: '#/components/schemas/GraderStringCheck' + - $ref: '#/components/schemas/GraderTextSimilarity' + - $ref: '#/components/schemas/GraderPython' + - $ref: '#/components/schemas/GraderScoreModel' + - $ref: '#/components/schemas/GraderMulti' + discriminator: + propertyName: type + item: + type: object + description: > + The dataset item provided to the grader. This will be used to populate + + the `item` namespace. See [the guide](https://platform.openai.com/docs/guides/graders) for more + details. + model_sample: + type: string + description: > + The model sample to be evaluated. This value will be used to populate + + the `sample` namespace. See [the guide](https://platform.openai.com/docs/guides/graders) for more + details. + + The `output_json` variable will be populated if the model sample is a + + valid JSON string. 
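+      # Non-normative illustration (hypothetical values): a minimal run-grader
+      # request could look like
+      #   {
+      #     "grader": {"type": "string_check", "name": "exact_match",
+      #                "input": "{{sample.output_text}}", "reference": "{{item.answer}}",
+      #                "operation": "eq"},
+      #     "item": {"answer": "4"},
+      #     "model_sample": "4"
+      #   }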
+ + required: + - grader + - model_sample + RunGraderResponse: + type: object + properties: + reward: + type: number + metadata: + type: object + properties: + name: + type: string + type: + type: string + errors: + type: object + properties: + formula_parse_error: + type: boolean + sample_parse_error: + type: boolean + truncated_observation_error: + type: boolean + unresponsive_reward_error: + type: boolean + invalid_variable_error: + type: boolean + other_error: + type: boolean + python_grader_server_error: + type: boolean + python_grader_server_error_type: + anyOf: + - type: string + - type: 'null' + python_grader_runtime_error: + type: boolean + python_grader_runtime_error_details: + anyOf: + - type: string + - type: 'null' + model_grader_server_error: + type: boolean + model_grader_refusal_error: + type: boolean + model_grader_parse_error: + type: boolean + model_grader_server_error_details: + anyOf: + - type: string + - type: 'null' + required: + - formula_parse_error + - sample_parse_error + - truncated_observation_error + - unresponsive_reward_error + - invalid_variable_error + - other_error + - python_grader_server_error + - python_grader_server_error_type + - python_grader_runtime_error + - python_grader_runtime_error_details + - model_grader_server_error + - model_grader_refusal_error + - model_grader_parse_error + - model_grader_server_error_details + execution_time: + type: number + scores: + type: object + additionalProperties: {} + token_usage: + anyOf: + - type: integer + - type: 'null' + sampled_model_name: + anyOf: + - type: string + - type: 'null' + required: + - name + - type + - errors + - execution_time + - scores + - token_usage + - sampled_model_name + sub_rewards: + type: object + additionalProperties: {} + model_grader_token_usage_per_model: + type: object + additionalProperties: {} + required: + - reward + - metadata + - sub_rewards + - model_grader_token_usage_per_model + RunObject: + type: object + title: A run on a thread + description: Represents an execution run on a [thread](https://platform.openai.com/docs/api-reference/threads). + properties: + id: + description: The identifier, which can be referenced in API endpoints. + type: string + object: + description: The object type, which is always `thread.run`. + type: string + enum: + - thread.run + x-stainless-const: true + created_at: + description: The Unix timestamp (in seconds) for when the run was created. + type: integer + thread_id: + description: >- + The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) that was executed + on as a part of this run. + type: string + assistant_id: + description: >- + The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for + execution of this run. + type: string + status: + $ref: '#/components/schemas/RunStatus' + required_action: + type: object + description: Details on the action required to continue the run. Will be `null` if no action is required. + nullable: true + properties: + type: + description: For now, this is always `submit_tool_outputs`. + type: string + enum: + - submit_tool_outputs + x-stainless-const: true + submit_tool_outputs: + type: object + description: Details on the tool outputs needed for this run to continue. + properties: + tool_calls: + type: array + description: A list of the relevant tool calls. 
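+                # Non-normative illustration: when a run's status is
+                # "requires_action", the client executes each listed tool call and
+                # submits results via the submit tool outputs endpoint, pairing every
+                # tool_call_id with its output string so the run can resume.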
+ items: + $ref: '#/components/schemas/RunToolCallObject' + required: + - tool_calls + required: + - type + - submit_tool_outputs + last_error: + type: object + description: The last error associated with this run. Will be `null` if there are no errors. + nullable: true + properties: + code: + type: string + description: One of `server_error`, `rate_limit_exceeded`, or `invalid_prompt`. + enum: + - server_error + - rate_limit_exceeded + - invalid_prompt + message: + type: string + description: A human-readable description of the error. + required: + - code + - message + expires_at: + description: The Unix timestamp (in seconds) for when the run will expire. + type: integer + nullable: true + started_at: + description: The Unix timestamp (in seconds) for when the run was started. + type: integer + nullable: true + cancelled_at: + description: The Unix timestamp (in seconds) for when the run was cancelled. + type: integer + nullable: true + failed_at: + description: The Unix timestamp (in seconds) for when the run failed. + type: integer + nullable: true + completed_at: + description: The Unix timestamp (in seconds) for when the run was completed. + type: integer + nullable: true + incomplete_details: + description: Details on why the run is incomplete. Will be `null` if the run is not incomplete. + type: object + nullable: true + properties: + reason: + description: >- + The reason why the run is incomplete. This will point to which specific token limit was + reached over the course of the run. + type: string + enum: + - max_completion_tokens + - max_prompt_tokens + model: + description: >- + The model that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for + this run. + type: string + instructions: + description: >- + The instructions that the [assistant](https://platform.openai.com/docs/api-reference/assistants) + used for this run. + type: string + tools: + description: >- + The list of tools that the [assistant](https://platform.openai.com/docs/api-reference/assistants) + used for this run. + default: [] + type: array + maxItems: 20 + items: + $ref: '#/components/schemas/AssistantTool' + metadata: + $ref: '#/components/schemas/Metadata' + usage: + $ref: '#/components/schemas/RunCompletionUsage' + temperature: + description: The sampling temperature used for this run. If not set, defaults to 1. + type: number + nullable: true + top_p: + description: The nucleus sampling value used for this run. If not set, defaults to 1. + type: number + nullable: true + max_prompt_tokens: + type: integer + nullable: true + description: | + The maximum number of prompt tokens specified to have been used over the course of the run. + minimum: 256 + max_completion_tokens: + type: integer + nullable: true + description: | + The maximum number of completion tokens specified to have been used over the course of the run. 
+ minimum: 256 + truncation_strategy: + allOf: + - $ref: '#/components/schemas/TruncationObject' + - nullable: true + tool_choice: + allOf: + - $ref: '#/components/schemas/AssistantsApiToolChoiceOption' + - nullable: true + parallel_tool_calls: + $ref: '#/components/schemas/ParallelToolCalls' + response_format: + $ref: '#/components/schemas/AssistantsApiResponseFormatOption' + nullable: true + required: + - id + - object + - created_at + - thread_id + - assistant_id + - status + - required_action + - last_error + - expires_at + - started_at + - cancelled_at + - failed_at + - completed_at + - model + - instructions + - tools + - metadata + - usage + - incomplete_details + - max_prompt_tokens + - max_completion_tokens + - truncation_strategy + - tool_choice + - parallel_tool_calls + - response_format + x-oaiMeta: + name: The run object + beta: true + example: | + { + "id": "run_abc123", + "object": "thread.run", + "created_at": 1698107661, + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "status": "completed", + "started_at": 1699073476, + "expires_at": null, + "cancelled_at": null, + "failed_at": null, + "completed_at": 1699073498, + "last_error": null, + "model": "gpt-4o", + "instructions": null, + "tools": [{"type": "file_search"}, {"type": "code_interpreter"}], + "metadata": {}, + "incomplete_details": null, + "usage": { + "prompt_tokens": 123, + "completion_tokens": 456, + "total_tokens": 579 + }, + "temperature": 1.0, + "top_p": 1.0, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto", + "parallel_tool_calls": true + } + RunStepCompletionUsage: + anyOf: + - type: object + description: >- + Usage statistics related to the run step. This value will be `null` while the run step's status is + `in_progress`. + properties: + completion_tokens: + type: integer + description: Number of completion tokens used over the course of the run step. + prompt_tokens: + type: integer + description: Number of prompt tokens used over the course of the run step. + total_tokens: + type: integer + description: Total number of tokens used (prompt + completion). + required: + - prompt_tokens + - completion_tokens + - total_tokens + - type: 'null' + RunStepDeltaObject: + type: object + title: Run step delta object + description: | + Represents a run step delta i.e. any changed fields on a run step during streaming. + properties: + id: + description: The identifier of the run step, which can be referenced in API endpoints. + type: string + object: + description: The object type, which is always `thread.run.step.delta`. + type: string + enum: + - thread.run.step.delta + x-stainless-const: true + delta: + $ref: '#/components/schemas/RunStepDeltaObjectDelta' + required: + - id + - object + - delta + x-oaiMeta: + name: The run step delta object + beta: true + example: | + { + "id": "step_123", + "object": "thread.run.step.delta", + "delta": { + "step_details": { + "type": "tool_calls", + "tool_calls": [ + { + "index": 0, + "id": "call_123", + "type": "code_interpreter", + "code_interpreter": { "input": "", "outputs": [] } + } + ] + } + } + } + RunStepDeltaStepDetailsMessageCreationObject: + title: Message creation + type: object + description: Details of the message creation by the run step. + properties: + type: + description: Always `message_creation`. 
+          type: string
+          enum:
+            - message_creation
+          x-stainless-const: true
+        message_creation:
+          type: object
+          properties:
+            message_id:
+              type: string
+              description: The ID of the message that was created by this run step.
+      required:
+        - type
+    RunStepDeltaStepDetailsToolCallsCodeObject:
+      title: Code interpreter tool call
+      type: object
+      description: Details of the Code Interpreter tool call the run step was involved in.
+      properties:
+        index:
+          type: integer
+          description: The index of the tool call in the tool calls array.
+        id:
+          type: string
+          description: The ID of the tool call.
+        type:
+          type: string
+          description: The type of tool call. This is always going to be `code_interpreter` for this type of tool call.
+          enum:
+            - code_interpreter
+          x-stainless-const: true
+        code_interpreter:
+          type: object
+          description: The Code Interpreter tool call definition.
+          properties:
+            input:
+              type: string
+              description: The input to the Code Interpreter tool call.
+            outputs:
+              type: array
+              description: >-
+                The outputs from the Code Interpreter tool call. Code Interpreter can output one or more
+                items, including text (`logs`) or images (`image`). Each of these is represented by a
+                different object type.
+              items:
+                type: object
+                anyOf:
+                  - $ref: '#/components/schemas/RunStepDeltaStepDetailsToolCallsCodeOutputLogsObject'
+                  - $ref: '#/components/schemas/RunStepDeltaStepDetailsToolCallsCodeOutputImageObject'
+                discriminator:
+                  propertyName: type
+      required:
+        - index
+        - type
+    RunStepDeltaStepDetailsToolCallsCodeOutputImageObject:
+      title: Code interpreter image output
+      type: object
+      properties:
+        index:
+          type: integer
+          description: The index of the output in the outputs array.
+        type:
+          description: Always `image`.
+          type: string
+          enum:
+            - image
+          x-stainless-const: true
+        image:
+          type: object
+          properties:
+            file_id:
+              description: The [file](https://platform.openai.com/docs/api-reference/files) ID of the image.
+              type: string
+      required:
+        - index
+        - type
+    RunStepDeltaStepDetailsToolCallsCodeOutputLogsObject:
+      title: Code interpreter log output
+      type: object
+      description: Text output from the Code Interpreter tool call as part of a run step.
+      properties:
+        index:
+          type: integer
+          description: The index of the output in the outputs array.
+        type:
+          description: Always `logs`.
+          type: string
+          enum:
+            - logs
+          x-stainless-const: true
+        logs:
+          type: string
+          description: The text output from the Code Interpreter tool call.
+      required:
+        - index
+        - type
+    RunStepDeltaStepDetailsToolCallsFileSearchObject:
+      title: File search tool call
+      type: object
+      properties:
+        index:
+          type: integer
+          description: The index of the tool call in the tool calls array.
+        id:
+          type: string
+          description: The ID of the tool call object.
+        type:
+          type: string
+          description: The type of tool call. This is always going to be `file_search` for this type of tool call.
+          enum:
+            - file_search
+          x-stainless-const: true
+        file_search:
+          type: object
+          description: For now, this is always going to be an empty object.
+          x-oaiTypeLabel: map
+      required:
+        - index
+        - type
+        - file_search
+    RunStepDeltaStepDetailsToolCallsFunctionObject:
+      type: object
+      title: Function tool call
+      properties:
+        index:
+          type: integer
+          description: The index of the tool call in the tool calls array.
+        id:
+          type: string
+          description: The ID of the tool call object.
+        type:
+          type: string
+          description: The type of tool call. This is always going to be `function` for this type of tool call.
+          enum:
+            - function
+          x-stainless-const: true
+        function:
+          type: object
+          description: The definition of the function that was called.
+          properties:
+            name:
+              type: string
+              description: The name of the function.
+            arguments:
+              type: string
+              description: The arguments passed to the function.
+            output:
+              anyOf:
+                - type: string
+                  description: >-
+                    The output of the function. This will be `null` if the outputs have not been
+                    [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) yet.
+                - type: 'null'
+      required:
+        - index
+        - type
+    RunStepDeltaStepDetailsToolCallsObject:
+      title: Tool calls
+      type: object
+      description: Details of the tool call.
+      properties:
+        type:
+          description: Always `tool_calls`.
+          type: string
+          enum:
+            - tool_calls
+          x-stainless-const: true
+        tool_calls:
+          type: array
+          description: >
+            An array of tool calls the run step was involved in. These can be associated with one of three
+            types of tools: `code_interpreter`, `file_search`, or `function`.
+          items:
+            $ref: '#/components/schemas/RunStepDeltaStepDetailsToolCall'
+      required:
+        - type
+    RunStepDetailsMessageCreationObject:
+      title: Message creation
+      type: object
+      description: Details of the message creation by the run step.
+      properties:
+        type:
+          description: Always `message_creation`.
+          type: string
+          enum:
+            - message_creation
+          x-stainless-const: true
+        message_creation:
+          type: object
+          properties:
+            message_id:
+              type: string
+              description: The ID of the message that was created by this run step.
+          required:
+            - message_id
+      required:
+        - type
+        - message_creation
+    RunStepDetailsToolCallsCodeObject:
+      title: Code Interpreter tool call
+      type: object
+      description: Details of the Code Interpreter tool call the run step was involved in.
+      properties:
+        id:
+          type: string
+          description: The ID of the tool call.
+        type:
+          type: string
+          description: The type of tool call. This is always going to be `code_interpreter` for this type of tool call.
+          enum:
+            - code_interpreter
+          x-stainless-const: true
+        code_interpreter:
+          type: object
+          description: The Code Interpreter tool call definition.
+          required:
+            - input
+            - outputs
+          properties:
+            input:
+              type: string
+              description: The input to the Code Interpreter tool call.
+            outputs:
+              type: array
+              description: >-
+                The outputs from the Code Interpreter tool call. Code Interpreter can output one or more
+                items, including text (`logs`) or images (`image`). Each of these is represented by a
+                different object type.
+              items:
+                type: object
+                anyOf:
+                  - $ref: '#/components/schemas/RunStepDetailsToolCallsCodeOutputLogsObject'
+                  - $ref: '#/components/schemas/RunStepDetailsToolCallsCodeOutputImageObject'
+                discriminator:
+                  propertyName: type
+      required:
+        - id
+        - type
+        - code_interpreter
+    RunStepDetailsToolCallsCodeOutputImageObject:
+      title: Code Interpreter image output
+      type: object
+      properties:
+        type:
+          description: Always `image`.
+          type: string
+          enum:
+            - image
+          x-stainless-const: true
+        image:
+          type: object
+          properties:
+            file_id:
+              description: The [file](https://platform.openai.com/docs/api-reference/files) ID of the image.
+              type: string
+          required:
+            - file_id
+      required:
+        - type
+        - image
+      x-stainless-naming:
+        java:
+          type_name: ImageOutput
+        kotlin:
+          type_name: ImageOutput
+    RunStepDetailsToolCallsCodeOutputLogsObject:
+      title: Code Interpreter log output
+      type: object
+      description: Text output from the Code Interpreter tool call as part of a run step.
+      properties:
+        type:
+          description: Always `logs`.
+          type: string
+          enum:
+            - logs
+          x-stainless-const: true
+        logs:
+          type: string
+          description: The text output from the Code Interpreter tool call.
+      required:
+        - type
+        - logs
+      x-stainless-naming:
+        java:
+          type_name: LogsOutput
+        kotlin:
+          type_name: LogsOutput
+    RunStepDetailsToolCallsFileSearchObject:
+      title: File search tool call
+      type: object
+      properties:
+        id:
+          type: string
+          description: The ID of the tool call object.
+        type:
+          type: string
+          description: The type of tool call. This is always going to be `file_search` for this type of tool call.
+          enum:
+            - file_search
+          x-stainless-const: true
+        file_search:
+          type: object
+          description: For now, this is always going to be an empty object.
+          x-oaiTypeLabel: map
+          properties:
+            ranking_options:
+              $ref: '#/components/schemas/RunStepDetailsToolCallsFileSearchRankingOptionsObject'
+            results:
+              type: array
+              description: The results of the file search.
+              items:
+                $ref: '#/components/schemas/RunStepDetailsToolCallsFileSearchResultObject'
+      required:
+        - id
+        - type
+        - file_search
+    RunStepDetailsToolCallsFileSearchRankingOptionsObject:
+      title: File search tool call ranking options
+      type: object
+      description: The ranking options for the file search.
+      properties:
+        ranker:
+          $ref: '#/components/schemas/FileSearchRanker'
+        score_threshold:
+          type: number
+          description: >-
+            The score threshold for the file search. All values must be a floating point number between 0 and
+            1.
+          minimum: 0
+          maximum: 1
+      required:
+        - ranker
+        - score_threshold
+    RunStepDetailsToolCallsFileSearchResultObject:
+      title: File search tool call result
+      type: object
+      description: A result instance of the file search.
+      x-oaiTypeLabel: map
+      properties:
+        file_id:
+          type: string
+          description: The ID of the file that the result was found in.
+        file_name:
+          type: string
+          description: The name of the file that the result was found in.
+        score:
+          type: number
+          description: The score of the result. All values must be a floating point number between 0 and 1.
+          minimum: 0
+          maximum: 1
+        content:
+          type: array
+          description: >-
+            The content of the result that was found. The content is only included if requested via the
+            include query parameter.
+          items:
+            type: object
+            properties:
+              type:
+                type: string
+                description: The type of the content.
+                enum:
+                  - text
+                x-stainless-const: true
+              text:
+                type: string
+                description: The text content of the file.
+      required:
+        - file_id
+        - file_name
+        - score
+    RunStepDetailsToolCallsFunctionObject:
+      type: object
+      title: Function tool call
+      properties:
+        id:
+          type: string
+          description: The ID of the tool call object.
+        type:
+          type: string
+          description: The type of tool call. This is always going to be `function` for this type of tool call.
+          enum:
+            - function
+          x-stainless-const: true
+        function:
+          type: object
+          description: The definition of the function that was called.
+          properties:
+            name:
+              type: string
+              description: The name of the function.
+            arguments:
+              type: string
+              description: The arguments passed to the function.
+            output:
+              anyOf:
+                - type: string
+                  description: >-
+                    The output of the function. This will be `null` if the outputs have not been
+                    [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) yet.
+                - type: 'null'
+          required:
+            - name
+            - arguments
+            - output
+      required:
+        - id
+        - type
+        - function
+    RunStepDetailsToolCallsObject:
+      title: Tool calls
+      type: object
+      description: Details of the tool call.
+      properties:
+        type:
+          description: Always `tool_calls`.
+ type: string + enum: + - tool_calls + x-stainless-const: true + tool_calls: + type: array + description: > + An array of tool calls the run step was involved in. These can be associated with one of three + types of tools: `code_interpreter`, `file_search`, or `function`. + items: + $ref: '#/components/schemas/RunStepDetailsToolCall' + required: + - type + - tool_calls + RunStepObject: + type: object + title: Run steps + description: | + Represents a step in execution of a run. + properties: + id: + description: The identifier of the run step, which can be referenced in API endpoints. + type: string + object: + description: The object type, which is always `thread.run.step`. + type: string + enum: + - thread.run.step + x-stainless-const: true + created_at: + description: The Unix timestamp (in seconds) for when the run step was created. + type: integer + assistant_id: + description: >- + The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) associated + with the run step. + type: string + thread_id: + description: The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) that was run. + type: string + run_id: + description: >- + The ID of the [run](https://platform.openai.com/docs/api-reference/runs) that this run step is a + part of. + type: string + type: + description: The type of run step, which can be either `message_creation` or `tool_calls`. + type: string + enum: + - message_creation + - tool_calls + status: + description: >- + The status of the run step, which can be either `in_progress`, `cancelled`, `failed`, `completed`, + or `expired`. + type: string + enum: + - in_progress + - cancelled + - failed + - completed + - expired + step_details: + type: object + description: The details of the run step. + anyOf: + - $ref: '#/components/schemas/RunStepDetailsMessageCreationObject' + - $ref: '#/components/schemas/RunStepDetailsToolCallsObject' + discriminator: + propertyName: type + last_error: + anyOf: + - type: object + description: The last error associated with this run step. Will be `null` if there are no errors. + properties: + code: + type: string + description: One of `server_error` or `rate_limit_exceeded`. + enum: + - server_error + - rate_limit_exceeded + message: + type: string + description: A human-readable description of the error. + required: + - code + - message + - type: 'null' + expired_at: + anyOf: + - description: >- + The Unix timestamp (in seconds) for when the run step expired. A step is considered expired if + the parent run is expired. + type: integer + - type: 'null' + cancelled_at: + anyOf: + - description: The Unix timestamp (in seconds) for when the run step was cancelled. + type: integer + - type: 'null' + failed_at: + anyOf: + - description: The Unix timestamp (in seconds) for when the run step failed. + type: integer + - type: 'null' + completed_at: + anyOf: + - description: The Unix timestamp (in seconds) for when the run step completed. 
+ type: integer + - type: 'null' + metadata: + $ref: '#/components/schemas/Metadata' + usage: + $ref: '#/components/schemas/RunStepCompletionUsage' + required: + - id + - object + - created_at + - assistant_id + - thread_id + - run_id + - type + - status + - step_details + - last_error + - expired_at + - cancelled_at + - failed_at + - completed_at + - metadata + - usage + x-oaiMeta: + name: The run step object + beta: true + example: | + { + "id": "step_abc123", + "object": "thread.run.step", + "created_at": 1699063291, + "run_id": "run_abc123", + "assistant_id": "asst_abc123", + "thread_id": "thread_abc123", + "type": "message_creation", + "status": "completed", + "cancelled_at": null, + "completed_at": 1699063291, + "expired_at": null, + "failed_at": null, + "last_error": null, + "step_details": { + "type": "message_creation", + "message_creation": { + "message_id": "msg_abc123" + } + }, + "usage": { + "prompt_tokens": 123, + "completion_tokens": 456, + "total_tokens": 579 + } + } + RunStepStreamEvent: + anyOf: + - type: object + properties: + event: + type: string + enum: + - thread.run.step.created + x-stainless-const: true + data: + $ref: '#/components/schemas/RunStepObject' + required: + - event + - data + description: >- + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is + created. + x-oaiMeta: + dataDescription: '`data` is a [run step](/docs/api-reference/run-steps/step-object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.step.in_progress + x-stainless-const: true + data: + $ref: '#/components/schemas/RunStepObject' + required: + - event + - data + description: >- + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) + moves to an `in_progress` state. + x-oaiMeta: + dataDescription: '`data` is a [run step](/docs/api-reference/run-steps/step-object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.step.delta + x-stainless-const: true + data: + $ref: '#/components/schemas/RunStepDeltaObject' + required: + - event + - data + description: >- + Occurs when parts of a [run + step](https://platform.openai.com/docs/api-reference/run-steps/step-object) are being streamed. + x-oaiMeta: + dataDescription: '`data` is a [run step delta](/docs/api-reference/assistants-streaming/run-step-delta-object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.step.completed + x-stainless-const: true + data: + $ref: '#/components/schemas/RunStepObject' + required: + - event + - data + description: >- + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is + completed. + x-oaiMeta: + dataDescription: '`data` is a [run step](/docs/api-reference/run-steps/step-object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.step.failed + x-stainless-const: true + data: + $ref: '#/components/schemas/RunStepObject' + required: + - event + - data + description: >- + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) + fails. 
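+    # Illustrative ordering of the run step events defined above for a single
+    # streamed step (payloads elided; the terminal event varies):
+    #   thread.run.step.created -> thread.run.step.in_progress
+    #     -> thread.run.step.delta (repeated)
+    #     -> thread.run.step.completed | .failed | .cancelled | .expired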
+ x-oaiMeta: + dataDescription: '`data` is a [run step](/docs/api-reference/run-steps/step-object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.step.cancelled + x-stainless-const: true + data: + $ref: '#/components/schemas/RunStepObject' + required: + - event + - data + description: >- + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is + cancelled. + x-oaiMeta: + dataDescription: '`data` is a [run step](/docs/api-reference/run-steps/step-object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.step.expired + x-stainless-const: true + data: + $ref: '#/components/schemas/RunStepObject' + required: + - event + - data + description: >- + Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) + expires. + x-oaiMeta: + dataDescription: '`data` is a [run step](/docs/api-reference/run-steps/step-object)' + discriminator: + propertyName: event + RunStreamEvent: + anyOf: + - type: object + properties: + event: + type: string + enum: + - thread.run.created + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: Occurs when a new [run](https://platform.openai.com/docs/api-reference/runs/object) is created. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.queued + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: >- + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a + `queued` status. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.in_progress + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: >- + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to an + `in_progress` status. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.requires_action + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: >- + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a + `requires_action` status. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.completed + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) is completed. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.incomplete + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: >- + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) ends with status + `incomplete`. 
+ x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.failed + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) fails. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.cancelling + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: >- + Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a + `cancelling` status. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.cancelled + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) is cancelled. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + - type: object + properties: + event: + type: string + enum: + - thread.run.expired + x-stainless-const: true + data: + $ref: '#/components/schemas/RunObject' + required: + - event + - data + description: Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) expires. + x-oaiMeta: + dataDescription: '`data` is a [run](/docs/api-reference/runs/object)' + discriminator: + propertyName: event + RunToolCallObject: + type: object + description: Tool call objects + properties: + id: + type: string + description: >- + The ID of the tool call. This ID must be referenced when you submit the tool outputs in using the + [Submit tool outputs to + run](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) endpoint. + type: + type: string + description: The type of tool call the output is required for. For now, this is always `function`. + enum: + - function + x-stainless-const: true + function: + type: object + description: The function definition. + properties: + name: + type: string + description: The name of the function. + arguments: + type: string + description: The arguments that the model expects you to pass to the function. + required: + - name + - arguments + required: + - id + - type + - function + Screenshot: + type: object + title: Screenshot + description: | + A screenshot action. + properties: + type: + type: string + enum: + - screenshot + default: screenshot + description: | + Specifies the event type. For a screenshot action, this property is + always set to `screenshot`. + x-stainless-const: true + required: + - type + Scroll: + type: object + title: Scroll + description: | + A scroll action. + properties: + type: + type: string + enum: + - scroll + default: scroll + description: | + Specifies the event type. For a scroll action, this property is + always set to `scroll`. + x-stainless-const: true + x: + type: integer + description: | + The x-coordinate where the scroll occurred. + 'y': + type: integer + description: | + The y-coordinate where the scroll occurred. + scroll_x: + type: integer + description: | + The horizontal scroll distance. + scroll_y: + type: integer + description: | + The vertical scroll distance. 
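+    # Non-normative example of a scroll action: scroll down 200 pixels with the
+    # pointer at (100, 200):
+    #   {"type": "scroll", "x": 100, "y": 200, "scroll_x": 0, "scroll_y": 200}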
+ required: + - type + - x + - 'y' + - scroll_x + - scroll_y + ServiceTier: + anyOf: + - type: string + description: | + Specifies the processing type used for serving the request. + - If set to 'auto', then the request will be processed with the service tier configured in the Project settings. Unless otherwise configured, the Project will use 'default'. + - If set to 'default', then the request will be processed with the standard pricing and performance for the selected model. + - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or '[priority](https://openai.com/api-priority-processing/)', then the request will be processed with the corresponding service tier. + - When not set, the default behavior is 'auto'. + + When the `service_tier` parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter. + enum: + - auto + - default + - flex + - scale + - priority + default: auto + - type: 'null' + SpeechAudioDeltaEvent: + type: object + description: Emitted for each chunk of audio data generated during speech synthesis. + properties: + type: + type: string + description: | + The type of the event. Always `speech.audio.delta`. + enum: + - speech.audio.delta + x-stainless-const: true + audio: + type: string + description: | + A chunk of Base64-encoded audio data. + required: + - type + - audio + x-oaiMeta: + name: Stream Event (speech.audio.delta) + group: speech + example: | + { + "type": "speech.audio.delta", + "audio": "base64-encoded-audio-data" + } + SpeechAudioDoneEvent: + type: object + description: Emitted when the speech synthesis is complete and all audio has been streamed. + properties: + type: + type: string + description: | + The type of the event. Always `speech.audio.done`. + enum: + - speech.audio.done + x-stainless-const: true + usage: + type: object + description: | + Token usage statistics for the request. + properties: + input_tokens: + type: integer + description: Number of input tokens in the prompt. + output_tokens: + type: integer + description: Number of output tokens generated. + total_tokens: + type: integer + description: Total number of tokens used (input + output). + required: + - input_tokens + - output_tokens + - total_tokens + required: + - type + - usage + x-oaiMeta: + name: Stream Event (speech.audio.done) + group: speech + example: | + { + "type": "speech.audio.done", + "usage": { + "input_tokens": 14, + "output_tokens": 101, + "total_tokens": 115 + } + } + StaticChunkingStrategy: + type: object + additionalProperties: false + properties: + max_chunk_size_tokens: + type: integer + minimum: 100 + maximum: 4096 + description: >- + The maximum number of tokens in each chunk. The default value is `800`. The minimum value is `100` + and the maximum value is `4096`. + chunk_overlap_tokens: + type: integer + description: | + The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + required: + - max_chunk_size_tokens + - chunk_overlap_tokens + StaticChunkingStrategyRequestParam: + type: object + title: Static Chunking Strategy + description: Customize your own chunking strategy by setting chunk size and chunk overlap. + additionalProperties: false + properties: + type: + type: string + description: Always `static`. 
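+    # Worked example (illustrative): with max_chunk_size_tokens: 800 the overlap
+    # may be at most 400 (half of 800), so
+    #   {"type": "static", "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}}
+    # is valid, while chunk_overlap_tokens: 500 would violate the constraint.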
+ enum: + - static + x-stainless-const: true + static: + $ref: '#/components/schemas/StaticChunkingStrategy' + required: + - type + - static + StaticChunkingStrategyResponseParam: + type: object + title: Static Chunking Strategy + additionalProperties: false + properties: + type: + type: string + description: Always `static`. + enum: + - static + x-stainless-const: true + static: + $ref: '#/components/schemas/StaticChunkingStrategy' + required: + - type + - static + StopConfiguration: + description: | + Not supported with latest reasoning models `o3` and `o4-mini`. + + Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + nullable: true + anyOf: + - type: string + default: <|endoftext|> + example: |+ + + nullable: true + - type: array + minItems: 1 + maxItems: 4 + items: + type: string + example: '["\n"]' + SubmitToolOutputsRunRequest: + type: object + additionalProperties: false + properties: + tool_outputs: + description: A list of tools for which the outputs are being submitted. + type: array + items: + type: object + properties: + tool_call_id: + type: string + description: >- + The ID of the tool call in the `required_action` object within the run object the output is + being submitted for. + output: + type: string + description: The output of the tool call to be submitted to continue the run. + stream: + anyOf: + - type: boolean + description: > + If `true`, returns a stream of events that happen during the Run as server-sent events, + terminating when the Run enters a terminal state with a `data: [DONE]` message. + - type: 'null' + required: + - tool_outputs + TextResponseFormatConfiguration: + description: | + An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, + which ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` + is preferred for models that support it. + anyOf: + - $ref: '#/components/schemas/ResponseFormatText' + - $ref: '#/components/schemas/TextResponseFormatJsonSchema' + - $ref: '#/components/schemas/ResponseFormatJsonObject' + discriminator: + propertyName: type + TextResponseFormatJsonSchema: + type: object + title: JSON schema + description: | + JSON Schema response format. Used to generate structured JSON responses. + Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). + properties: + type: + type: string + description: The type of response format being defined. Always `json_schema`. + enum: + - json_schema + x-stainless-const: true + description: + type: string + description: | + A description of what the response format is for, used by the model to + determine how to respond in the format. + name: + type: string + description: | + The name of the response format. Must be a-z, A-Z, 0-9, or contain + underscores and dashes, with a maximum length of 64. + schema: + $ref: '#/components/schemas/ResponseFormatJsonSchemaSchema' + strict: + anyOf: + - type: boolean + default: false + description: | + Whether to enable strict schema adherence when generating the output. 
+ If set to true, the model will always follow the exact schema defined + in the `schema` field. Only a subset of JSON Schema is supported when + `strict` is `true`. To learn more, read the [Structured Outputs + guide](https://platform.openai.com/docs/guides/structured-outputs). + - type: 'null' + required: + - type + - schema + - name + ThreadObject: + type: object + title: Thread + description: Represents a thread that contains [messages](https://platform.openai.com/docs/api-reference/messages). + properties: + id: + description: The identifier, which can be referenced in API endpoints. + type: string + object: + description: The object type, which is always `thread`. + type: string + enum: + - thread + x-stainless-const: true + created_at: + description: The Unix timestamp (in seconds) for when the thread was created. + type: integer + tool_resources: + anyOf: + - type: object + description: > + A set of resources that are made available to the assistant's tools in this thread. The + resources are specific to the type of tool. For example, the `code_interpreter` tool requires + a list of file IDs, while the `file_search` tool requires a list of vector store IDs. + properties: + code_interpreter: + type: object + properties: + file_ids: + type: array + description: > + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + default: [] + maxItems: 20 + items: + type: string + file_search: + type: object + properties: + vector_store_ids: + type: array + description: > + The [vector + store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached + to this thread. There can be a maximum of 1 vector store attached to the thread. + maxItems: 1 + items: + type: string + - type: 'null' + metadata: + $ref: '#/components/schemas/Metadata' + required: + - id + - object + - created_at + - tool_resources + - metadata + x-oaiMeta: + name: The thread object + beta: true + example: | + { + "id": "thread_abc123", + "object": "thread", + "created_at": 1698107661, + "metadata": {} + } + ThreadStreamEvent: + anyOf: + - type: object + properties: + enabled: + type: boolean + description: Whether to enable input audio transcription. + event: + type: string + enum: + - thread.created + x-stainless-const: true + data: + $ref: '#/components/schemas/ThreadObject' + required: + - event + - data + description: >- + Occurs when a new [thread](https://platform.openai.com/docs/api-reference/threads/object) is + created. + x-oaiMeta: + dataDescription: '`data` is a [thread](/docs/api-reference/threads/object)' + discriminator: + propertyName: event + ToggleCertificatesRequest: + type: object + properties: + certificate_ids: + type: array + items: + type: string + example: cert_abc + minItems: 1 + maxItems: 10 + required: + - certificate_ids + Tool: + description: | + A tool that can be used to generate a response. 
+ discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/FunctionTool' + - $ref: '#/components/schemas/FileSearchTool' + - $ref: '#/components/schemas/ComputerUsePreviewTool' + - $ref: '#/components/schemas/WebSearchTool' + - $ref: '#/components/schemas/MCPTool' + - $ref: '#/components/schemas/CodeInterpreterTool' + - $ref: '#/components/schemas/ImageGenTool' + - $ref: '#/components/schemas/LocalShellToolParam' + - $ref: '#/components/schemas/FunctionShellToolParam' + - $ref: '#/components/schemas/CustomToolParam' + - $ref: '#/components/schemas/WebSearchPreviewTool' + - $ref: '#/components/schemas/ApplyPatchToolParam' + ToolChoiceAllowed: + type: object + title: Allowed tools + description: | + Constrains the tools available to the model to a pre-defined set. + properties: + type: + type: string + enum: + - allowed_tools + description: Allowed tool configuration type. Always `allowed_tools`. + x-stainless-const: true + mode: + type: string + enum: + - auto + - required + description: | + Constrains the tools available to the model to a pre-defined set. + + `auto` allows the model to pick from among the allowed tools and generate a + message. + + `required` requires the model to call one or more of the allowed tools. + tools: + type: array + description: | + A list of tool definitions that the model should be allowed to call. + + For the Responses API, the list of tool definitions might look like: + ```json + [ + { "type": "function", "name": "get_weather" }, + { "type": "mcp", "server_label": "deepwiki" }, + { "type": "image_generation" } + ] + ``` + items: + type: object + description: | + A tool definition that the model should be allowed to call. + additionalProperties: true + x-oaiExpandable: false + required: + - type + - mode + - tools + ToolChoiceCustom: + type: object + title: Custom tool + description: | + Use this option to force the model to call a specific custom tool. + properties: + type: + type: string + enum: + - custom + description: For custom tool calling, the type is always `custom`. + x-stainless-const: true + name: + type: string + description: The name of the custom tool to call. + required: + - type + - name + ToolChoiceFunction: + type: object + title: Function tool + description: | + Use this option to force the model to call a specific function. + properties: + type: + type: string + enum: + - function + description: For function calling, the type is always `function`. + x-stainless-const: true + name: + type: string + description: The name of the function to call. + required: + - type + - name + ToolChoiceMCP: + type: object + title: MCP tool + description: | + Use this option to force the model to call a specific tool on a remote MCP server. + properties: + type: + type: string + enum: + - mcp + description: For MCP tools, the type is always `mcp`. + x-stainless-const: true + server_label: + type: string + description: | + The label of the MCP server to use. + name: + anyOf: + - type: string + description: | + The name of the tool to call on the server. + - type: 'null' + required: + - type + - server_label + ToolChoiceOptions: + type: string + title: Tool choice mode + description: | + Controls which (if any) tool is called by the model. + + `none` means the model will not call any tool and instead generates a message. + + `auto` means the model can pick between generating a message or calling one or + more tools. + + `required` means the model must call one or more tools. 
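+    # Non-normative examples of values accepted by the tool choice schemas
+    # defined in this group (names are placeholders):
+    #   "auto"                                         (ToolChoiceOptions)
+    #   {"type": "function", "name": "get_weather"}    (ToolChoiceFunction)
+    #   {"type": "mcp", "server_label": "deepwiki"}    (ToolChoiceMCP)
+    #   {"type": "allowed_tools", "mode": "required", "tools": [...]}  (ToolChoiceAllowed)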
+      enum:
+        - none
+        - auto
+        - required
+    ToolChoiceParam:
+      description: |
+        How the model should select which tool (or tools) to use when generating
+        a response. See the `tools` parameter to see how to specify which tools
+        the model can call.
+      anyOf:
+        - $ref: '#/components/schemas/ToolChoiceOptions'
+        - $ref: '#/components/schemas/ToolChoiceAllowed'
+        - $ref: '#/components/schemas/ToolChoiceTypes'
+        - $ref: '#/components/schemas/ToolChoiceFunction'
+        - $ref: '#/components/schemas/ToolChoiceMCP'
+        - $ref: '#/components/schemas/ToolChoiceCustom'
+        - $ref: '#/components/schemas/SpecificApplyPatchParam'
+        - $ref: '#/components/schemas/SpecificFunctionShellParam'
+      discriminator:
+        propertyName: type
+    ToolChoiceTypes:
+      type: object
+      title: Hosted tool
+      description: |
+        Indicates that the model should use a built-in tool to generate a response.
+        [Learn more about built-in tools](https://platform.openai.com/docs/guides/tools).
+      properties:
+        type:
+          type: string
+          description: |
+            The type of hosted tool the model should use. Learn more about
+            [built-in tools](https://platform.openai.com/docs/guides/tools).
+
+            Allowed values are:
+            - `file_search`
+            - `web_search_preview`
+            - `computer_use_preview`
+            - `code_interpreter`
+            - `image_generation`
+          enum:
+            - file_search
+            - web_search_preview
+            - computer_use_preview
+            - web_search_preview_2025_03_11
+            - image_generation
+            - code_interpreter
+      required:
+        - type
+    ToolsArray:
+      type: array
+      description: |
+        An array of tools the model may call while generating a response. You
+        can specify which tool to use by setting the `tool_choice` parameter.
+
+        We support the following categories of tools:
+        - **Built-in tools**: Tools that are provided by OpenAI that extend the
+          model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search)
+          or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about
+          [built-in tools](https://platform.openai.com/docs/guides/tools).
+        - **MCP Tools**: Integrations with third-party systems via custom MCP servers
+          or predefined connectors such as Google Drive and SharePoint. Learn more about
+          [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
+        - **Function calls (custom tools)**: Functions that are defined by you,
+          enabling the model to call your own code with strongly typed arguments
+          and outputs. Learn more about
+          [function calling](https://platform.openai.com/docs/guides/function-calling). You can also use
+          custom tools to call your own code.
+      items:
+        $ref: '#/components/schemas/Tool'
+    TranscriptTextDeltaEvent:
+      type: object
+      description: >-
+        Emitted when there is an additional text delta. This is also the first event emitted when the
+        transcription starts. Only emitted when you [create a
+        transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the
+        `stream` parameter set to `true`.
+      properties:
+        type:
+          type: string
+          description: |
+            The type of the event. Always `transcript.text.delta`.
+          enum:
+            - transcript.text.delta
+          x-stainless-const: true
+        delta:
+          type: string
+          description: |
+            The text delta that was additionally transcribed.
+        logprobs:
+          type: array
+          description: >
+            The log probabilities of the delta. Only included if you [create a
+            transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the
+            `include[]` parameter set to `logprobs`.
+ items: + type: object + properties: + token: + type: string + description: | + The token that was used to generate the log probability. + logprob: + type: number + description: | + The log probability of the token. + bytes: + type: array + items: + type: integer + description: | + The bytes that were used to generate the log probability. + segment_id: + type: string + description: > + Identifier of the diarized segment that this delta belongs to. Only present when using + `gpt-4o-transcribe-diarize`. + required: + - type + - delta + x-oaiMeta: + name: Stream Event (transcript.text.delta) + group: transcript + example: | + { + "type": "transcript.text.delta", + "delta": " wonderful" + } + TranscriptTextDoneEvent: + type: object + description: >- + Emitted when the transcription is complete. Contains the complete transcription text. Only emitted + when you [create a + transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with the + `Stream` parameter set to `true`. + properties: + type: + type: string + description: | + The type of the event. Always `transcript.text.done`. + enum: + - transcript.text.done + x-stainless-const: true + text: + type: string + description: | + The text that was transcribed. + logprobs: + type: array + description: > + The log probabilities of the individual tokens in the transcription. Only included if you [create + a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with + the `include[]` parameter set to `logprobs`. + items: + type: object + properties: + token: + type: string + description: | + The token that was used to generate the log probability. + logprob: + type: number + description: | + The log probability of the token. + bytes: + type: array + items: + type: integer + description: | + The bytes that were used to generate the log probability. + usage: + $ref: '#/components/schemas/TranscriptTextUsageTokens' + required: + - type + - text + x-oaiMeta: + name: Stream Event (transcript.text.done) + group: transcript + example: | + { + "type": "transcript.text.done", + "text": "I see skies of blue and clouds of white, the bright blessed days, the dark sacred nights, and I think to myself, what a wonderful world.", + "usage": { + "type": "tokens", + "input_tokens": 14, + "input_token_details": { + "text_tokens": 10, + "audio_tokens": 4 + }, + "output_tokens": 31, + "total_tokens": 45 + } + } + TranscriptTextSegmentEvent: + type: object + description: > + Emitted when a diarized transcription returns a completed segment with speaker information. Only + emitted when you [create a + transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) with + `stream` set to `true` and `response_format` set to `diarized_json`. + properties: + type: + type: string + description: The type of the event. Always `transcript.text.segment`. + enum: + - transcript.text.segment + x-stainless-const: true + id: + type: string + description: Unique identifier for the segment. + start: + type: number + format: float + description: Start timestamp of the segment in seconds. + end: + type: number + format: float + description: End timestamp of the segment in seconds. + text: + type: string + description: Transcript text for this segment. + speaker: + type: string + description: Speaker label for this segment. 
+ required: + - type + - id + - start + - end + - text + - speaker + x-oaiMeta: + name: Stream Event (transcript.text.segment) + group: transcript + example: | + { + "type": "transcript.text.segment", + "id": "seg_002", + "start": 5.2, + "end": 12.8, + "text": "Hi, I need help with diarization.", + "speaker": "A" + } + TranscriptTextUsageDuration: + type: object + title: TranscriptTextUsageDuration + description: Usage statistics for models billed by audio input duration. + properties: + type: + type: string + enum: + - duration + description: The type of the usage object. Always `duration` for this variant. + x-stainless-const: true + seconds: + type: number + description: Duration of the input audio in seconds. + required: + - type + - seconds + TranscriptTextUsageTokens: + type: object + title: TranscriptTextUsageTokens + description: Usage statistics for models billed by token usage. + properties: + type: + type: string + enum: + - tokens + description: The type of the usage object. Always `tokens` for this variant. + x-stainless-const: true + input_tokens: + type: integer + description: Number of input tokens billed for this request. + input_token_details: + type: object + description: Details about the input tokens billed for this request. + properties: + text_tokens: + type: integer + description: Number of text tokens billed for this request. + audio_tokens: + type: integer + description: Number of audio tokens billed for this request. + output_tokens: + type: integer + description: Number of output tokens generated. + total_tokens: + type: integer + description: Total number of tokens used (input + output). + required: + - type + - input_tokens + - output_tokens + - total_tokens + TranscriptionChunkingStrategy: + anyOf: + - description: >- + Controls how the audio is cut into chunks. When set to `"auto"`, the server first normalizes + loudness and then uses voice activity detection (VAD) to choose boundaries. `server_vad` object + can be provided to tweak VAD detection parameters manually. If unset, the audio is transcribed as + a single block. Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30 + seconds. + anyOf: + - type: string + enum: + - auto + default: auto + description: | + Automatically set chunking parameters based on the audio. Must be set to `"auto"`. + x-stainless-const: true + - $ref: '#/components/schemas/VadConfig' + x-oaiTypeLabel: string + - type: 'null' + TranscriptionDiarizedSegment: + type: object + description: A segment of diarized transcript text with speaker metadata. + properties: + type: + type: string + description: | + The type of the segment. Always `transcript.text.segment`. + enum: + - transcript.text.segment + x-stainless-const: true + id: + type: string + description: Unique identifier for the segment. + start: + type: number + format: float + description: Start timestamp of the segment in seconds. + end: + type: number + format: float + description: End timestamp of the segment in seconds. + text: + type: string + description: Transcript text for this segment. + speaker: + type: string + description: > + Speaker label for this segment. When known speakers are provided, the label matches + `known_speaker_names[]`. Otherwise speakers are labeled sequentially using capital letters (`A`, + `B`, ...). 
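+    # Illustrative labeling (restating the rule above): with
+    # known_speaker_names ["agent", "customer"], segments carry those labels;
+    # with no known speakers, the first distinct voice is "A", the next "B", and so on.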
+ required: + - type + - id + - start + - end + - text + - speaker + TranscriptionInclude: + type: string + enum: + - logprobs + TranscriptionSegment: + type: object + properties: + id: + type: integer + description: Unique identifier of the segment. + seek: + type: integer + description: Seek offset of the segment. + start: + type: number + format: float + description: Start time of the segment in seconds. + end: + type: number + format: float + description: End time of the segment in seconds. + text: + type: string + description: Text content of the segment. + tokens: + type: array + items: + type: integer + description: Array of token IDs for the text content. + temperature: + type: number + format: float + description: Temperature parameter used for generating the segment. + avg_logprob: + type: number + format: float + description: Average logprob of the segment. If the value is lower than -1, consider the logprobs failed. + compression_ratio: + type: number + format: float + description: >- + Compression ratio of the segment. If the value is greater than 2.4, consider the compression + failed. + no_speech_prob: + type: number + format: float + description: >- + Probability of no speech in the segment. If the value is higher than 1.0 and the `avg_logprob` is + below -1, consider this segment silent. + required: + - id + - seek + - start + - end + - text + - tokens + - temperature + - avg_logprob + - compression_ratio + - no_speech_prob + TranscriptionWord: + type: object + properties: + word: + type: string + description: The text content of the word. + start: + type: number + format: float + description: Start time of the word in seconds. + end: + type: number + format: float + description: End time of the word in seconds. + required: + - word + - start + - end + TruncationObject: + type: object + title: Thread Truncation Controls + description: >- + Controls for how a thread will be truncated prior to the run. Use this to control the initial context + window of the run. + properties: + type: + type: string + description: >- + The truncation strategy to use for the thread. The default is `auto`. If set to `last_messages`, + the thread will be truncated to the n most recent messages in the thread. When set to `auto`, + messages in the middle of the thread will be dropped to fit the context length of the model, + `max_prompt_tokens`. + enum: + - auto + - last_messages + last_messages: + anyOf: + - type: integer + description: The number of most recent messages from the thread when constructing the context for the run. + minimum: 1 + - type: 'null' + required: + - type + Type: + type: object + title: Type + description: | + An action to type in text. + properties: + type: + type: string + enum: + - type + default: type + description: | + Specifies the event type. For a type action, this property is + always set to `type`. + x-stainless-const: true + text: + type: string + description: | + The text to type. + required: + - type + - text + UpdateVectorStoreFileAttributesRequest: + type: object + additionalProperties: false + properties: + attributes: + $ref: '#/components/schemas/VectorStoreFileAttributes' + required: + - attributes + x-oaiMeta: + name: Update vector store file attributes request + UpdateVectorStoreRequest: + type: object + additionalProperties: false + properties: + name: + description: The name of the vector store. 
+          type: string
+          nullable: true
+        expires_after:
+          allOf:
+            - $ref: '#/components/schemas/VectorStoreExpirationAfter'
+            - nullable: true
+        metadata:
+          $ref: '#/components/schemas/Metadata'
+    Upload:
+      type: object
+      title: Upload
+      description: |
+        The Upload object can accept byte chunks in the form of Parts.
+      properties:
+        id:
+          type: string
+          description: The Upload unique identifier, which can be referenced in API endpoints.
+        created_at:
+          type: integer
+          description: The Unix timestamp (in seconds) for when the Upload was created.
+        filename:
+          type: string
+          description: The name of the file to be uploaded.
+        bytes:
+          type: integer
+          description: The intended number of bytes to be uploaded.
+        purpose:
+          type: string
+          description: >-
+            The intended purpose of the file. [Please refer
+            here](https://platform.openai.com/docs/api-reference/files/object#files/object-purpose) for
+            acceptable values.
+        status:
+          type: string
+          description: The status of the Upload.
+          enum:
+            - pending
+            - completed
+            - cancelled
+            - expired
+        expires_at:
+          type: integer
+          description: The Unix timestamp (in seconds) for when the Upload will expire.
+        object:
+          type: string
+          description: The object type, which is always "upload".
+          enum:
+            - upload
+          x-stainless-const: true
+        file:
+          allOf:
+            - $ref: '#/components/schemas/OpenAIFile'
+            - nullable: true
+          description: The ready File object after the Upload is completed.
+      required:
+        - bytes
+        - created_at
+        - expires_at
+        - filename
+        - id
+        - purpose
+        - status
+        - object
+      x-oaiMeta:
+        name: The upload object
+        example: |
+          {
+            "id": "upload_abc123",
+            "object": "upload",
+            "bytes": 2147483648,
+            "created_at": 1719184911,
+            "filename": "training_examples.jsonl",
+            "purpose": "fine-tune",
+            "status": "completed",
+            "expires_at": 1719127296,
+            "file": {
+              "id": "file-xyz321",
+              "object": "file",
+              "bytes": 2147483648,
+              "created_at": 1719186911,
+              "filename": "training_examples.jsonl",
+              "purpose": "fine-tune"
+            }
+          }
+    UploadCertificateRequest:
+      type: object
+      properties:
+        name:
+          type: string
+          description: An optional name for the certificate
+        content:
+          type: string
+          description: The certificate content in PEM format
+      required:
+        - content
+    UploadPart:
+      type: object
+      title: UploadPart
+      description: |
+        The upload Part represents a chunk of bytes we can add to an Upload object.
+      properties:
+        id:
+          type: string
+          description: The upload Part unique identifier, which can be referenced in API endpoints.
+        created_at:
+          type: integer
+          description: The Unix timestamp (in seconds) for when the Part was created.
+        upload_id:
+          type: string
+          description: The ID of the Upload object that this Part was added to.
+        object:
+          type: string
+          description: The object type, which is always `upload.part`.
+          enum:
+            - upload.part
+          x-stainless-const: true
+      required:
+        - created_at
+        - id
+        - object
+        - upload_id
+      x-oaiMeta:
+        name: The upload part object
+        example: |
+          {
+            "id": "part_def456",
+            "object": "upload.part",
+            "created_at": 1719186911,
+            "upload_id": "upload_abc123"
+          }
+    UsageAudioSpeechesResult:
+      type: object
+      description: The aggregated audio speeches usage details of the specific time bucket.
+      properties:
+        object:
+          type: string
+          enum:
+            - organization.usage.audio_speeches.result
+          x-stainless-const: true
+        characters:
+          type: integer
+          description: The number of characters processed.
+        num_model_requests:
+          type: integer
+          description: The count of requests made to the model.
+        project_id:
+          anyOf:
+            - type: string
+              description: When `group_by=project_id`, this field provides the project ID of the grouped usage result.
+            - type: 'null'
+        user_id:
+          anyOf:
+            - type: string
+              description: When `group_by=user_id`, this field provides the user ID of the grouped usage result.
+            - type: 'null'
+        api_key_id:
+          anyOf:
+            - type: string
+              description: When `group_by=api_key_id`, this field provides the API key ID of the grouped usage result.
+            - type: 'null'
+        model:
+          anyOf:
+            - type: string
+              description: When `group_by=model`, this field provides the model name of the grouped usage result.
+            - type: 'null'
+      required:
+        - object
+        - characters
+        - num_model_requests
+      x-oaiMeta:
+        name: Audio speeches usage object
+        example: |
+          {
+            "object": "organization.usage.audio_speeches.result",
+            "characters": 45,
+            "num_model_requests": 1,
+            "project_id": "proj_abc",
+            "user_id": "user-abc",
+            "api_key_id": "key_abc",
+            "model": "tts-1"
+          }
+    UsageAudioTranscriptionsResult:
+      type: object
+      description: The aggregated audio transcriptions usage details of the specific time bucket.
+      properties:
+        object:
+          type: string
+          enum:
+            - organization.usage.audio_transcriptions.result
+          x-stainless-const: true
+        seconds:
+          type: integer
+          description: The number of seconds processed.
+        num_model_requests:
+          type: integer
+          description: The count of requests made to the model.
+        project_id:
+          anyOf:
+            - type: string
+              description: When `group_by=project_id`, this field provides the project ID of the grouped usage result.
+            - type: 'null'
+        user_id:
+          anyOf:
+            - type: string
+              description: When `group_by=user_id`, this field provides the user ID of the grouped usage result.
+            - type: 'null'
+        api_key_id:
+          anyOf:
+            - type: string
+              description: When `group_by=api_key_id`, this field provides the API key ID of the grouped usage result.
+            - type: 'null'
+        model:
+          anyOf:
+            - type: string
+              description: When `group_by=model`, this field provides the model name of the grouped usage result.
+            - type: 'null'
+      required:
+        - object
+        - seconds
+        - num_model_requests
+      x-oaiMeta:
+        name: Audio transcriptions usage object
+        example: |
+          {
+            "object": "organization.usage.audio_transcriptions.result",
+            "seconds": 10,
+            "num_model_requests": 1,
+            "project_id": "proj_abc",
+            "user_id": "user-abc",
+            "api_key_id": "key_abc",
+            "model": "whisper-1"
+          }
+    UsageCodeInterpreterSessionsResult:
+      type: object
+      description: The aggregated code interpreter sessions usage details of the specific time bucket.
+      properties:
+        object:
+          type: string
+          enum:
+            - organization.usage.code_interpreter_sessions.result
+          x-stainless-const: true
+        num_sessions:
+          type: integer
+          description: The number of code interpreter sessions.
+        project_id:
+          anyOf:
+            - type: string
+              description: When `group_by=project_id`, this field provides the project ID of the grouped usage result.
+            - type: 'null'
+      required:
+        - object
+        - num_sessions
+      x-oaiMeta:
+        name: Code interpreter sessions usage object
+        example: |
+          {
+            "object": "organization.usage.code_interpreter_sessions.result",
+            "num_sessions": 1,
+            "project_id": "proj_abc"
+          }
+    UsageCompletionsResult:
+      type: object
+      description: The aggregated completions usage details of the specific time bucket.
+      properties:
+        object:
+          type: string
+          enum:
+            - organization.usage.completions.result
+          x-stainless-const: true
+        input_tokens:
+          type: integer
+          description: >-
+            The aggregated number of text input tokens used, including cached tokens. For customers
+            subscribed to the scale tier, this includes scale tier tokens.
+        input_cached_tokens:
+          type: integer
+          description: >-
+            The aggregated number of text input tokens that have been cached from previous requests. For
+            customers subscribed to the scale tier, this includes scale tier tokens.
+        output_tokens:
+          type: integer
+          description: >-
+            The aggregated number of text output tokens used. For customers subscribed to the scale tier,
+            this includes scale tier tokens.
+        input_audio_tokens:
+          type: integer
+          description: The aggregated number of audio input tokens used, including cached tokens.
+        output_audio_tokens:
+          type: integer
+          description: The aggregated number of audio output tokens used.
+        num_model_requests:
+          type: integer
+          description: The count of requests made to the model.
+        project_id:
+          anyOf:
+            - type: string
+              description: When `group_by=project_id`, this field provides the project ID of the grouped usage result.
+            - type: 'null'
+        user_id:
+          anyOf:
+            - type: string
+              description: When `group_by=user_id`, this field provides the user ID of the grouped usage result.
+            - type: 'null'
+        api_key_id:
+          anyOf:
+            - type: string
+              description: When `group_by=api_key_id`, this field provides the API key ID of the grouped usage result.
+            - type: 'null'
+        model:
+          anyOf:
+            - type: string
+              description: When `group_by=model`, this field provides the model name of the grouped usage result.
+            - type: 'null'
+        batch:
+          anyOf:
+            - type: boolean
+              description: When `group_by=batch`, this field indicates whether the grouped usage result is batch or not.
+            - type: 'null'
+        service_tier:
+          anyOf:
+            - type: string
+              description: >-
+                When `group_by=service_tier`, this field provides the service tier of the grouped usage
+                result.
+            - type: 'null'
+      required:
+        - object
+        - input_tokens
+        - output_tokens
+        - num_model_requests
+      x-oaiMeta:
+        name: Completions usage object
+        example: |
+          {
+            "object": "organization.usage.completions.result",
+            "input_tokens": 5000,
+            "output_tokens": 1000,
+            "input_cached_tokens": 4000,
+            "input_audio_tokens": 300,
+            "output_audio_tokens": 200,
+            "num_model_requests": 5,
+            "project_id": "proj_abc",
+            "user_id": "user-abc",
+            "api_key_id": "key_abc",
+            "model": "gpt-4o-mini-2024-07-18",
+            "batch": false,
+            "service_tier": "default"
+          }
+    UsageEmbeddingsResult:
+      type: object
+      description: The aggregated embeddings usage details of the specific time bucket.
+      properties:
+        object:
+          type: string
+          enum:
+            - organization.usage.embeddings.result
+          x-stainless-const: true
+        input_tokens:
+          type: integer
+          description: The aggregated number of input tokens used.
+        num_model_requests:
+          type: integer
+          description: The count of requests made to the model.
+        project_id:
+          anyOf:
+            - type: string
+              description: When `group_by=project_id`, this field provides the project ID of the grouped usage result.
+            - type: 'null'
+        user_id:
+          anyOf:
+            - type: string
+              description: When `group_by=user_id`, this field provides the user ID of the grouped usage result.
+            - type: 'null'
+        api_key_id:
+          anyOf:
+            - type: string
+              description: When `group_by=api_key_id`, this field provides the API key ID of the grouped usage result.
+            - type: 'null'
+        model:
+          anyOf:
+            - type: string
+              description: When `group_by=model`, this field provides the model name of the grouped usage result.
+ - type: 'null' + required: + - object + - input_tokens + - num_model_requests + x-oaiMeta: + name: Embeddings usage object + example: | + { + "object": "organization.usage.embeddings.result", + "input_tokens": 20, + "num_model_requests": 2, + "project_id": "proj_abc", + "user_id": "user-abc", + "api_key_id": "key_abc", + "model": "text-embedding-ada-002-v2" + } + UsageImagesResult: + type: object + description: The aggregated images usage details of the specific time bucket. + properties: + object: + type: string + enum: + - organization.usage.images.result + x-stainless-const: true + images: + type: integer + description: The number of images processed. + num_model_requests: + type: integer + description: The count of requests made to the model. + source: + anyOf: + - type: string + description: >- + When `group_by=source`, this field provides the source of the grouped usage result, possible + values are `image.generation`, `image.edit`, `image.variation`. + - type: 'null' + size: + anyOf: + - type: string + description: When `group_by=size`, this field provides the image size of the grouped usage result. + - type: 'null' + project_id: + anyOf: + - type: string + description: When `group_by=project_id`, this field provides the project ID of the grouped usage result. + - type: 'null' + user_id: + anyOf: + - type: string + description: When `group_by=user_id`, this field provides the user ID of the grouped usage result. + - type: 'null' + api_key_id: + anyOf: + - type: string + description: When `group_by=api_key_id`, this field provides the API key ID of the grouped usage result. + - type: 'null' + model: + anyOf: + - type: string + description: When `group_by=model`, this field provides the model name of the grouped usage result. + - type: 'null' + required: + - object + - images + - num_model_requests + x-oaiMeta: + name: Images usage object + example: | + { + "object": "organization.usage.images.result", + "images": 2, + "num_model_requests": 2, + "size": "1024x1024", + "source": "image.generation", + "project_id": "proj_abc", + "user_id": "user-abc", + "api_key_id": "key_abc", + "model": "dall-e-3" + } + UsageModerationsResult: + type: object + description: The aggregated moderations usage details of the specific time bucket. + properties: + object: + type: string + enum: + - organization.usage.moderations.result + x-stainless-const: true + input_tokens: + type: integer + description: The aggregated number of input tokens used. + num_model_requests: + type: integer + description: The count of requests made to the model. + project_id: + anyOf: + - type: string + description: When `group_by=project_id`, this field provides the project ID of the grouped usage result. + - type: 'null' + user_id: + anyOf: + - type: string + description: When `group_by=user_id`, this field provides the user ID of the grouped usage result. + - type: 'null' + api_key_id: + anyOf: + - type: string + description: When `group_by=api_key_id`, this field provides the API key ID of the grouped usage result. + - type: 'null' + model: + anyOf: + - type: string + description: When `group_by=model`, this field provides the model name of the grouped usage result. 
+ - type: 'null' + required: + - object + - input_tokens + - num_model_requests + x-oaiMeta: + name: Moderations usage object + example: | + { + "object": "organization.usage.moderations.result", + "input_tokens": 20, + "num_model_requests": 2, + "project_id": "proj_abc", + "user_id": "user-abc", + "api_key_id": "key_abc", + "model": "text-moderation" + } + UsageResponse: + type: object + properties: + object: + type: string + enum: + - page + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/UsageTimeBucket' + has_more: + type: boolean + next_page: + type: string + required: + - object + - data + - has_more + - next_page + UsageTimeBucket: + type: object + properties: + object: + type: string + enum: + - bucket + x-stainless-const: true + start_time: + type: integer + end_time: + type: integer + result: + type: array + items: + anyOf: + - $ref: '#/components/schemas/UsageCompletionsResult' + - $ref: '#/components/schemas/UsageEmbeddingsResult' + - $ref: '#/components/schemas/UsageModerationsResult' + - $ref: '#/components/schemas/UsageImagesResult' + - $ref: '#/components/schemas/UsageAudioSpeechesResult' + - $ref: '#/components/schemas/UsageAudioTranscriptionsResult' + - $ref: '#/components/schemas/UsageVectorStoresResult' + - $ref: '#/components/schemas/UsageCodeInterpreterSessionsResult' + - $ref: '#/components/schemas/CostsResult' + discriminator: + propertyName: object + required: + - object + - start_time + - end_time + - result + UsageVectorStoresResult: + type: object + description: The aggregated vector stores usage details of the specific time bucket. + properties: + object: + type: string + enum: + - organization.usage.vector_stores.result + x-stainless-const: true + usage_bytes: + type: integer + description: The vector stores usage in bytes. + project_id: + anyOf: + - type: string + description: When `group_by=project_id`, this field provides the project ID of the grouped usage result. + - type: 'null' + required: + - object + - usage_bytes + x-oaiMeta: + name: Vector stores usage object + example: | + { + "object": "organization.usage.vector_stores.result", + "usage_bytes": 1024, + "project_id": "proj_abc" + } + User: + type: object + description: Represents an individual `user` within an organization. + properties: + object: + type: string + enum: + - organization.user + description: The object type, which is always `organization.user` + x-stainless-const: true + id: + type: string + description: The identifier, which can be referenced in API endpoints + name: + type: string + description: The name of the user + email: + type: string + description: The email address of the user + role: + type: string + enum: + - owner + - reader + description: '`owner` or `reader`' + added_at: + type: integer + description: The Unix timestamp (in seconds) of when the user was added. 
+ required: + - object + - id + - name + - email + - role + - added_at + x-oaiMeta: + name: The user object + example: | + { + "object": "organization.user", + "id": "user_abc", + "name": "First Last", + "email": "user@example.com", + "role": "owner", + "added_at": 1711471533 + } + UserDeleteResponse: + type: object + properties: + object: + type: string + enum: + - organization.user.deleted + x-stainless-const: true + id: + type: string + deleted: + type: boolean + required: + - object + - id + - deleted + UserListResponse: + type: object + properties: + object: + type: string + enum: + - list + x-stainless-const: true + data: + type: array + items: + $ref: '#/components/schemas/User' + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + required: + - object + - data + - first_id + - last_id + - has_more + UserRoleUpdateRequest: + type: object + properties: + role: + type: string + enum: + - owner + - reader + description: '`owner` or `reader`' + required: + - role + VadConfig: + type: object + additionalProperties: false + required: + - type + properties: + type: + type: string + enum: + - server_vad + description: Must be set to `server_vad` to enable manual chunking using server side VAD. + prefix_padding_ms: + type: integer + default: 300 + description: | + Amount of audio to include before the VAD detected speech (in + milliseconds). + silence_duration_ms: + type: integer + default: 200 + description: | + Duration of silence to detect speech stop (in milliseconds). + With shorter values the model will respond more quickly, + but may jump in on short pauses from the user. + threshold: + type: number + default: 0.5 + description: | + Sensitivity threshold (0.0 to 1.0) for voice activity detection. A + higher threshold will require louder audio to activate the model, and + thus might perform better in noisy environments. + ValidateGraderRequest: + type: object + title: ValidateGraderRequest + properties: + grader: + type: object + description: The grader used for the fine-tuning job. + anyOf: + - $ref: '#/components/schemas/GraderStringCheck' + - $ref: '#/components/schemas/GraderTextSimilarity' + - $ref: '#/components/schemas/GraderPython' + - $ref: '#/components/schemas/GraderScoreModel' + - $ref: '#/components/schemas/GraderMulti' + required: + - grader + ValidateGraderResponse: + type: object + title: ValidateGraderResponse + properties: + grader: + type: object + description: The grader used for the fine-tuning job. + anyOf: + - $ref: '#/components/schemas/GraderStringCheck' + - $ref: '#/components/schemas/GraderTextSimilarity' + - $ref: '#/components/schemas/GraderPython' + - $ref: '#/components/schemas/GraderScoreModel' + - $ref: '#/components/schemas/GraderMulti' + VectorStoreExpirationAfter: + type: object + title: Vector store expiration policy + description: The expiration policy for a vector store. + properties: + anchor: + description: 'Anchor timestamp after which the expiration policy applies. Supported anchors: `last_active_at`.' + type: string + enum: + - last_active_at + x-stainless-const: true + days: + description: The number of days after the anchor time that the vector store will expire. + type: integer + minimum: 1 + maximum: 365 + required: + - anchor + - days + VectorStoreFileAttributes: + anyOf: + - type: object + description: | + Set of 16 key-value pairs that can be attached to an object. 
+            This can be
+            useful for storing additional information about the object in a structured
+            format, and querying for objects via API or the dashboard. Keys are strings
+            with a maximum length of 64 characters. Values are strings with a maximum
+            length of 512 characters, booleans, or numbers.
+          maxProperties: 16
+          propertyNames:
+            type: string
+            maxLength: 64
+          additionalProperties:
+            anyOf:
+              - type: string
+                maxLength: 512
+              - type: number
+              - type: boolean
+          x-oaiTypeLabel: map
+        - type: 'null'
+    VectorStoreFileBatchObject:
+      type: object
+      title: Vector store file batch
+      description: A batch of files attached to a vector store.
+      properties:
+        id:
+          description: The identifier, which can be referenced in API endpoints.
+          type: string
+        object:
+          description: The object type, which is always `vector_store.files_batch`.
+          type: string
+          enum:
+            - vector_store.files_batch
+          x-stainless-const: true
+        created_at:
+          description: The Unix timestamp (in seconds) for when the vector store files batch was created.
+          type: integer
+        vector_store_id:
+          description: >-
+            The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object)
+            that the [File](https://platform.openai.com/docs/api-reference/files) is attached to.
+          type: string
+        status:
+          description: >-
+            The status of the vector store files batch, which can be either `in_progress`, `completed`,
+            `cancelled` or `failed`.
+          type: string
+          enum:
+            - in_progress
+            - completed
+            - cancelled
+            - failed
+        file_counts:
+          type: object
+          properties:
+            in_progress:
+              description: The number of files that are currently being processed.
+              type: integer
+            completed:
+              description: The number of files that have been processed.
+              type: integer
+            failed:
+              description: The number of files that have failed to process.
+              type: integer
+            cancelled:
+              description: The number of files that were cancelled.
+              type: integer
+            total:
+              description: The total number of files.
+              type: integer
+          required:
+            - in_progress
+            - completed
+            - cancelled
+            - failed
+            - total
+      required:
+        - id
+        - object
+        - created_at
+        - vector_store_id
+        - status
+        - file_counts
+      x-oaiMeta:
+        name: The vector store files batch object
+        beta: true
+        example: |
+          {
+            "id": "vsfb_123",
+            "object": "vector_store.files_batch",
+            "created_at": 1698107661,
+            "vector_store_id": "vs_abc123",
+            "status": "completed",
+            "file_counts": {
+              "in_progress": 0,
+              "completed": 100,
+              "failed": 0,
+              "cancelled": 0,
+              "total": 100
+            }
+          }
+    VectorStoreFileContentResponse:
+      type: object
+      description: Represents the parsed content of a vector store file.
+      properties:
+        object:
+          type: string
+          enum:
+            - vector_store.file_content.page
+          description: The object type, which is always `vector_store.file_content.page`
+          x-stainless-const: true
+        data:
+          type: array
+          description: Parsed content of the file.
+          items:
+            type: object
+            properties:
+              type:
+                type: string
+                description: The content type (currently only `"text"`)
+              text:
+                type: string
+                description: The text content
+        has_more:
+          type: boolean
+          description: Indicates if there are more content pages to fetch.
+        next_page:
+          anyOf:
+            - type: string
+              description: The token for the next page, if any.
+            - type: 'null'
+      required:
+        - object
+        - data
+        - has_more
+        - next_page
+    VectorStoreFileObject:
+      type: object
+      title: Vector store files
+      description: A file attached to a vector store.
+      properties:
+        id:
+          description: The identifier, which can be referenced in API endpoints.
+          type: string
+        object:
+          description: The object type, which is always `vector_store.file`.
+          type: string
+          enum:
+            - vector_store.file
+          x-stainless-const: true
+        usage_bytes:
+          description: >-
+            The total vector store usage in bytes. Note that this may be different from the original file
+            size.
+          type: integer
+        created_at:
+          description: The Unix timestamp (in seconds) for when the vector store file was created.
+          type: integer
+        vector_store_id:
+          description: >-
+            The ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object)
+            that the [File](https://platform.openai.com/docs/api-reference/files) is attached to.
+          type: string
+        status:
+          description: >-
+            The status of the vector store file, which can be either `in_progress`, `completed`, `cancelled`,
+            or `failed`. The status `completed` indicates that the vector store file is ready for use.
+          type: string
+          enum:
+            - in_progress
+            - completed
+            - cancelled
+            - failed
+        last_error:
+          anyOf:
+            - type: object
+              description: The last error associated with this vector store file. Will be `null` if there are no errors.
+              properties:
+                code:
+                  type: string
+                  description: One of `server_error`, `unsupported_file`, or `invalid_file`.
+                  enum:
+                    - server_error
+                    - unsupported_file
+                    - invalid_file
+                message:
+                  type: string
+                  description: A human-readable description of the error.
+              required:
+                - code
+                - message
+            - type: 'null'
+        chunking_strategy:
+          $ref: '#/components/schemas/ChunkingStrategyResponse'
+        attributes:
+          $ref: '#/components/schemas/VectorStoreFileAttributes'
+      required:
+        - id
+        - object
+        - usage_bytes
+        - created_at
+        - vector_store_id
+        - status
+        - last_error
+      x-oaiMeta:
+        name: The vector store file object
+        beta: true
+        example: |
+          {
+            "id": "file-abc123",
+            "object": "vector_store.file",
+            "usage_bytes": 1234,
+            "created_at": 1698107661,
+            "vector_store_id": "vs_abc123",
+            "status": "completed",
+            "last_error": null,
+            "chunking_strategy": {
+              "type": "static",
+              "static": {
+                "max_chunk_size_tokens": 800,
+                "chunk_overlap_tokens": 400
+              }
+            }
+          }
+    VectorStoreObject:
+      type: object
+      title: Vector store
+      description: A vector store is a collection of processed files that can be used by the `file_search` tool.
+      properties:
+        id:
+          description: The identifier, which can be referenced in API endpoints.
+          type: string
+        object:
+          description: The object type, which is always `vector_store`.
+          type: string
+          enum:
+            - vector_store
+          x-stainless-const: true
+        created_at:
+          description: The Unix timestamp (in seconds) for when the vector store was created.
+          type: integer
+        name:
+          description: The name of the vector store.
+          type: string
+        usage_bytes:
+          description: The total number of bytes used by the files in the vector store.
+          type: integer
+        file_counts:
+          type: object
+          properties:
+            in_progress:
+              description: The number of files that are currently being processed.
+              type: integer
+            completed:
+              description: The number of files that have been successfully processed.
+              type: integer
+            failed:
+              description: The number of files that have failed to process.
+              type: integer
+            cancelled:
+              description: The number of files that were cancelled.
+              type: integer
+            total:
+              description: The total number of files.
+              type: integer
+          required:
+            - in_progress
+            - completed
+            - failed
+            - cancelled
+            - total
+        status:
+          description: >-
+            The status of the vector store, which can be either `expired`, `in_progress`, or `completed`.
+            A status of `completed` indicates that the vector store is ready for use.
+          type: string
+          enum:
+            - expired
+            - in_progress
+            - completed
+        expires_after:
+          $ref: '#/components/schemas/VectorStoreExpirationAfter'
+        expires_at:
+          anyOf:
+            - description: The Unix timestamp (in seconds) for when the vector store will expire.
+              type: integer
+            - type: 'null'
+        last_active_at:
+          anyOf:
+            - description: The Unix timestamp (in seconds) for when the vector store was last active.
+              type: integer
+            - type: 'null'
+        metadata:
+          $ref: '#/components/schemas/Metadata'
+      required:
+        - id
+        - object
+        - usage_bytes
+        - created_at
+        - status
+        - last_active_at
+        - name
+        - file_counts
+        - metadata
+      x-oaiMeta:
+        name: The vector store object
+        example: |
+          {
+            "id": "vs_123",
+            "object": "vector_store",
+            "created_at": 1698107661,
+            "usage_bytes": 123456,
+            "last_active_at": 1698107661,
+            "name": "my_vector_store",
+            "status": "completed",
+            "file_counts": {
+              "in_progress": 0,
+              "completed": 100,
+              "cancelled": 0,
+              "failed": 0,
+              "total": 100
+            }
+          }
+    VectorStoreSearchRequest:
+      type: object
+      additionalProperties: false
+      properties:
+        query:
+          description: A query string for a search
+          anyOf:
+            - type: string
+            - type: array
+              items:
+                type: string
+              description: A list of queries to search for.
+              minItems: 1
+        rewrite_query:
+          description: Whether to rewrite the natural language query for vector search.
+          type: boolean
+          default: false
+        max_num_results:
+          description: The maximum number of results to return. This number should be between 1 and 50 inclusive.
+          type: integer
+          default: 10
+          minimum: 1
+          maximum: 50
+        filters:
+          description: A filter to apply based on file attributes.
+          anyOf:
+            - $ref: '#/components/schemas/ComparisonFilter'
+            - $ref: '#/components/schemas/CompoundFilter'
+        ranking_options:
+          description: Ranking options for search.
+          type: object
+          additionalProperties: false
+          properties:
+            ranker:
+              description: Enable re-ranking; set to `none` to disable, which can help reduce latency.
+              type: string
+              enum:
+                - none
+                - auto
+                - default-2024-11-15
+              default: auto
+            score_threshold:
+              type: number
+              minimum: 0
+              maximum: 1
+              default: 0
+      required:
+        - query
+      x-oaiMeta:
+        name: Vector store search request
+    VectorStoreSearchResultContentObject:
+      type: object
+      additionalProperties: false
+      properties:
+        type:
+          description: The type of content.
+          type: string
+          enum:
+            - text
+        text:
+          description: The text content returned from search.
+          type: string
+      required:
+        - type
+        - text
+      x-oaiMeta:
+        name: Vector store search result content object
+    VectorStoreSearchResultItem:
+      type: object
+      additionalProperties: false
+      properties:
+        file_id:
+          type: string
+          description: The ID of the vector store file.
+        filename:
+          type: string
+          description: The name of the vector store file.
+        score:
+          type: number
+          description: The similarity score for the result.
+          minimum: 0
+          maximum: 1
+        attributes:
+          $ref: '#/components/schemas/VectorStoreFileAttributes'
+        content:
+          type: array
+          description: Content chunks from the file.
+ items: + $ref: '#/components/schemas/VectorStoreSearchResultContentObject' + required: + - file_id + - filename + - score + - attributes + - content + x-oaiMeta: + name: Vector store search result item + VectorStoreSearchResultsPage: + type: object + additionalProperties: false + properties: + object: + type: string + enum: + - vector_store.search_results.page + description: The object type, which is always `vector_store.search_results.page` + x-stainless-const: true + search_query: + type: array + items: + type: string + description: The query used for this search. + minItems: 1 + data: + type: array + description: The list of search result items. + items: + $ref: '#/components/schemas/VectorStoreSearchResultItem' + has_more: + type: boolean + description: Indicates if there are more results to fetch. + next_page: + anyOf: + - type: string + description: The token for the next page, if any. + - type: 'null' + required: + - object + - search_query + - data + - has_more + - next_page + x-oaiMeta: + name: Vector store search results page + Verbosity: + anyOf: + - type: string + enum: + - low + - medium + - high + default: medium + description: | + Constrains the verbosity of the model's response. Lower values will result in + more concise responses, while higher values will result in more verbose responses. + Currently supported values are `low`, `medium`, and `high`. + - type: 'null' + VoiceIdsShared: + example: ash + anyOf: + - type: string + - type: string + enum: + - alloy + - ash + - ballad + - coral + - echo + - sage + - shimmer + - verse + - marin + - cedar + Wait: + type: object + title: Wait + description: | + A wait action. + properties: + type: + type: string + enum: + - wait + default: wait + description: | + Specifies the event type. For a wait action, this property is + always set to `wait`. + x-stainless-const: true + required: + - type + WebSearchActionFind: + type: object + title: Find action + description: | + Action type "find": Searches for a pattern within a loaded page. + properties: + type: + type: string + enum: + - find + description: | + The action type. + x-stainless-const: true + url: + type: string + format: uri + description: | + The URL of the page searched for the pattern. + pattern: + type: string + description: | + The pattern or text to search for within the page. + required: + - type + - url + - pattern + WebSearchActionOpenPage: + type: object + title: Open page action + description: | + Action type "open_page" - Opens a specific URL from search results. + properties: + type: + type: string + enum: + - open_page + description: | + The action type. + x-stainless-const: true + url: + type: string + format: uri + description: | + The URL opened by the model. + required: + - type + - url + WebSearchActionSearch: + type: object + title: Search action + description: | + Action type "search" - Performs a web search query. + properties: + type: + type: string + enum: + - search + description: | + The action type. + x-stainless-const: true + query: + type: string + description: | + The search query. + sources: + type: array + title: Web search sources + description: | + The sources used in the search. + items: + type: object + title: Web search source + description: | + A source used in the search. + properties: + type: + type: string + enum: + - url + description: | + The type of source. Always `url`. + x-stainless-const: true + url: + type: string + description: | + The URL of the source. 
+ required: + - type + - url + required: + - type + - query + WebSearchApproximateLocation: + anyOf: + - type: object + title: Web search approximate location + description: | + The approximate location of the user. + properties: + type: + type: string + enum: + - approximate + description: The type of location approximation. Always `approximate`. + default: approximate + x-stainless-const: true + country: + anyOf: + - type: string + description: >- + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, + e.g. `US`. + - type: 'null' + region: + anyOf: + - type: string + description: Free text input for the region of the user, e.g. `California`. + - type: 'null' + city: + anyOf: + - type: string + description: Free text input for the city of the user, e.g. `San Francisco`. + - type: 'null' + timezone: + anyOf: + - type: string + description: >- + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g. + `America/Los_Angeles`. + - type: 'null' + - type: 'null' + WebSearchContextSize: + type: string + description: | + High level guidance for the amount of context window space to use for the + search. One of `low`, `medium`, or `high`. `medium` is the default. + enum: + - low + - medium + - high + default: medium + WebSearchLocation: + type: object + title: Web search location + description: Approximate location parameters for the search. + properties: + country: + type: string + description: | + The two-letter + [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, + e.g. `US`. + region: + type: string + description: | + Free text input for the region of the user, e.g. `California`. + city: + type: string + description: | + Free text input for the city of the user, e.g. `San Francisco`. + timezone: + type: string + description: | + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) + of the user, e.g. `America/Los_Angeles`. + WebSearchTool: + type: object + title: Web search + description: | + Search the Internet for sources related to the prompt. Learn more about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search). + properties: + type: + type: string + enum: + - web_search + - web_search_2025_08_26 + description: The type of the web search tool. One of `web_search` or `web_search_2025_08_26`. + default: web_search + filters: + anyOf: + - type: object + description: | + Filters for the search. + properties: + allowed_domains: + anyOf: + - type: array + title: Allowed domains for the search. + description: | + Allowed domains for the search. If not provided, all domains are allowed. + Subdomains of the provided domains are allowed as well. + + Example: `["pubmed.ncbi.nlm.nih.gov"]` + items: + type: string + description: Allowed domain for the search. + default: [] + - type: 'null' + - type: 'null' + user_location: + $ref: '#/components/schemas/WebSearchApproximateLocation' + search_context_size: + type: string + enum: + - low + - medium + - high + default: medium + description: >- + High level guidance for the amount of context window space to use for the search. One of `low`, + `medium`, or `high`. `medium` is the default. + required: + - type + WebSearchToolCall: + type: object + title: Web search tool call + description: | + The results of a web search tool call. See the + [web search guide](https://platform.openai.com/docs/guides/tools-web-search) for more information. 
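+      # Illustrative sketch (not part of the upstream spec; values are hypothetical):
+      # a completed web_search_call item with a "search" action might look like
+      #   {
+      #     "id": "ws_abc123",
+      #     "type": "web_search_call",
+      #     "status": "completed",
+      #     "action": {
+      #       "type": "search",
+      #       "query": "example search query",
+      #       "sources": [{ "type": "url", "url": "https://example.com/result" }]
+      #     }
+      #   }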
+ properties: + id: + type: string + description: | + The unique ID of the web search tool call. + type: + type: string + enum: + - web_search_call + description: | + The type of the web search tool call. Always `web_search_call`. + x-stainless-const: true + status: + type: string + description: | + The status of the web search tool call. + enum: + - in_progress + - searching + - completed + - failed + action: + type: object + description: | + An object describing the specific action taken in this web search call. + Includes details on how the model used the web (search, open_page, find). + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/WebSearchActionSearch' + - $ref: '#/components/schemas/WebSearchActionOpenPage' + - $ref: '#/components/schemas/WebSearchActionFind' + required: + - id + - type + - status + - action + WebhookBatchCancelled: + type: object + title: batch.cancelled + description: | + Sent when a batch API request has been cancelled. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the batch API request was cancelled. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the batch API request. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `batch.cancelled`. + enum: + - batch.cancelled + x-stainless-const: true + x-oaiMeta: + name: batch.cancelled + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "batch.cancelled", + "created_at": 1719168000, + "data": { + "id": "batch_abc123" + } + } + WebhookBatchCompleted: + type: object + title: batch.completed + description: | + Sent when a batch API request has been completed. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the batch API request was completed. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the batch API request. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `batch.completed`. + enum: + - batch.completed + x-stainless-const: true + x-oaiMeta: + name: batch.completed + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "batch.completed", + "created_at": 1719168000, + "data": { + "id": "batch_abc123" + } + } + WebhookBatchExpired: + type: object + title: batch.expired + description: | + Sent when a batch API request has expired. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the batch API request expired. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the batch API request. 
+ object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `batch.expired`. + enum: + - batch.expired + x-stainless-const: true + x-oaiMeta: + name: batch.expired + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "batch.expired", + "created_at": 1719168000, + "data": { + "id": "batch_abc123" + } + } + WebhookBatchFailed: + type: object + title: batch.failed + description: | + Sent when a batch API request has failed. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the batch API request failed. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the batch API request. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `batch.failed`. + enum: + - batch.failed + x-stainless-const: true + x-oaiMeta: + name: batch.failed + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "batch.failed", + "created_at": 1719168000, + "data": { + "id": "batch_abc123" + } + } + WebhookEvalRunCanceled: + type: object + title: eval.run.canceled + description: | + Sent when an eval run has been canceled. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the eval run was canceled. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the eval run. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `eval.run.canceled`. + enum: + - eval.run.canceled + x-stainless-const: true + x-oaiMeta: + name: eval.run.canceled + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "eval.run.canceled", + "created_at": 1719168000, + "data": { + "id": "evalrun_abc123" + } + } + WebhookEvalRunFailed: + type: object + title: eval.run.failed + description: | + Sent when an eval run has failed. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the eval run failed. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the eval run. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `eval.run.failed`. 
+ enum: + - eval.run.failed + x-stainless-const: true + x-oaiMeta: + name: eval.run.failed + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "eval.run.failed", + "created_at": 1719168000, + "data": { + "id": "evalrun_abc123" + } + } + WebhookEvalRunSucceeded: + type: object + title: eval.run.succeeded + description: | + Sent when an eval run has succeeded. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the eval run succeeded. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the eval run. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `eval.run.succeeded`. + enum: + - eval.run.succeeded + x-stainless-const: true + x-oaiMeta: + name: eval.run.succeeded + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "eval.run.succeeded", + "created_at": 1719168000, + "data": { + "id": "evalrun_abc123" + } + } + WebhookFineTuningJobCancelled: + type: object + title: fine_tuning.job.cancelled + description: | + Sent when a fine-tuning job has been cancelled. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the fine-tuning job was cancelled. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the fine-tuning job. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `fine_tuning.job.cancelled`. + enum: + - fine_tuning.job.cancelled + x-stainless-const: true + x-oaiMeta: + name: fine_tuning.job.cancelled + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "fine_tuning.job.cancelled", + "created_at": 1719168000, + "data": { + "id": "ftjob_abc123" + } + } + WebhookFineTuningJobFailed: + type: object + title: fine_tuning.job.failed + description: | + Sent when a fine-tuning job has failed. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the fine-tuning job failed. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the fine-tuning job. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `fine_tuning.job.failed`. 
+            enum:
+              - fine_tuning.job.failed
+            x-stainless-const: true
+      x-oaiMeta:
+        name: fine_tuning.job.failed
+        group: webhook-events
+        example: |
+          {
+            "id": "evt_abc123",
+            "type": "fine_tuning.job.failed",
+            "created_at": 1719168000,
+            "data": {
+              "id": "ftjob_abc123"
+            }
+          }
+    WebhookFineTuningJobSucceeded:
+      type: object
+      title: fine_tuning.job.succeeded
+      description: |
+        Sent when a fine-tuning job has succeeded.
+      required:
+        - created_at
+        - id
+        - data
+        - type
+      properties:
+        created_at:
+          type: integer
+          description: |
+            The Unix timestamp (in seconds) of when the fine-tuning job succeeded.
+        id:
+          type: string
+          description: |
+            The unique ID of the event.
+        data:
+          type: object
+          description: |
+            Event data payload.
+          required:
+            - id
+          properties:
+            id:
+              type: string
+              description: |
+                The unique ID of the fine-tuning job.
+        object:
+          type: string
+          description: |
+            The object of the event. Always `event`.
+          enum:
+            - event
+          x-stainless-const: true
+        type:
+          type: string
+          description: |
+            The type of the event. Always `fine_tuning.job.succeeded`.
+          enum:
+            - fine_tuning.job.succeeded
+          x-stainless-const: true
+      x-oaiMeta:
+        name: fine_tuning.job.succeeded
+        group: webhook-events
+        example: |
+          {
+            "id": "evt_abc123",
+            "type": "fine_tuning.job.succeeded",
+            "created_at": 1719168000,
+            "data": {
+              "id": "ftjob_abc123"
+            }
+          }
+    WebhookRealtimeCallIncoming:
+      type: object
+      title: realtime.call.incoming
+      description: |
+        Sent when the Realtime API receives an incoming SIP call.
+      required:
+        - created_at
+        - id
+        - data
+        - type
+      properties:
+        created_at:
+          type: integer
+          description: |
+            The Unix timestamp (in seconds) of when the incoming call was received.
+        id:
+          type: string
+          description: |
+            The unique ID of the event.
+        data:
+          type: object
+          description: |
+            Event data payload.
+          required:
+            - call_id
+            - sip_headers
+          properties:
+            call_id:
+              type: string
+              description: |
+                The unique ID of this call.
+            sip_headers:
+              type: array
+              description: |
+                Headers from the SIP Invite.
+              items:
+                type: object
+                description: |
+                  A header from the SIP Invite.
+                required:
+                  - name
+                  - value
+                properties:
+                  name:
+                    type: string
+                    description: |
+                      Name of the SIP Header.
+                  value:
+                    type: string
+                    description: |
+                      Value of the SIP Header.
+        object:
+          type: string
+          description: |
+            The object of the event. Always `event`.
+          enum:
+            - event
+          x-stainless-const: true
+        type:
+          type: string
+          description: |
+            The type of the event. Always `realtime.call.incoming`.
+          enum:
+            - realtime.call.incoming
+          x-stainless-const: true
+      x-oaiMeta:
+        name: realtime.call.incoming
+        group: webhook-events
+        example: |
+          {
+            "id": "evt_abc123",
+            "type": "realtime.call.incoming",
+            "created_at": 1719168000,
+            "data": {
+              "call_id": "rtc_479a275623b54bdb9b6fbae2f7cbd408",
+              "sip_headers": [
+                {"name": "Max-Forwards", "value": "63"},
+                {"name": "CSeq", "value": "851287 INVITE"},
+                {"name": "Content-Type", "value": "application/sdp"}
+              ]
+            }
+          }
+    WebhookResponseCancelled:
+      type: object
+      title: response.cancelled
+      description: |
+        Sent when a background response has been cancelled.
+      required:
+        - created_at
+        - id
+        - data
+        - type
+      properties:
+        created_at:
+          type: integer
+          description: |
+            The Unix timestamp (in seconds) of when the model response was cancelled.
+        id:
+          type: string
+          description: |
+            The unique ID of the event.
+        data:
+          type: object
+          description: |
+            Event data payload.
+ required: + - id + properties: + id: + type: string + description: | + The unique ID of the model response. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `response.cancelled`. + enum: + - response.cancelled + x-stainless-const: true + x-oaiMeta: + name: response.cancelled + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "response.cancelled", + "created_at": 1719168000, + "data": { + "id": "resp_abc123" + } + } + WebhookResponseCompleted: + type: object + title: response.completed + description: | + Sent when a background response has been completed. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the model response was completed. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the model response. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `response.completed`. + enum: + - response.completed + x-stainless-const: true + x-oaiMeta: + name: response.completed + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "response.completed", + "created_at": 1719168000, + "data": { + "id": "resp_abc123" + } + } + WebhookResponseFailed: + type: object + title: response.failed + description: | + Sent when a background response has failed. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the model response failed. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the model response. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `response.failed`. + enum: + - response.failed + x-stainless-const: true + x-oaiMeta: + name: response.failed + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "response.failed", + "created_at": 1719168000, + "data": { + "id": "resp_abc123" + } + } + WebhookResponseIncomplete: + type: object + title: response.incomplete + description: | + Sent when a background response has been interrupted. + required: + - created_at + - id + - data + - type + properties: + created_at: + type: integer + description: | + The Unix timestamp (in seconds) of when the model response was interrupted. + id: + type: string + description: | + The unique ID of the event. + data: + type: object + description: | + Event data payload. + required: + - id + properties: + id: + type: string + description: | + The unique ID of the model response. + object: + type: string + description: | + The object of the event. Always `event`. + enum: + - event + x-stainless-const: true + type: + type: string + description: | + The type of the event. Always `response.incomplete`. 
+ enum: + - response.incomplete + x-stainless-const: true + x-oaiMeta: + name: response.incomplete + group: webhook-events + example: | + { + "id": "evt_abc123", + "type": "response.incomplete", + "created_at": 1719168000, + "data": { + "id": "resp_abc123" + } + } + IncludeEnum: + type: string + enum: + - file_search_call.results + - web_search_call.results + - web_search_call.action.sources + - message.input_image.image_url + - computer_call_output.output.image_url + - code_interpreter_call.outputs + - reasoning.encrypted_content + - message.output_text.logprobs + description: >- + Specify additional output data to include in the model response. Currently supported values are: + + - `web_search_call.action.sources`: Include the sources of the web search tool call. + + - `code_interpreter_call.outputs`: Includes the outputs of python code execution in code interpreter + tool call items. + + - `computer_call_output.output.image_url`: Include image urls from the computer call output. + + - `file_search_call.results`: Include the search results of the file search tool call. + + - `message.input_image.image_url`: Include image urls from the input message. + + - `message.output_text.logprobs`: Include logprobs with assistant messages. + + - `reasoning.encrypted_content`: Includes an encrypted version of reasoning tokens in reasoning item + outputs. This enables reasoning items to be used in multi-turn conversations when using the Responses + API statelessly (like when the `store` parameter is set to `false`, or when an organization is + enrolled in the zero data retention program). + MessageStatus: + type: string + enum: + - in_progress + - completed + - incomplete + MessageRole: + type: string + enum: + - unknown + - user + - assistant + - system + - critic + - discriminator + - developer + - tool + InputTextContent: + properties: + type: + type: string + enum: + - input_text + description: The type of the input item. Always `input_text`. + default: input_text + x-stainless-const: true + text: + type: string + description: The text input to the model. + type: object + required: + - type + - text + title: Input text + description: A text input to the model. + FileCitationBody: + properties: + type: + type: string + enum: + - file_citation + description: The type of the file citation. Always `file_citation`. + default: file_citation + x-stainless-const: true + file_id: + type: string + description: The ID of the file. + index: + type: integer + description: The index of the file in the list of files. + filename: + type: string + description: The filename of the file cited. + type: object + required: + - type + - file_id + - index + - filename + title: File citation + description: A citation to a file. + UrlCitationBody: + properties: + type: + type: string + enum: + - url_citation + description: The type of the URL citation. Always `url_citation`. + default: url_citation + x-stainless-const: true + url: + type: string + description: The URL of the web resource. + start_index: + type: integer + description: The index of the first character of the URL citation in the message. + end_index: + type: integer + description: The index of the last character of the URL citation in the message. + title: + type: string + description: The title of the web resource. + type: object + required: + - type + - url + - start_index + - end_index + - title + title: URL citation + description: A citation for a web resource used to generate a model response. 
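+    # Illustrative sketch (not part of the upstream spec; values are hypothetical):
+    # a url_citation annotation as it might appear inside an output_text content part:
+    #   { "type": "url_citation", "url": "https://example.com/article",
+    #     "start_index": 42, "end_index": 95, "title": "Example article" }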
+ ContainerFileCitationBody: + properties: + type: + type: string + enum: + - container_file_citation + description: The type of the container file citation. Always `container_file_citation`. + default: container_file_citation + x-stainless-const: true + container_id: + type: string + description: The ID of the container file. + file_id: + type: string + description: The ID of the file. + start_index: + type: integer + description: The index of the first character of the container file citation in the message. + end_index: + type: integer + description: The index of the last character of the container file citation in the message. + filename: + type: string + description: The filename of the container file cited. + type: object + required: + - type + - container_id + - file_id + - start_index + - end_index + - filename + title: Container file citation + description: A citation for a container file used to generate a model response. + Annotation: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/FileCitationBody' + - $ref: '#/components/schemas/UrlCitationBody' + - $ref: '#/components/schemas/ContainerFileCitationBody' + - $ref: '#/components/schemas/FilePath' + TopLogProb: + properties: + token: + type: string + logprob: + type: number + bytes: + items: + type: integer + type: array + type: object + required: + - token + - logprob + - bytes + title: Top log probability + description: The top log probability of a token. + LogProb: + properties: + token: + type: string + logprob: + type: number + bytes: + items: + type: integer + type: array + top_logprobs: + items: + $ref: '#/components/schemas/TopLogProb' + type: array + type: object + required: + - token + - logprob + - bytes + - top_logprobs + title: Log probability + description: The log probability of a token. + OutputTextContent: + properties: + type: + type: string + enum: + - output_text + description: The type of the output text. Always `output_text`. + default: output_text + x-stainless-const: true + text: + type: string + description: The text output from the model. + annotations: + items: + $ref: '#/components/schemas/Annotation' + type: array + description: The annotations of the text output. + logprobs: + items: + $ref: '#/components/schemas/LogProb' + type: array + type: object + required: + - type + - text + - annotations + title: Output text + description: A text output from the model. + TextContent: + properties: + type: + type: string + enum: + - text + default: text + x-stainless-const: true + text: + type: string + type: object + required: + - type + - text + title: Text Content + description: A text content. + SummaryTextContent: + properties: + type: + type: string + enum: + - summary_text + description: The type of the object. Always `summary_text`. + default: summary_text + x-stainless-const: true + text: + type: string + description: A summary of the reasoning output from the model so far. + type: object + required: + - type + - text + title: Summary text + description: A summary text from the model. + ReasoningTextContent: + properties: + type: + type: string + enum: + - reasoning_text + description: The type of the reasoning text. Always `reasoning_text`. + default: reasoning_text + x-stainless-const: true + text: + type: string + description: The reasoning text from the model. + type: object + required: + - type + - text + title: ReasoningTextContent + description: Reasoning text from the model. 
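+    # Editor's note (illustrative only, not part of the upstream spec): the
+    # `logprobs` array on OutputTextContent nests one TopLogProb list per sampled
+    # token. A sketch for a single token, assuming `bytes` holds the UTF-8 byte
+    # values of the token text; the tokens and probabilities are hypothetical.
+    #   {
+    #     "token": "Hello",
+    #     "logprob": -0.12,
+    #     "bytes": [72, 101, 108, 108, 111],
+    #     "top_logprobs": [
+    #       {"token": "Hello", "logprob": -0.12, "bytes": [72, 101, 108, 108, 111]},
+    #       {"token": "Hi", "logprob": -2.4, "bytes": [72, 105]}
+    #     ]
+    #   }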
+ RefusalContent: + properties: + type: + type: string + enum: + - refusal + description: The type of the refusal. Always `refusal`. + default: refusal + x-stainless-const: true + refusal: + type: string + description: The refusal explanation from the model. + type: object + required: + - type + - refusal + title: Refusal + description: A refusal from the model. + ImageDetail: + type: string + enum: + - low + - high + - auto + InputImageContent: + properties: + type: + type: string + enum: + - input_image + description: The type of the input item. Always `input_image`. + default: input_image + x-stainless-const: true + image_url: + anyOf: + - type: string + description: >- + The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in + a data URL. + - type: 'null' + file_id: + anyOf: + - type: string + description: The ID of the file to be sent to the model. + - type: 'null' + detail: + $ref: '#/components/schemas/ImageDetail' + description: >- + The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults + to `auto`. + type: object + required: + - type + - detail + title: Input image + description: >- + An image input to the model. Learn about [image + inputs](https://platform.openai.com/docs/guides/vision). + ComputerScreenshotContent: + properties: + type: + type: string + enum: + - computer_screenshot + description: >- + Specifies the event type. For a computer screenshot, this property is always set to + `computer_screenshot`. + default: computer_screenshot + x-stainless-const: true + image_url: + anyOf: + - type: string + description: The URL of the screenshot image. + - type: 'null' + file_id: + anyOf: + - type: string + description: The identifier of an uploaded file that contains the screenshot. + - type: 'null' + type: object + required: + - type + - image_url + - file_id + title: Computer screenshot + description: A screenshot of a computer. + InputFileContent: + properties: + type: + type: string + enum: + - input_file + description: The type of the input item. Always `input_file`. + default: input_file + x-stainless-const: true + file_id: + anyOf: + - type: string + description: The ID of the file to be sent to the model. + - type: 'null' + filename: + type: string + description: The name of the file to be sent to the model. + file_url: + type: string + description: The URL of the file to be sent to the model. + file_data: + type: string + description: | + The content of the file to be sent to the model. + type: object + required: + - type + title: Input file + description: A file input to the model. + Message: + properties: + type: + type: string + enum: + - message + description: The type of the message. Always set to `message`. + default: message + x-stainless-const: true + id: + type: string + description: The unique ID of the message. + status: + $ref: '#/components/schemas/MessageStatus' + description: >- + The status of the item. One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + role: + $ref: '#/components/schemas/MessageRole' + description: >- + The role of the message. One of `unknown`, `user`, `assistant`, `system`, `critic`, + `discriminator`, `developer`, or `tool`.
+ content: + items: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/InputTextContent' + - $ref: '#/components/schemas/OutputTextContent' + - $ref: '#/components/schemas/TextContent' + - $ref: '#/components/schemas/SummaryTextContent' + - $ref: '#/components/schemas/ReasoningTextContent' + - $ref: '#/components/schemas/RefusalContent' + - $ref: '#/components/schemas/InputImageContent' + - $ref: '#/components/schemas/ComputerScreenshotContent' + - $ref: '#/components/schemas/InputFileContent' + type: array + description: The content of the message + type: object + required: + - type + - id + - status + - role + - content + title: Message + description: A message to or from the model. + ClickButtonType: + type: string + enum: + - left + - right + - wheel + - back + - forward + ClickParam: + properties: + type: + type: string + enum: + - click + description: Specifies the event type. For a click action, this property is always `click`. + default: click + x-stainless-const: true + button: + $ref: '#/components/schemas/ClickButtonType' + description: >- + Indicates which mouse button was pressed during the click. One of `left`, `right`, `wheel`, + `back`, or `forward`. + x: + type: integer + description: The x-coordinate where the click occurred. + 'y': + type: integer + description: The y-coordinate where the click occurred. + type: object + required: + - type + - button + - x + - 'y' + title: Click + description: A click action. + DoubleClickAction: + properties: + type: + type: string + enum: + - double_click + description: >- + Specifies the event type. For a double click action, this property is always set to + `double_click`. + default: double_click + x-stainless-const: true + x: + type: integer + description: The x-coordinate where the double click occurred. + 'y': + type: integer + description: The y-coordinate where the double click occurred. + type: object + required: + - type + - x + - 'y' + title: DoubleClick + description: A double click action. + DragPoint: + properties: + x: + type: integer + description: The x-coordinate. + 'y': + type: integer + description: The y-coordinate. + type: object + required: + - x + - 'y' + title: Coordinate + description: 'An x/y coordinate pair, e.g. `{ x: 100, y: 200 }`.' + KeyPressAction: + properties: + type: + type: string + enum: + - keypress + description: Specifies the event type. For a keypress action, this property is always set to `keypress`. + default: keypress + x-stainless-const: true + keys: + items: + type: string + description: One of the keys the model is requesting to be pressed. + type: array + description: >- + The combination of keys the model is requesting to be pressed. This is an array of strings, each + representing a key. + type: object + required: + - type + - keys + title: KeyPress + description: A collection of keypresses the model would like to perform. + ComputerCallSafetyCheckParam: + properties: + id: + type: string + description: The ID of the pending safety check. + code: + anyOf: + - type: string + description: The type of the pending safety check. + - type: 'null' + message: + anyOf: + - type: string + description: Details about the pending safety check. + - type: 'null' + type: object + required: + - id + description: A pending safety check for the computer call. + CodeInterpreterOutputLogs: + properties: + type: + type: string + enum: + - logs + description: The type of the output. Always `logs`. 
+ default: logs + x-stainless-const: true + logs: + type: string + description: The logs output from the code interpreter. + type: object + required: + - type + - logs + title: Code interpreter output logs + description: The logs output from the code interpreter. + CodeInterpreterOutputImage: + properties: + type: + type: string + enum: + - image + description: The type of the output. Always `image`. + default: image + x-stainless-const: true + url: + type: string + description: The URL of the image output from the code interpreter. + type: object + required: + - type + - url + title: Code interpreter output image + description: The image output from the code interpreter. + LocalShellExecAction: + properties: + type: + type: string + enum: + - exec + description: The type of the local shell action. Always `exec`. + default: exec + x-stainless-const: true + command: + items: + type: string + type: array + description: The command to run. + timeout_ms: + anyOf: + - type: integer + description: Optional timeout in milliseconds for the command. + - type: 'null' + working_directory: + anyOf: + - type: string + description: Optional working directory to run the command in. + - type: 'null' + env: + additionalProperties: + type: string + type: object + description: Environment variables to set for the command. + x-oaiTypeLabel: map + user: + anyOf: + - type: string + description: Optional user to run the command as. + - type: 'null' + type: object + required: + - type + - command + - env + title: Local shell exec action + description: Execute a shell command on the server. + FunctionShellAction: + properties: + commands: + items: + type: string + description: A list of commands to run. + type: array + timeout_ms: + anyOf: + - type: integer + description: Optional timeout in milliseconds for the commands. + - type: 'null' + max_output_length: + anyOf: + - type: integer + description: Optional maximum number of characters to return from each command. + - type: 'null' + type: object + required: + - commands + - timeout_ms + - max_output_length + title: Shell exec action + description: Execute a shell command. + LocalShellCallStatus: + type: string + enum: + - in_progress + - completed + - incomplete + FunctionShellCall: + properties: + type: + type: string + enum: + - shell_call + description: The type of the item. Always `shell_call`. + default: shell_call + x-stainless-const: true + id: + type: string + description: The unique ID of the function shell tool call. Populated when this item is returned via API. + call_id: + type: string + description: The unique ID of the function shell tool call generated by the model. + action: + $ref: '#/components/schemas/FunctionShellAction' + description: The shell commands and limits that describe how to run the tool call. + status: + $ref: '#/components/schemas/LocalShellCallStatus' + description: The status of the shell call. One of `in_progress`, `completed`, or `incomplete`. + created_by: + type: string + description: The ID of the entity that created this tool call. + type: object + required: + - type + - id + - call_id + - action + - status + title: Function shell tool call + description: A tool call that executes one or more shell commands in a managed environment. + FunctionShellCallOutputTimeoutOutcome: + properties: + type: + type: string + enum: + - timeout + description: The outcome type. Always `timeout`. 
+ default: timeout + x-stainless-const: true + type: object + required: + - type + title: Function shell timeout outcome + description: Indicates that the function shell call exceeded its configured time limit. + FunctionShellCallOutputExitOutcome: + properties: + type: + type: string + enum: + - exit + description: The outcome type. Always `exit`. + default: exit + x-stainless-const: true + exit_code: + type: integer + description: Exit code from the shell process. + type: object + required: + - type + - exit_code + title: Function shell exit outcome + description: Indicates that the shell commands finished and returned an exit code. + FunctionShellCallOutputContent: + properties: + stdout: + type: string + stderr: + type: string + outcome: + title: Function shell call outcome + description: >- + Represents either an exit outcome (with an exit code) or a timeout outcome for a shell call output + chunk. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/FunctionShellCallOutputTimeoutOutcome' + - $ref: '#/components/schemas/FunctionShellCallOutputExitOutcome' + created_by: + type: string + type: object + required: + - stdout + - stderr + - outcome + title: Shell call output content + description: The content of a shell call output. + FunctionShellCallOutput: + properties: + type: + type: string + enum: + - shell_call_output + description: The type of the shell call output. Always `shell_call_output`. + default: shell_call_output + x-stainless-const: true + id: + type: string + description: The unique ID of the shell call output. Populated when this item is returned via API. + call_id: + type: string + description: The unique ID of the shell tool call generated by the model. + output: + items: + $ref: '#/components/schemas/FunctionShellCallOutputContent' + type: array + description: An array of shell call output contents + max_output_length: + anyOf: + - type: integer + description: >- + The maximum length of the shell command output. This is generated by the model and should be + passed back with the raw output. + - type: 'null' + created_by: + type: string + type: object + required: + - type + - id + - call_id + - output + - max_output_length + title: Shell call output + description: The output of a shell tool call. + ApplyPatchCallStatus: + type: string + enum: + - in_progress + - completed + ApplyPatchCreateFileOperation: + properties: + type: + type: string + enum: + - create_file + description: Create a new file with the provided diff. + default: create_file + x-stainless-const: true + path: + type: string + description: Path of the file to create. + diff: + type: string + description: Diff to apply. + type: object + required: + - type + - path + - diff + title: Apply patch create file operation + description: Instruction describing how to create a file via the apply_patch tool. + ApplyPatchDeleteFileOperation: + properties: + type: + type: string + enum: + - delete_file + description: Delete the specified file. + default: delete_file + x-stainless-const: true + path: + type: string + description: Path of the file to delete. + type: object + required: + - type + - path + title: Apply patch delete file operation + description: Instruction describing how to delete a file via the apply_patch tool. + ApplyPatchUpdateFileOperation: + properties: + type: + type: string + enum: + - update_file + description: Update an existing file with the provided diff. 
+ default: update_file + x-stainless-const: true + path: + type: string + description: Path of the file to update. + diff: + type: string + description: Diff to apply. + type: object + required: + - type + - path + - diff + title: Apply patch update file operation + description: Instruction describing how to update a file via the apply_patch tool. + ApplyPatchToolCall: + properties: + type: + type: string + enum: + - apply_patch_call + description: The type of the item. Always `apply_patch_call`. + default: apply_patch_call + x-stainless-const: true + id: + type: string + description: The unique ID of the apply patch tool call. Populated when this item is returned via API. + call_id: + type: string + description: The unique ID of the apply patch tool call generated by the model. + status: + $ref: '#/components/schemas/ApplyPatchCallStatus' + description: The status of the apply patch tool call. One of `in_progress` or `completed`. + operation: + title: Apply patch operation + description: One of the create_file, delete_file, or update_file operations applied via apply_patch. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/ApplyPatchCreateFileOperation' + - $ref: '#/components/schemas/ApplyPatchDeleteFileOperation' + - $ref: '#/components/schemas/ApplyPatchUpdateFileOperation' + created_by: + type: string + description: The ID of the entity that created this tool call. + type: object + required: + - type + - id + - call_id + - status + title: Apply patch tool call + description: A tool call that applies file diffs by creating, deleting, or updating files. + ApplyPatchCallOutputStatus: + type: string + enum: + - completed + - failed + ApplyPatchToolCallOutput: + properties: + type: + type: string + enum: + - apply_patch_call_output + description: The type of the item. Always `apply_patch_call_output`. + default: apply_patch_call_output + x-stainless-const: true + id: + type: string + description: The unique ID of the apply patch tool call output. Populated when this item is returned via API. + call_id: + type: string + description: The unique ID of the apply patch tool call generated by the model. + status: + $ref: '#/components/schemas/ApplyPatchCallOutputStatus' + description: The status of the apply patch tool call output. One of `completed` or `failed`. + output: + anyOf: + - type: string + description: Optional textual output returned by the apply patch tool. + - type: 'null' + created_by: + type: string + description: The ID of the entity that created this tool call output. + type: object + required: + - type + - id + - call_id + - status + - output + title: Apply patch tool call output + description: The output emitted by an apply patch tool call. + MCPToolCallStatus: + type: string + enum: + - in_progress + - completed + - incomplete + - calling + - failed + DetailEnum: + type: string + enum: + - low + - high + - auto + FunctionCallItemStatus: + type: string + enum: + - in_progress + - completed + - incomplete + ComputerCallOutputItemParam: + properties: + id: + anyOf: + - type: string + description: The ID of the computer tool call output. + example: cuo_123 + - type: 'null' + call_id: + type: string + maxLength: 64 + minLength: 1 + description: The ID of the computer tool call that produced the output. + type: + type: string + enum: + - computer_call_output + description: The type of the computer tool call output. Always `computer_call_output`. 
+ default: computer_call_output + x-stainless-const: true + output: + $ref: '#/components/schemas/ComputerScreenshotImage' + acknowledged_safety_checks: + anyOf: + - items: + $ref: '#/components/schemas/ComputerCallSafetyCheckParam' + type: array + description: The safety checks reported by the API that have been acknowledged by the developer. + - type: 'null' + status: + anyOf: + - $ref: '#/components/schemas/FunctionCallItemStatus' + description: >- + The status of the message input. One of `in_progress`, `completed`, or `incomplete`. Populated + when input items are returned via API. + - type: 'null' + type: object + required: + - call_id + - type + - output + title: Computer tool call output + description: The output of a computer tool call. + InputTextContentParam: + properties: + type: + type: string + enum: + - input_text + description: The type of the input item. Always `input_text`. + default: input_text + x-stainless-const: true + text: + type: string + maxLength: 10485760 + description: The text input to the model. + type: object + required: + - type + - text + title: Input text + description: A text input to the model. + InputImageContentParamAutoParam: + properties: + type: + type: string + enum: + - input_image + description: The type of the input item. Always `input_image`. + default: input_image + x-stainless-const: true + image_url: + anyOf: + - type: string + maxLength: 20971520 + description: >- + The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in + a data URL. + - type: 'null' + file_id: + anyOf: + - type: string + description: The ID of the file to be sent to the model. + example: file-123 + - type: 'null' + detail: + anyOf: + - $ref: '#/components/schemas/DetailEnum' + description: >- + The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. + Defaults to `auto`. + - type: 'null' + type: object + required: + - type + title: Input image + description: >- + An image input to the model. Learn about [image + inputs](https://platform.openai.com/docs/guides/vision) + InputFileContentParam: + properties: + type: + type: string + enum: + - input_file + description: The type of the input item. Always `input_file`. + default: input_file + x-stainless-const: true + file_id: + anyOf: + - type: string + description: The ID of the file to be sent to the model. + example: file-123 + - type: 'null' + filename: + anyOf: + - type: string + description: The name of the file to be sent to the model. + - type: 'null' + file_data: + anyOf: + - type: string + maxLength: 33554432 + description: The base64-encoded data of the file to be sent to the model. + - type: 'null' + file_url: + anyOf: + - type: string + description: The URL of the file to be sent to the model. + - type: 'null' + type: object + required: + - type + title: Input file + description: A file input to the model. + FunctionCallOutputItemParam: + properties: + id: + anyOf: + - type: string + description: The unique ID of the function tool call output. Populated when this item is returned via API. + example: fc_123 + - type: 'null' + call_id: + type: string + maxLength: 64 + minLength: 1 + description: The unique ID of the function tool call generated by the model. + type: + type: string + enum: + - function_call_output + description: The type of the function tool call output. Always `function_call_output`. + default: function_call_output + x-stainless-const: true + output: + description: Text, image, or file output of the function tool call. 
+ anyOf: + - type: string + maxLength: 10485760 + description: A JSON string of the output of the function tool call. + - items: + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/InputTextContentParam' + - $ref: '#/components/schemas/InputImageContentParamAutoParam' + - $ref: '#/components/schemas/InputFileContentParam' + type: array + status: + anyOf: + - $ref: '#/components/schemas/FunctionCallItemStatus' + description: >- + The status of the item. One of `in_progress`, `completed`, or `incomplete`. Populated when + items are returned via API. + - type: 'null' + type: object + required: + - call_id + - type + - output + title: Function tool call output + description: The output of a function tool call. + FunctionShellActionParam: + properties: + commands: + items: + type: string + type: array + description: Ordered shell commands for the execution environment to run. + timeout_ms: + anyOf: + - type: integer + description: Maximum wall-clock time in milliseconds to allow the shell commands to run. + - type: 'null' + max_output_length: + anyOf: + - type: integer + description: Maximum number of UTF-8 characters to capture from combined stdout and stderr output. + - type: 'null' + type: object + required: + - commands + title: Function shell action + description: Commands and limits describing how to run the function shell tool call. + FunctionShellCallItemStatus: + type: string + enum: + - in_progress + - completed + - incomplete + title: Function shell call status + description: Status values reported for function shell tool calls. + FunctionShellCallItemParam: + properties: + id: + anyOf: + - type: string + description: The unique ID of the function shell tool call. Populated when this item is returned via API. + example: sh_123 + - type: 'null' + call_id: + type: string + maxLength: 64 + minLength: 1 + description: The unique ID of the function shell tool call generated by the model. + type: + type: string + enum: + - shell_call + description: The type of the item. Always `shell_call`. + default: shell_call + x-stainless-const: true + action: + $ref: '#/components/schemas/FunctionShellActionParam' + description: The shell commands and limits that describe how to run the tool call. + status: + anyOf: + - $ref: '#/components/schemas/FunctionShellCallItemStatus' + description: The status of the shell call. One of `in_progress`, `completed`, or `incomplete`. + - type: 'null' + type: object + required: + - call_id + - type + - action + title: Function shell tool call + description: A tool call representing a request to execute one or more shell commands. + FunctionShellCallOutputTimeoutOutcomeParam: + properties: + type: + type: string + enum: + - timeout + description: The outcome type. Always `timeout`. + default: timeout + x-stainless-const: true + type: object + required: + - type + title: Function shell timeout outcome + description: Indicates that the function shell call exceeded its configured time limit. + FunctionShellCallOutputExitOutcomeParam: + properties: + type: + type: string + enum: + - exit + description: The outcome type. Always `exit`. + default: exit + x-stainless-const: true + exit_code: + type: integer + description: The exit code returned by the shell process. + type: object + required: + - type + - exit_code + title: Function shell exit outcome + description: Indicates that the shell commands finished and returned an exit code.
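+    # Editor's note (illustrative only, not part of the upstream spec): the two
+    # outcome shapes defined above are discriminated on `type` and are mutually
+    # exclusive per output chunk. Hypothetical examples:
+    #   {"type": "exit", "exit_code": 0}   # commands finished and returned a code
+    #   {"type": "timeout"}                # wall-clock limit from timeout_ms hit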
+ FunctionShellCallOutputOutcomeParam: + title: Function shell call outcome + description: The exit or timeout outcome associated with this chunk. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/FunctionShellCallOutputTimeoutOutcomeParam' + - $ref: '#/components/schemas/FunctionShellCallOutputExitOutcomeParam' + FunctionShellCallOutputContentParam: + properties: + stdout: + type: string + maxLength: 10485760 + description: Captured stdout output for this chunk of the shell call. + stderr: + type: string + maxLength: 10485760 + description: Captured stderr output for this chunk of the shell call. + outcome: + $ref: '#/components/schemas/FunctionShellCallOutputOutcomeParam' + description: The exit or timeout outcome associated with this chunk. + type: object + required: + - stdout + - stderr + - outcome + title: Function shell output chunk + description: Captured stdout and stderr for a portion of a function shell tool call output. + FunctionShellCallOutputItemParam: + properties: + id: + anyOf: + - type: string + description: >- + The unique ID of the function shell tool call output. Populated when this item is returned via + API. + example: sho_123 + - type: 'null' + call_id: + type: string + maxLength: 64 + minLength: 1 + description: The unique ID of the function shell tool call generated by the model. + type: + type: string + enum: + - shell_call_output + description: The type of the item. Always `shell_call_output`. + default: shell_call_output + x-stainless-const: true + output: + items: + $ref: '#/components/schemas/FunctionShellCallOutputContentParam' + type: array + description: Captured chunks of stdout and stderr output, along with their associated outcomes. + max_output_length: + anyOf: + - type: integer + description: The maximum number of UTF-8 characters captured for this shell call's combined output. + - type: 'null' + type: object + required: + - call_id + - type + - output + title: Function shell tool call output + description: The streamed output items emitted by a function shell tool call. + ApplyPatchCallStatusParam: + type: string + enum: + - in_progress + - completed + title: Apply patch call status + description: Status values reported for apply_patch tool calls. + ApplyPatchCreateFileOperationParam: + properties: + type: + type: string + enum: + - create_file + description: The operation type. Always `create_file`. + default: create_file + x-stainless-const: true + path: + type: string + minLength: 1 + description: Path of the file to create relative to the workspace root. + diff: + type: string + maxLength: 10485760 + description: Unified diff content to apply when creating the file. + type: object + required: + - type + - path + - diff + title: Apply patch create file operation + description: Instruction for creating a new file via the apply_patch tool. + ApplyPatchDeleteFileOperationParam: + properties: + type: + type: string + enum: + - delete_file + description: The operation type. Always `delete_file`. + default: delete_file + x-stainless-const: true + path: + type: string + minLength: 1 + description: Path of the file to delete relative to the workspace root. + type: object + required: + - type + - path + title: Apply patch delete file operation + description: Instruction for deleting an existing file via the apply_patch tool. + ApplyPatchUpdateFileOperationParam: + properties: + type: + type: string + enum: + - update_file + description: The operation type. Always `update_file`.
+ default: update_file + x-stainless-const: true + path: + type: string + minLength: 1 + description: Path of the file to update relative to the workspace root. + diff: + type: string + maxLength: 10485760 + description: Unified diff content to apply to the existing file. + type: object + required: + - type + - path + - diff + title: Apply patch update file operation + description: Instruction for updating an existing file via the apply_patch tool. + ApplyPatchOperationParam: + title: Apply patch operation + description: One of the create_file, delete_file, or update_file operations supplied to the apply_patch tool. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/ApplyPatchCreateFileOperationParam' + - $ref: '#/components/schemas/ApplyPatchDeleteFileOperationParam' + - $ref: '#/components/schemas/ApplyPatchUpdateFileOperationParam' + ApplyPatchToolCallItemParam: + properties: + type: + type: string + enum: + - apply_patch_call + description: The type of the item. Always `apply_patch_call`. + default: apply_patch_call + x-stainless-const: true + id: + anyOf: + - type: string + description: The unique ID of the apply patch tool call. Populated when this item is returned via API. + example: apc_123 + - type: 'null' + call_id: + type: string + maxLength: 64 + minLength: 1 + description: The unique ID of the apply patch tool call generated by the model. + status: + $ref: '#/components/schemas/ApplyPatchCallStatusParam' + description: The status of the apply patch tool call. One of `in_progress` or `completed`. + operation: + $ref: '#/components/schemas/ApplyPatchOperationParam' + description: The specific create, delete, or update instruction for the apply_patch tool call. + type: object + required: + - type + - call_id + - status + - operation + title: Apply patch tool call + description: A tool call representing a request to create, delete, or update files using diff patches. + ApplyPatchCallOutputStatusParam: + type: string + enum: + - completed + - failed + title: Apply patch call output status + description: Outcome values reported for apply_patch tool call outputs. + ApplyPatchToolCallOutputItemParam: + properties: + type: + type: string + enum: + - apply_patch_call_output + description: The type of the item. Always `apply_patch_call_output`. + default: apply_patch_call_output + x-stainless-const: true + id: + anyOf: + - type: string + description: >- + The unique ID of the apply patch tool call output. Populated when this item is returned via + API. + example: apco_123 + - type: 'null' + call_id: + type: string + maxLength: 64 + minLength: 1 + description: The unique ID of the apply patch tool call generated by the model. + status: + $ref: '#/components/schemas/ApplyPatchCallOutputStatusParam' + description: The status of the apply patch tool call output. One of `completed` or `failed`. + output: + type: string + maxLength: 10485760 + description: Optional human-readable log text from the apply patch tool (e.g., patch results or errors). + type: object + required: + - type + - call_id + - status + title: Apply patch tool call output + description: The streamed output emitted by an apply patch tool call. + ItemReferenceParam: + properties: + type: + anyOf: + - type: string + enum: + - item_reference + description: The type of item to reference. Always `item_reference`. + default: item_reference + x-stainless-const: true + - type: 'null' + id: + type: string + description: The ID of the item to reference. 
+ type: object + required: + - id + title: Item reference + description: An internal identifier for an item to reference. + ConversationResource: + properties: + id: + type: string + description: The unique ID of the conversation. + object: + type: string + enum: + - conversation + description: The object type, which is always `conversation`. + default: conversation + x-stainless-const: true + metadata: + description: >- + Set of 16 key-value pairs that can be attached to an object. This can be useful for + storing additional information about the object in a structured format, and querying for + objects via API or the dashboard. + Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters. + created_at: + type: integer + description: The time at which the conversation was created, measured in seconds since the Unix epoch. + type: object + required: + - id + - object + - metadata + - created_at + FunctionTool: + properties: + type: + type: string + enum: + - function + description: The type of the function tool. Always `function`. + default: function + x-stainless-const: true + name: + type: string + description: The name of the function to call. + description: + anyOf: + - type: string + description: >- + A description of the function. Used by the model to determine whether or not to call the + function. + - type: 'null' + parameters: + anyOf: + - additionalProperties: {} + type: object + description: A JSON schema object describing the parameters of the function. + x-oaiTypeLabel: map + - type: 'null' + strict: + anyOf: + - type: boolean + description: Whether to enforce strict parameter validation. Default `true`. + - type: 'null' + type: object + required: + - type + - name + - strict + - parameters + title: Function + description: >- + Defines a function in your own code the model can choose to call. Learn more about [function + calling](https://platform.openai.com/docs/guides/function-calling). + RankerVersionType: + type: string + enum: + - auto + - default-2024-11-15 + HybridSearchOptions: + properties: + embedding_weight: + type: number + description: The weight of the embedding in the reciprocal ranking fusion. + text_weight: + type: number + description: The weight of the text in the reciprocal ranking fusion. + type: object + required: + - embedding_weight + - text_weight + RankingOptions: + properties: + ranker: + $ref: '#/components/schemas/RankerVersionType' + description: The ranker to use for the file search. + score_threshold: + type: number + description: >- + The score threshold for the file search, a number between 0 and 1. Numbers closer to 1 will + attempt to return only the most relevant results, but may return fewer results. + hybrid_search: + $ref: '#/components/schemas/HybridSearchOptions' + description: >- + Weights that control how reciprocal rank fusion balances semantic embedding matches versus sparse + keyword matches when hybrid search is enabled. + type: object + required: [] + Filters: + anyOf: + - $ref: '#/components/schemas/ComparisonFilter' + - $ref: '#/components/schemas/CompoundFilter' + FileSearchTool: + properties: + type: + type: string + enum: + - file_search + description: The type of the file search tool. Always `file_search`. + default: file_search + x-stainless-const: true + vector_store_ids: + items: + type: string + type: array + description: The IDs of the vector stores to search. + max_num_results: + type: integer + description: The maximum number of results to return. 
This number should be between 1 and 50 inclusive. + ranking_options: + $ref: '#/components/schemas/RankingOptions' + description: Ranking options for search. + filters: + anyOf: + - $ref: '#/components/schemas/Filters' + description: A filter to apply. + - type: 'null' + type: object + required: + - type + - vector_store_ids + title: File search + description: >- + A tool that searches for relevant content from uploaded files. Learn more about the [file search + tool](https://platform.openai.com/docs/guides/tools-file-search). + ComputerEnvironment: + type: string + enum: + - windows + - mac + - linux + - ubuntu + - browser + ComputerUsePreviewTool: + properties: + type: + type: string + enum: + - computer_use_preview + description: The type of the computer use tool. Always `computer_use_preview`. + default: computer_use_preview + x-stainless-const: true + environment: + $ref: '#/components/schemas/ComputerEnvironment' + description: The type of computer environment to control. + display_width: + type: integer + description: The width of the computer display. + display_height: + type: integer + description: The height of the computer display. + type: object + required: + - type + - environment + - display_width + - display_height + title: Computer use preview + description: >- + A tool that controls a virtual computer. Learn more about the [computer + tool](https://platform.openai.com/docs/guides/tools-computer-use). + ContainerMemoryLimit: + type: string + enum: + - 1g + - 4g + - 16g + - 64g + InputFidelity: + type: string + enum: + - high + - low + description: >- + Control how much effort the model will exert to match the style and features, especially facial + features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for + `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + LocalShellToolParam: + properties: + type: + type: string + enum: + - local_shell + description: The type of the local shell tool. Always `local_shell`. + default: local_shell + x-stainless-const: true + type: object + required: + - type + title: Local shell tool + description: A tool that allows the model to execute shell commands in a local environment. + FunctionShellToolParam: + properties: + type: + type: string + enum: + - shell + description: The type of the shell tool. Always `shell`. + default: shell + x-stainless-const: true + type: object + required: + - type + title: Shell tool + description: A tool that allows the model to execute shell commands. + CustomTextFormatParam: + properties: + type: + type: string + enum: + - text + description: Unconstrained text format. Always `text`. + default: text + x-stainless-const: true + type: object + required: + - type + title: Text format + description: Unconstrained free-form text. + GrammarSyntax1: + type: string + enum: + - lark + - regex + CustomGrammarFormatParam: + properties: + type: + type: string + enum: + - grammar + description: Grammar format. Always `grammar`. + default: grammar + x-stainless-const: true + syntax: + $ref: '#/components/schemas/GrammarSyntax1' + description: The syntax of the grammar definition. One of `lark` or `regex`. + definition: + type: string + description: The grammar definition. + type: object + required: + - type + - syntax + - definition + title: Grammar format + description: A grammar defined by the user. + CustomToolParam: + properties: + type: + type: string + enum: + - custom + description: The type of the custom tool. Always `custom`. 
+ default: custom + x-stainless-const: true + name: + type: string + description: The name of the custom tool, used to identify it in tool calls. + description: + type: string + description: Optional description of the custom tool, used to provide more context. + format: + description: The input format for the custom tool. Default is unconstrained text. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/CustomTextFormatParam' + - $ref: '#/components/schemas/CustomGrammarFormatParam' + type: object + required: + - type + - name + title: Custom tool + description: >- + A custom tool that processes input using a specified format. Learn more about [custom + tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) + ApproximateLocation: + properties: + type: + type: string + enum: + - approximate + description: The type of location approximation. Always `approximate`. + default: approximate + x-stainless-const: true + country: + anyOf: + - type: string + description: >- + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, e.g. + `US`. + - type: 'null' + region: + anyOf: + - type: string + description: Free text input for the region of the user, e.g. `California`. + - type: 'null' + city: + anyOf: + - type: string + description: Free text input for the city of the user, e.g. `San Francisco`. + - type: 'null' + timezone: + anyOf: + - type: string + description: >- + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g. + `America/Los_Angeles`. + - type: 'null' + type: object + required: + - type + SearchContextSize: + type: string + enum: + - low + - medium + - high + WebSearchPreviewTool: + properties: + type: + type: string + enum: + - web_search_preview + - web_search_preview_2025_03_11 + description: The type of the web search tool. One of `web_search_preview` or `web_search_preview_2025_03_11`. + default: web_search_preview + x-stainless-const: true + user_location: + anyOf: + - $ref: '#/components/schemas/ApproximateLocation' + description: The user's location. + - type: 'null' + search_context_size: + $ref: '#/components/schemas/SearchContextSize' + description: >- + High level guidance for the amount of context window space to use for the search. One of `low`, + `medium`, or `high`. `medium` is the default. + type: object + required: + - type + title: Web search preview + description: >- + This tool searches the web for relevant results to use in a response. Learn more about the [web search + tool](https://platform.openai.com/docs/guides/tools-web-search). + ApplyPatchToolParam: + properties: + type: + type: string + enum: + - apply_patch + description: The type of the tool. Always `apply_patch`. + default: apply_patch + x-stainless-const: true + type: object + required: + - type + title: Apply patch tool + description: Allows the assistant to create, delete, or update files using unified diffs. + ImageGenInputUsageDetails: + properties: + text_tokens: + type: integer + description: The number of text tokens in the input prompt. + image_tokens: + type: integer + description: The number of image tokens in the input prompt. + type: object + required: + - text_tokens + - image_tokens + title: Input usage details + description: The input tokens detailed information for the image generation. + ImageGenUsage: + properties: + input_tokens: + type: integer + description: The number of tokens (images and text) in the input prompt. 
+ total_tokens: + type: integer + description: The total number of tokens (images and text) used for the image generation. + output_tokens: + type: integer + description: The number of output tokens generated by the model. + input_tokens_details: + $ref: '#/components/schemas/ImageGenInputUsageDetails' + type: object + required: + - input_tokens + - total_tokens + - output_tokens + - input_tokens_details + title: Image generation usage + description: For `gpt-image-1` only, the token usage information for the image generation. + SpecificApplyPatchParam: + properties: + type: + type: string + enum: + - apply_patch + description: The tool to call. Always `apply_patch`. + default: apply_patch + x-stainless-const: true + type: object + required: + - type + title: Specific apply patch tool choice + description: Forces the model to call the apply_patch tool when executing a tool call. + SpecificFunctionShellParam: + properties: + type: + type: string + enum: + - shell + description: The tool to call. Always `shell`. + default: shell + x-stainless-const: true + type: object + required: + - type + title: Specific shell tool choice + description: Forces the model to call the function shell tool when a tool call is required. + ConversationParam-2: + properties: + id: + type: string + description: The unique ID of the conversation. + example: conv_123 + type: object + required: + - id + title: Conversation object + description: The conversation that this response belongs to. + Conversation-2: + properties: + id: + type: string + description: The unique ID of the conversation. + type: object + required: + - id + title: Conversation + description: >- + The conversation that this response belongs to. Input items and output items from this response are + automatically added to this conversation. + CreateConversationBody: + properties: + metadata: + anyOf: + - $ref: '#/components/schemas/Metadata' + description: >- + Set of 16 key-value pairs that can be attached to an object. This can be useful for + storing additional information about the object in a structured format, and querying + for objects via API or the dashboard. + Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters. + - type: 'null' + items: + anyOf: + - items: + $ref: '#/components/schemas/InputItem' + type: array + maxItems: 20 + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + - type: 'null' + type: object + required: [] + UpdateConversationBody: + properties: + metadata: + $ref: '#/components/schemas/Metadata' + description: >- + Set of 16 key-value pairs that can be attached to an object. This can be useful for + storing additional information about the object in a structured format, and querying for + objects via API or the dashboard. + Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters. 
+ type: object + required: + - metadata + DeletedConversationResource: + properties: + object: + type: string + enum: + - conversation.deleted + default: conversation.deleted + x-stainless-const: true + deleted: + type: boolean + id: + type: string + type: object + required: + - object + - deleted + - id + OrderEnum: + type: string + enum: + - asc + - desc + VideoModel: + type: string + enum: + - sora-2 + - sora-2-pro + VideoStatus: + type: string + enum: + - queued + - in_progress + - completed + - failed + VideoSize: + type: string + enum: + - 720x1280 + - 1280x720 + - 1024x1792 + - 1792x1024 + VideoSeconds: + type: string + enum: + - '4' + - '8' + - '12' + Error-2: + properties: + code: + type: string + message: + type: string + type: object + required: + - code + - message + VideoResource: + properties: + id: + type: string + description: Unique identifier for the video job. + object: + type: string + enum: + - video + description: The object type, which is always `video`. + default: video + x-stainless-const: true + model: + $ref: '#/components/schemas/VideoModel' + description: The video generation model that produced the job. + status: + $ref: '#/components/schemas/VideoStatus' + description: Current lifecycle status of the video job. + progress: + type: integer + description: Approximate completion percentage for the generation task. + created_at: + type: integer + description: Unix timestamp (seconds) for when the job was created. + completed_at: + anyOf: + - type: integer + description: Unix timestamp (seconds) for when the job completed, if finished. + - type: 'null' + expires_at: + anyOf: + - type: integer + description: Unix timestamp (seconds) for when the downloadable assets expire, if set. + - type: 'null' + prompt: + anyOf: + - type: string + description: The prompt that was used to generate the video. + - type: 'null' + size: + $ref: '#/components/schemas/VideoSize' + description: The resolution of the generated video. + seconds: + $ref: '#/components/schemas/VideoSeconds' + description: Duration of the generated clip in seconds. + remixed_from_video_id: + anyOf: + - type: string + description: Identifier of the source video if this video is a remix. + - type: 'null' + error: + anyOf: + - $ref: '#/components/schemas/Error-2' + description: Error payload that explains why generation failed, if applicable. + - type: 'null' + type: object + required: + - id + - object + - model + - status + - progress + - created_at + - completed_at + - expires_at + - prompt + - size + - seconds + - remixed_from_video_id + - error + title: Video job + description: Structured information describing a generated video job. + VideoListResource: + properties: + object: + description: The type of object returned, must be `list`. + default: list + x-stainless-const: true + const: list + data: + items: + $ref: '#/components/schemas/VideoResource' + type: array + description: A list of items + first_id: + anyOf: + - type: string + description: The ID of the first item in the list. + - type: 'null' + last_id: + anyOf: + - type: string + description: The ID of the last item in the list. + - type: 'null' + has_more: + type: boolean + description: Whether there are more items available. + type: object + required: + - object + - data + - first_id + - last_id + - has_more + CreateVideoBody: + properties: + model: + $ref: '#/components/schemas/VideoModel' + description: The video generation model to use. Defaults to `sora-2`. 
+ prompt: + type: string + maxLength: 32000 + minLength: 1 + description: Text prompt that describes the video to generate. + input_reference: + type: string + format: binary + description: Optional image reference that guides generation. + seconds: + $ref: '#/components/schemas/VideoSeconds' + description: Clip duration in seconds. Defaults to 4 seconds. + size: + $ref: '#/components/schemas/VideoSize' + description: Output resolution formatted as width x height. Defaults to 720x1280. + type: object + required: + - prompt + title: Create video request + description: Parameters for creating a new video generation job. + DeletedVideoResource: + properties: + object: + type: string + enum: + - video.deleted + description: The object type that signals the deletion response. + default: video.deleted + x-stainless-const: true + deleted: + type: boolean + description: Indicates that the video resource was deleted. + id: + type: string + description: Identifier of the deleted video. + type: object + required: + - object + - deleted + - id + title: Deleted video response + description: Confirmation payload returned after deleting a video. + VideoContentVariant: + type: string + enum: + - video + - thumbnail + - spritesheet + CreateVideoRemixBody: + properties: + prompt: + type: string + maxLength: 32000 + minLength: 1 + description: Updated text prompt that directs the remix generation. + type: object + required: + - prompt + title: Create video remix request + description: Parameters for remixing an existing generated video. + TruncationEnum: + type: string + enum: + - auto + - disabled + TokenCountsBody: + properties: + model: + anyOf: + - type: string + description: >- + Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a wide range of + models with different capabilities, performance characteristics, and price points. Refer to + the [model guide](https://platform.openai.com/docs/models) to browse and compare available + models. + - type: 'null' + input: + anyOf: + - description: Text, image, or file inputs to the model, used to generate a response + anyOf: + - type: string + maxLength: 10485760 + description: A text input to the model, equivalent to a text input with the `user` role. + - items: + $ref: '#/components/schemas/InputItem' + type: array + - type: 'null' + previous_response_id: + anyOf: + - type: string + description: >- + The unique ID of the previous response to the model. Use this to create multi-turn + conversations. Learn more about [conversation + state](https://platform.openai.com/docs/guides/conversation-state). Cannot be used in + conjunction with `conversation`. + example: resp_123 + - type: 'null' + tools: + anyOf: + - items: + $ref: '#/components/schemas/Tool' + type: array + description: >- + An array of tools the model may call while generating a response. You can specify which tool + to use by setting the `tool_choice` parameter. + - type: 'null' + text: + anyOf: + - $ref: '#/components/schemas/ResponseTextParam' + - type: 'null' + reasoning: + anyOf: + - $ref: '#/components/schemas/Reasoning' + description: >- + **gpt-5 and o-series models only** Configuration options for [reasoning + models](https://platform.openai.com/docs/guides/reasoning). + - type: 'null' + truncation: + $ref: '#/components/schemas/TruncationEnum' + description: >- + The truncation strategy to use for the model response. 
- `auto`: If the input to this Response + exceeds the model's context window size, the model will truncate the response to fit the context + window by dropping items from the beginning of the conversation. - `disabled` (default): If the + input size will exceed the context window size for a model, the request will fail with a 400 + error. + instructions: + anyOf: + - type: string + description: >- + A system (or developer) message inserted into the model's context. + + When used along with `previous_response_id`, the instructions from a previous response will + not be carried over to the next response. This makes it simple to swap out system (or + developer) messages in new responses. + - type: 'null' + conversation: + anyOf: + - $ref: '#/components/schemas/ConversationParam' + - type: 'null' + tool_choice: + anyOf: + - $ref: '#/components/schemas/ToolChoiceParam' + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + description: Whether to allow the model to run tool calls in parallel. + - type: 'null' + type: object + required: [] + TokenCountsResource: + properties: + object: + type: string + enum: + - response.input_tokens + default: response.input_tokens + x-stainless-const: true + input_tokens: + type: integer + type: object + required: + - object + - input_tokens + title: Token counts + example: + object: response.input_tokens + input_tokens: 123 + ChatkitWorkflowTracing: + properties: + enabled: + type: boolean + description: Indicates whether tracing is enabled. + type: object + required: + - enabled + title: Tracing Configuration + description: Controls diagnostic tracing during the session. + ChatkitWorkflow: + properties: + id: + type: string + description: Identifier of the workflow backing the session. + version: + anyOf: + - type: string + description: >- + Specific workflow version used for the session. Defaults to null when using the latest + deployment. + - type: 'null' + state_variables: + anyOf: + - additionalProperties: + anyOf: + - type: string + - type: integer + - type: boolean + - type: number + type: object + description: >- + State variable key-value pairs applied when invoking the workflow. Defaults to null when no + overrides were provided. + x-oaiTypeLabel: map + - type: 'null' + tracing: + $ref: '#/components/schemas/ChatkitWorkflowTracing' + description: Tracing settings applied to the workflow. + type: object + required: + - id + - version + - state_variables + - tracing + title: Workflow + description: Workflow metadata and state returned for the session. + ChatSessionRateLimits: + properties: + max_requests_per_1_minute: + type: integer + description: Maximum allowed requests per one-minute window. + type: object + required: + - max_requests_per_1_minute + title: Rate limits + description: Active per-minute request limit for the session. + ChatSessionStatus: + type: string + enum: + - active + - expired + - cancelled + ChatSessionAutomaticThreadTitling: + properties: + enabled: + type: boolean + description: Whether automatic thread titling is enabled. + type: object + required: + - enabled + title: Automatic thread titling + description: Automatic thread title preferences for the session. + ChatSessionFileUpload: + properties: + enabled: + type: boolean + description: Indicates if uploads are enabled for the session. + max_file_size: + anyOf: + - type: integer + description: Maximum upload size in megabytes. + - type: 'null' + max_files: + anyOf: + - type: integer + description: Maximum number of uploads allowed during the session. 
+ - type: 'null' + type: object + required: + - enabled + - max_file_size + - max_files + title: File upload settings + description: Upload permissions and limits applied to the session. + ChatSessionHistory: + properties: + enabled: + type: boolean + description: Indicates if chat history is persisted for the session. + recent_threads: + anyOf: + - type: integer + description: >- + Number of prior threads surfaced in history views. Defaults to null when all history is + retained. + - type: 'null' + type: object + required: + - enabled + - recent_threads + title: History settings + description: History retention preferences returned for the session. + ChatSessionChatkitConfiguration: + properties: + automatic_thread_titling: + $ref: '#/components/schemas/ChatSessionAutomaticThreadTitling' + description: Automatic thread titling preferences. + file_upload: + $ref: '#/components/schemas/ChatSessionFileUpload' + description: Upload settings for the session. + history: + $ref: '#/components/schemas/ChatSessionHistory' + description: History retention configuration. + type: object + required: + - automatic_thread_titling + - file_upload + - history + title: ChatKit configuration + description: ChatKit configuration for the session. + ChatSessionResource: + properties: + id: + type: string + description: Identifier for the ChatKit session. + object: + type: string + enum: + - chatkit.session + description: Type discriminator that is always `chatkit.session`. + default: chatkit.session + x-stainless-const: true + expires_at: + type: integer + description: Unix timestamp (in seconds) for when the session expires. + client_secret: + type: string + description: Ephemeral client secret that authenticates session requests. + workflow: + $ref: '#/components/schemas/ChatkitWorkflow' + description: Workflow metadata for the session. + user: + type: string + description: User identifier associated with the session. + rate_limits: + $ref: '#/components/schemas/ChatSessionRateLimits' + description: Resolved rate limit values. + max_requests_per_1_minute: + type: integer + description: Convenience copy of the per-minute request limit. + status: + $ref: '#/components/schemas/ChatSessionStatus' + description: Current lifecycle state of the session. + chatkit_configuration: + $ref: '#/components/schemas/ChatSessionChatkitConfiguration' + description: Resolved ChatKit feature configuration for the session. + type: object + required: + - id + - object + - expires_at + - client_secret + - workflow + - user + - rate_limits + - max_requests_per_1_minute + - status + - chatkit_configuration + title: The chat session object + description: Represents a ChatKit session and its resolved configuration. + WorkflowTracingParam: + properties: + enabled: + type: boolean + description: Whether tracing is enabled during the session. Defaults to true. + type: object + required: [] + title: Tracing Configuration + description: Controls diagnostic tracing during the session. + WorkflowParam: + properties: + id: + type: string + description: Identifier for the workflow invoked by the session. + version: + type: string + description: Specific workflow version to run. Defaults to the latest deployed version. + state_variables: + additionalProperties: + anyOf: + - type: string + maxLength: 10485760 + - type: integer + - type: boolean + - type: number + type: object + maxProperties: 64 + description: >- + State variables forwarded to the workflow. 
Keys may be up to 64 characters, values must be + primitive types, and the map defaults to an empty object. + x-oaiTypeLabel: map + tracing: + $ref: '#/components/schemas/WorkflowTracingParam' + description: >- + Optional tracing overrides for the workflow invocation. When omitted, tracing is enabled by + default. + type: object + required: + - id + title: Workflow settings + description: Workflow reference and overrides applied to the chat session. + ExpiresAfterParam: + properties: + anchor: + type: string + enum: + - created_at + description: Base timestamp used to calculate expiration. Currently fixed to `created_at`. + default: created_at + x-stainless-const: true + seconds: + type: integer + maximum: 600 + minimum: 1 + description: Number of seconds after the anchor when the session expires. + type: object + required: + - anchor + - seconds + title: Expiration overrides + description: Controls when the session expires relative to an anchor timestamp. + RateLimitsParam: + properties: + max_requests_per_1_minute: + type: integer + minimum: 1 + description: Maximum number of requests allowed per minute for the session. Defaults to 10. + type: object + required: [] + title: Rate limit overrides + description: Controls request rate limits for the session. + AutomaticThreadTitlingParam: + properties: + enabled: + type: boolean + description: Enable automatic thread title generation. Defaults to true. + type: object + required: [] + title: Automatic thread titling configuration + description: Controls whether ChatKit automatically generates thread titles. + FileUploadParam: + properties: + enabled: + type: boolean + description: Enable uploads for this session. Defaults to false. + max_file_size: + type: integer + maximum: 512 + minimum: 1 + description: >- + Maximum size in megabytes for each uploaded file. Defaults to 512 MB, which is the maximum + allowable size. + max_files: + type: integer + minimum: 1 + description: Maximum number of files that can be uploaded to the session. Defaults to 10. + type: object + required: [] + title: File upload configuration + description: Controls whether users can upload files. + HistoryParam: + properties: + enabled: + type: boolean + description: Enables chat users to access previous ChatKit threads. Defaults to true. + recent_threads: + type: integer + minimum: 1 + description: Number of recent ChatKit threads users have access to. Defaults to unlimited when unset. + type: object + required: [] + title: Chat history configuration + description: Controls how much historical context is retained for the session. + ChatkitConfigurationParam: + properties: + automatic_thread_titling: + $ref: '#/components/schemas/AutomaticThreadTitlingParam' + description: >- + Configuration for automatic thread titling. When omitted, automatic thread titling is enabled by + default. + file_upload: + $ref: '#/components/schemas/FileUploadParam' + description: >- + Configuration for upload enablement and limits. When omitted, uploads are disabled by default + (max_files 10, max_file_size 512 MB). + history: + $ref: '#/components/schemas/HistoryParam' + description: >- + Configuration for chat history retention. When omitted, history is enabled by default with no + limit on recent_threads (null). + type: object + required: [] + title: ChatKit configuration overrides + description: Optional per-session configuration settings for ChatKit behavior. 
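To make the interplay of these parameter schemas concrete, here is a small Python sketch that composes a session-creation payload of the shape accepted by the `CreateChatSessionBody` schema that follows. The field names and bounds (the 1-600 second expiry window, the 512 MB upload cap, the default per-minute limit of 10) come from the schemas above; the helper function itself and its local validation are illustrative assumptions, not part of the spec.

```python
"""Illustrative sketch: compose a ChatKit session-creation payload from
WorkflowParam, ExpiresAfterParam, RateLimitsParam, FileUploadParam, and
ChatkitConfigurationParam. Field names and limits follow the spec; the
helper and its checks are assumptions for illustration."""


def build_chatkit_session_body(
    workflow_id: str,
    user: str,
    expires_seconds: int = 600,          # ExpiresAfterParam.seconds: 1..600; anchor is fixed to created_at
    max_requests_per_minute: int = 10,   # RateLimitsParam default when omitted
    enable_uploads: bool = False,        # FileUploadParam.enabled defaults to false
    max_file_size_mb: int = 512,         # 1..512 MB; 512 is the maximum allowable size
    max_files: int = 10,                 # FileUploadParam.max_files default
) -> dict:
    if not 1 <= expires_seconds <= 600:
        raise ValueError("ExpiresAfterParam.seconds must be between 1 and 600")
    if not 1 <= max_file_size_mb <= 512:
        raise ValueError("FileUploadParam.max_file_size must be between 1 and 512 MB")
    return {
        "workflow": {"id": workflow_id},  # only `id` is required by WorkflowParam
        "user": user,                     # free-form end-user identifier (minLength 1)
        "expires_after": {"anchor": "created_at", "seconds": expires_seconds},
        "rate_limits": {"max_requests_per_1_minute": max_requests_per_minute},
        "chatkit_configuration": {
            "file_upload": {
                "enabled": enable_uploads,
                "max_file_size": max_file_size_mb,
                "max_files": max_files,
            },
            # automatic_thread_titling and history are omitted: both are
            # enabled by default per the schemas above.
        },
    }
```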
+ CreateChatSessionBody: + properties: + workflow: + $ref: '#/components/schemas/WorkflowParam' + description: Workflow that powers the session. + user: + type: string + minLength: 1 + description: >- + A free-form string that identifies your end user; ensures this Session can access other objects + that have the same `user` scope. + expires_after: + $ref: '#/components/schemas/ExpiresAfterParam' + description: Optional override for session expiration timing in seconds from creation. Defaults to 10 minutes. + rate_limits: + $ref: '#/components/schemas/RateLimitsParam' + description: Optional override for per-minute request limits. When omitted, defaults to 10. + chatkit_configuration: + $ref: '#/components/schemas/ChatkitConfigurationParam' + description: Optional overrides for ChatKit runtime configuration features + type: object + required: + - workflow + - user + title: Create chat session request + description: Parameters for provisioning a new ChatKit session. + UserMessageInputText: + properties: + type: + type: string + enum: + - input_text + description: Type discriminator that is always `input_text`. + default: input_text + x-stainless-const: true + text: + type: string + description: Plain-text content supplied by the user. + type: object + required: + - type + - text + title: User message input + description: Text block that a user contributed to the thread. + UserMessageQuotedText: + properties: + type: + type: string + enum: + - quoted_text + description: Type discriminator that is always `quoted_text`. + default: quoted_text + x-stainless-const: true + text: + type: string + description: Quoted text content. + type: object + required: + - type + - text + title: User message quoted text + description: Quoted snippet that the user referenced in their message. + AttachmentType: + type: string + enum: + - image + - file + Attachment: + properties: + type: + $ref: '#/components/schemas/AttachmentType' + description: Attachment discriminator. + id: + type: string + description: Identifier for the attachment. + name: + type: string + description: Original display name for the attachment. + mime_type: + type: string + description: MIME type of the attachment. + preview_url: + anyOf: + - type: string + description: Preview URL for rendering the attachment inline. + - type: 'null' + type: object + required: + - type + - id + - name + - mime_type + - preview_url + title: Attachment + description: Attachment metadata included on thread items. + ToolChoice: + properties: + id: + type: string + description: Identifier of the requested tool. + type: object + required: + - id + title: Tool choice + description: Tool selection that the assistant should honor when executing the item. + InferenceOptions: + properties: + tool_choice: + anyOf: + - $ref: '#/components/schemas/ToolChoice' + description: Preferred tool to invoke. Defaults to null when ChatKit should auto-select. + - type: 'null' + model: + anyOf: + - type: string + description: Model name that generated the response. Defaults to null when using the session default. + - type: 'null' + type: object + required: + - tool_choice + - model + title: Inference options + description: Model and tool overrides applied when generating the assistant response. + UserMessageItem: + properties: + id: + type: string + description: Identifier of the thread item. + object: + type: string + enum: + - chatkit.thread_item + description: Type discriminator that is always `chatkit.thread_item`. 
+ default: chatkit.thread_item + x-stainless-const: true + created_at: + type: integer + description: Unix timestamp (in seconds) for when the item was created. + thread_id: + type: string + description: Identifier of the parent thread. + type: + type: string + enum: + - chatkit.user_message + default: chatkit.user_message + x-stainless-const: true + content: + items: + description: Content blocks that comprise a user message. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/UserMessageInputText' + - $ref: '#/components/schemas/UserMessageQuotedText' + type: array + description: Ordered content elements supplied by the user. + attachments: + items: + $ref: '#/components/schemas/Attachment' + type: array + description: Attachments associated with the user message. Defaults to an empty list. + inference_options: + anyOf: + - $ref: '#/components/schemas/InferenceOptions' + description: Inference overrides applied to the message. Defaults to null when unset. + - type: 'null' + type: object + required: + - id + - object + - created_at + - thread_id + - type + - content + - attachments + - inference_options + title: User Message Item + description: User-authored messages within a thread. + FileAnnotationSource: + properties: + type: + type: string + enum: + - file + description: Type discriminator that is always `file`. + default: file + x-stainless-const: true + filename: + type: string + description: Filename referenced by the annotation. + type: object + required: + - type + - filename + title: File annotation source + description: Attachment source referenced by an annotation. + FileAnnotation: + properties: + type: + type: string + enum: + - file + description: Type discriminator that is always `file` for this annotation. + default: file + x-stainless-const: true + source: + $ref: '#/components/schemas/FileAnnotationSource' + description: File attachment referenced by the annotation. + type: object + required: + - type + - source + title: File annotation + description: Annotation that references an uploaded file. + UrlAnnotationSource: + properties: + type: + type: string + enum: + - url + description: Type discriminator that is always `url`. + default: url + x-stainless-const: true + url: + type: string + description: URL referenced by the annotation. + type: object + required: + - type + - url + title: URL annotation source + description: URL backing an annotation entry. + UrlAnnotation: + properties: + type: + type: string + enum: + - url + description: Type discriminator that is always `url` for this annotation. + default: url + x-stainless-const: true + source: + $ref: '#/components/schemas/UrlAnnotationSource' + description: URL referenced by the annotation. + type: object + required: + - type + - source + title: URL annotation + description: Annotation that references a URL. + ResponseOutputText: + properties: + type: + type: string + enum: + - output_text + description: Type discriminator that is always `output_text`. + default: output_text + x-stainless-const: true + text: + type: string + description: Assistant generated text. + annotations: + items: + description: Annotation object describing a cited source. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/FileAnnotation' + - $ref: '#/components/schemas/UrlAnnotation' + type: array + description: Ordered list of annotations attached to the response text. 
+ type: object + required: + - type + - text + - annotations + title: Assistant message content + description: Assistant response text accompanied by optional annotations. + AssistantMessageItem: + properties: + id: + type: string + description: Identifier of the thread item. + object: + type: string + enum: + - chatkit.thread_item + description: Type discriminator that is always `chatkit.thread_item`. + default: chatkit.thread_item + x-stainless-const: true + created_at: + type: integer + description: Unix timestamp (in seconds) for when the item was created. + thread_id: + type: string + description: Identifier of the parent thread. + type: + type: string + enum: + - chatkit.assistant_message + description: Type discriminator that is always `chatkit.assistant_message`. + default: chatkit.assistant_message + x-stainless-const: true + content: + items: + $ref: '#/components/schemas/ResponseOutputText' + type: array + description: Ordered assistant response segments. + type: object + required: + - id + - object + - created_at + - thread_id + - type + - content + title: Assistant message + description: Assistant-authored message within a thread. + WidgetMessageItem: + properties: + id: + type: string + description: Identifier of the thread item. + object: + type: string + enum: + - chatkit.thread_item + description: Type discriminator that is always `chatkit.thread_item`. + default: chatkit.thread_item + x-stainless-const: true + created_at: + type: integer + description: Unix timestamp (in seconds) for when the item was created. + thread_id: + type: string + description: Identifier of the parent thread. + type: + type: string + enum: + - chatkit.widget + description: Type discriminator that is always `chatkit.widget`. + default: chatkit.widget + x-stainless-const: true + widget: + type: string + description: Serialized widget payload rendered in the UI. + type: object + required: + - id + - object + - created_at + - thread_id + - type + - widget + title: Widget message + description: Thread item that renders a widget payload. + ClientToolCallStatus: + type: string + enum: + - in_progress + - completed + ClientToolCallItem: + properties: + id: + type: string + description: Identifier of the thread item. + object: + type: string + enum: + - chatkit.thread_item + description: Type discriminator that is always `chatkit.thread_item`. + default: chatkit.thread_item + x-stainless-const: true + created_at: + type: integer + description: Unix timestamp (in seconds) for when the item was created. + thread_id: + type: string + description: Identifier of the parent thread. + type: + type: string + enum: + - chatkit.client_tool_call + description: Type discriminator that is always `chatkit.client_tool_call`. + default: chatkit.client_tool_call + x-stainless-const: true + status: + $ref: '#/components/schemas/ClientToolCallStatus' + description: Execution status for the tool call. + call_id: + type: string + description: Identifier for the client tool call. + name: + type: string + description: Tool name that was invoked. + arguments: + type: string + description: JSON-encoded arguments that were sent to the tool. + output: + anyOf: + - type: string + description: JSON-encoded output captured from the tool. Defaults to null while execution is in progress. 
+ - type: 'null' + type: object + required: + - id + - object + - created_at + - thread_id + - type + - status + - call_id + - name + - arguments + - output + title: Client tool call + description: Record of a client side tool invocation initiated by the assistant. + TaskType: + type: string + enum: + - custom + - thought + TaskItem: + properties: + id: + type: string + description: Identifier of the thread item. + object: + type: string + enum: + - chatkit.thread_item + description: Type discriminator that is always `chatkit.thread_item`. + default: chatkit.thread_item + x-stainless-const: true + created_at: + type: integer + description: Unix timestamp (in seconds) for when the item was created. + thread_id: + type: string + description: Identifier of the parent thread. + type: + type: string + enum: + - chatkit.task + description: Type discriminator that is always `chatkit.task`. + default: chatkit.task + x-stainless-const: true + task_type: + $ref: '#/components/schemas/TaskType' + description: Subtype for the task. + heading: + anyOf: + - type: string + description: Optional heading for the task. Defaults to null when not provided. + - type: 'null' + summary: + anyOf: + - type: string + description: Optional summary that describes the task. Defaults to null when omitted. + - type: 'null' + type: object + required: + - id + - object + - created_at + - thread_id + - type + - task_type + - heading + - summary + title: Task item + description: Task emitted by the workflow to show progress and status updates. + TaskGroupTask: + properties: + type: + $ref: '#/components/schemas/TaskType' + description: Subtype for the grouped task. + heading: + anyOf: + - type: string + description: Optional heading for the grouped task. Defaults to null when not provided. + - type: 'null' + summary: + anyOf: + - type: string + description: Optional summary that describes the grouped task. Defaults to null when omitted. + - type: 'null' + type: object + required: + - type + - heading + - summary + title: Task group task + description: Task entry that appears within a TaskGroup. + TaskGroupItem: + properties: + id: + type: string + description: Identifier of the thread item. + object: + type: string + enum: + - chatkit.thread_item + description: Type discriminator that is always `chatkit.thread_item`. + default: chatkit.thread_item + x-stainless-const: true + created_at: + type: integer + description: Unix timestamp (in seconds) for when the item was created. + thread_id: + type: string + description: Identifier of the parent thread. + type: + type: string + enum: + - chatkit.task_group + description: Type discriminator that is always `chatkit.task_group`. + default: chatkit.task_group + x-stainless-const: true + tasks: + items: + $ref: '#/components/schemas/TaskGroupTask' + type: array + description: Tasks included in the group. + type: object + required: + - id + - object + - created_at + - thread_id + - type + - tasks + title: Task group + description: Collection of workflow tasks grouped together in the thread. + ThreadItem: + title: The thread item + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/UserMessageItem' + - $ref: '#/components/schemas/AssistantMessageItem' + - $ref: '#/components/schemas/WidgetMessageItem' + - $ref: '#/components/schemas/ClientToolCallItem' + - $ref: '#/components/schemas/TaskItem' + - $ref: '#/components/schemas/TaskGroupItem' + ThreadItemListResource: + properties: + object: + description: The type of object returned, must be `list`. 
+ default: list + x-stainless-const: true + const: list + data: + items: + $ref: '#/components/schemas/ThreadItem' + type: array + description: A list of items + first_id: + anyOf: + - type: string + description: The ID of the first item in the list. + - type: 'null' + last_id: + anyOf: + - type: string + description: The ID of the last item in the list. + - type: 'null' + has_more: + type: boolean + description: Whether there are more items available. + type: object + required: + - object + - data + - first_id + - last_id + - has_more + title: Thread Items + description: A paginated list of thread items rendered for the ChatKit API. + ActiveStatus: + properties: + type: + type: string + enum: + - active + description: Status discriminator that is always `active`. + default: active + x-stainless-const: true + type: object + required: + - type + title: Active thread status + description: Indicates that a thread is active. + LockedStatus: + properties: + type: + type: string + enum: + - locked + description: Status discriminator that is always `locked`. + default: locked + x-stainless-const: true + reason: + anyOf: + - type: string + description: Reason that the thread was locked. Defaults to null when no reason is recorded. + - type: 'null' + type: object + required: + - type + - reason + title: Locked thread status + description: Indicates that a thread is locked and cannot accept new input. + ClosedStatus: + properties: + type: + type: string + enum: + - closed + description: Status discriminator that is always `closed`. + default: closed + x-stainless-const: true + reason: + anyOf: + - type: string + description: Reason that the thread was closed. Defaults to null when no reason is recorded. + - type: 'null' + type: object + required: + - type + - reason + title: Closed thread status + description: Indicates that a thread has been closed. + ThreadResource: + properties: + id: + type: string + description: Identifier of the thread. + object: + type: string + enum: + - chatkit.thread + description: Type discriminator that is always `chatkit.thread`. + default: chatkit.thread + x-stainless-const: true + created_at: + type: integer + description: Unix timestamp (in seconds) for when the thread was created. + title: + anyOf: + - type: string + description: >- + Optional human-readable title for the thread. Defaults to null when no title has been + generated. + - type: 'null' + status: + description: Current status for the thread. Defaults to `active` for newly created threads. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/ActiveStatus' + - $ref: '#/components/schemas/LockedStatus' + - $ref: '#/components/schemas/ClosedStatus' + user: + type: string + description: Free-form string that identifies your end user who owns the thread. + type: object + required: + - id + - object + - created_at + - title + - status + - user + title: The thread object + description: Represents a ChatKit thread and its current status. + example: + id: cthr_def456 + object: chatkit.thread + created_at: 1712345600 + title: Demo feedback + status: + type: active + user: user_456 + DeletedThreadResource: + properties: + id: + type: string + description: Identifier of the deleted thread. + object: + type: string + enum: + - chatkit.thread.deleted + description: Type discriminator that is always `chatkit.thread.deleted`. + default: chatkit.thread.deleted + x-stainless-const: true + deleted: + type: boolean + description: Indicates that the thread has been deleted. 
+ type: object + required: + - id + - object + - deleted + title: Deleted thread + description: Confirmation payload returned after deleting a thread. + ThreadListResource: + properties: + object: + description: The type of object returned, must be `list`. + default: list + x-stainless-const: true + const: list + data: + items: + $ref: '#/components/schemas/ThreadResource' + type: array + description: A list of items + first_id: + anyOf: + - type: string + description: The ID of the first item in the list. + - type: 'null' + last_id: + anyOf: + - type: string + description: The ID of the last item in the list. + - type: 'null' + has_more: + type: boolean + description: Whether there are more items available. + type: object + required: + - object + - data + - first_id + - last_id + - has_more + title: Threads + description: A paginated list of ChatKit threads. + RealtimeConnectParams: + type: object + properties: + model: + type: string + call_id: + type: string + ModerationImageURLInput: + type: object + description: An object describing an image to classify. + properties: + type: + description: Always `image_url`. + type: string + enum: + - image_url + x-stainless-const: true + image_url: + type: object + description: Contains either an image URL or a data URL for a base64 encoded image. + properties: + url: + type: string + description: Either a URL of the image or the base64 encoded image data. + format: uri + example: https://example.com/image.jpg + required: + - url + required: + - type + - image_url + ModerationTextInput: + type: object + description: An object describing text to classify. + properties: + type: + description: Always `text`. + type: string + enum: + - text + x-stainless-const: true + text: + description: A string of text to classify. + type: string + example: I want to kill them + required: + - type + - text + ComparisonFilterValueItems: + anyOf: + - type: string + - type: number + ChunkingStrategyResponse: + type: object + description: The strategy used to chunk the file. + anyOf: + - $ref: '#/components/schemas/StaticChunkingStrategyResponseParam' + - $ref: '#/components/schemas/OtherChunkingStrategyResponseParam' + discriminator: + propertyName: type + FilePurpose: + description: > + The intended purpose of the uploaded file. One of: - `assistants`: Used in the Assistants API - + `batch`: Used in the Batch API - `fine-tune`: Used for fine-tuning - `vision`: Images used for vision + fine-tuning - `user_data`: Flexible file type for any purpose - `evals`: Used for eval data sets + type: string + enum: + - assistants + - batch + - fine-tune + - vision + - user_data + - evals + BatchError: + type: object + properties: + code: + type: string + description: An error code identifying the error type. + message: + type: string + description: A human-readable message providing more details about the error. + param: + anyOf: + - type: string + description: The name of the parameter that caused the error, if applicable. + - type: 'null' + line: + anyOf: + - type: integer + description: The line number of the input file where the error occurred, if applicable. + - type: 'null' + BatchRequestCounts: + type: object + properties: + total: + type: integer + description: Total number of requests in the batch. + completed: + type: integer + description: Number of requests that have been completed successfully. + failed: + type: integer + description: Number of requests that have failed. 
+ required: + - total + - completed + - failed + description: The request counts for different statuses within the batch. + AssistantTool: + anyOf: + - $ref: '#/components/schemas/AssistantToolsCode' + - $ref: '#/components/schemas/AssistantToolsFileSearch' + - $ref: '#/components/schemas/AssistantToolsFunction' + discriminator: + propertyName: type + TextAnnotationDelta: + anyOf: + - $ref: '#/components/schemas/MessageDeltaContentTextAnnotationsFileCitationObject' + - $ref: '#/components/schemas/MessageDeltaContentTextAnnotationsFilePathObject' + discriminator: + propertyName: type + TextAnnotation: + anyOf: + - $ref: '#/components/schemas/MessageContentTextAnnotationsFileCitationObject' + - $ref: '#/components/schemas/MessageContentTextAnnotationsFilePathObject' + discriminator: + propertyName: type + RunStepDetailsToolCall: + anyOf: + - $ref: '#/components/schemas/RunStepDetailsToolCallsCodeObject' + - $ref: '#/components/schemas/RunStepDetailsToolCallsFileSearchObject' + - $ref: '#/components/schemas/RunStepDetailsToolCallsFunctionObject' + discriminator: + propertyName: type + RunStepDeltaStepDetailsToolCall: + anyOf: + - $ref: '#/components/schemas/RunStepDeltaStepDetailsToolCallsCodeObject' + - $ref: '#/components/schemas/RunStepDeltaStepDetailsToolCallsFileSearchObject' + - $ref: '#/components/schemas/RunStepDeltaStepDetailsToolCallsFunctionObject' + discriminator: + propertyName: type + MessageContent: + anyOf: + - $ref: '#/components/schemas/MessageContentImageFileObject' + - $ref: '#/components/schemas/MessageContentImageUrlObject' + - $ref: '#/components/schemas/MessageContentTextObject' + - $ref: '#/components/schemas/MessageContentRefusalObject' + discriminator: + propertyName: type + MessageContentDelta: + anyOf: + - $ref: '#/components/schemas/MessageDeltaContentImageFileObject' + - $ref: '#/components/schemas/MessageDeltaContentTextObject' + - $ref: '#/components/schemas/MessageDeltaContentRefusalObject' + - $ref: '#/components/schemas/MessageDeltaContentImageUrlObject' + discriminator: + propertyName: type + ChatModel: + type: string + enum: + - gpt-5.1 + - gpt-5.1-2025-11-13 + - gpt-5.1-codex + - gpt-5.1-mini + - gpt-5.1-chat-latest + - gpt-5 + - gpt-5-mini + - gpt-5-nano + - gpt-5-2025-08-07 + - gpt-5-mini-2025-08-07 + - gpt-5-nano-2025-08-07 + - gpt-5-chat-latest + - gpt-4.1 + - gpt-4.1-mini + - gpt-4.1-nano + - gpt-4.1-2025-04-14 + - gpt-4.1-mini-2025-04-14 + - gpt-4.1-nano-2025-04-14 + - o4-mini + - o4-mini-2025-04-16 + - o3 + - o3-2025-04-16 + - o3-mini + - o3-mini-2025-01-31 + - o1 + - o1-2024-12-17 + - o1-preview + - o1-preview-2024-09-12 + - o1-mini + - o1-mini-2024-09-12 + - gpt-4o + - gpt-4o-2024-11-20 + - gpt-4o-2024-08-06 + - gpt-4o-2024-05-13 + - gpt-4o-audio-preview + - gpt-4o-audio-preview-2024-10-01 + - gpt-4o-audio-preview-2024-12-17 + - gpt-4o-audio-preview-2025-06-03 + - gpt-4o-mini-audio-preview + - gpt-4o-mini-audio-preview-2024-12-17 + - gpt-4o-search-preview + - gpt-4o-mini-search-preview + - gpt-4o-search-preview-2025-03-11 + - gpt-4o-mini-search-preview-2025-03-11 + - chatgpt-4o-latest + - codex-mini-latest + - gpt-4o-mini + - gpt-4o-mini-2024-07-18 + - gpt-4-turbo + - gpt-4-turbo-2024-04-09 + - gpt-4-0125-preview + - gpt-4-turbo-preview + - gpt-4-1106-preview + - gpt-4-vision-preview + - gpt-4 + - gpt-4-0314 + - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0314 + - gpt-4-32k-0613 + - gpt-3.5-turbo + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-0301 + - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-1106 + - gpt-3.5-turbo-0125 + - gpt-3.5-turbo-16k-0613 + 
x-stainless-nominal: false + Summary: + properties: + type: + type: string + enum: + - summary_text + description: The type of the object. Always `summary_text`. + default: summary_text + x-stainless-const: true + text: + type: string + description: A summary of the reasoning output from the model so far. + type: object + required: + - type + - text + title: Summary text + description: A summary text from the model. + CreateThreadAndRunRequestWithoutStream: + type: object + additionalProperties: false + properties: + assistant_id: + description: >- + The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + type: string + thread: + $ref: '#/components/schemas/CreateThreadRequest' + model: + description: >- + The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute + this run. If a value is provided here, it will override the model associated with the assistant. + If not, the model associated with the assistant will be used. + anyOf: + - type: string + - type: string + enum: + - gpt-5 + - gpt-5-mini + - gpt-5-nano + - gpt-5-2025-08-07 + - gpt-5-mini-2025-08-07 + - gpt-5-nano-2025-08-07 + - gpt-4.1 + - gpt-4.1-mini + - gpt-4.1-nano + - gpt-4.1-2025-04-14 + - gpt-4.1-mini-2025-04-14 + - gpt-4.1-nano-2025-04-14 + - gpt-4o + - gpt-4o-2024-11-20 + - gpt-4o-2024-08-06 + - gpt-4o-2024-05-13 + - gpt-4o-mini + - gpt-4o-mini-2024-07-18 + - gpt-4.5-preview + - gpt-4.5-preview-2025-02-27 + - gpt-4-turbo + - gpt-4-turbo-2024-04-09 + - gpt-4-0125-preview + - gpt-4-turbo-preview + - gpt-4-1106-preview + - gpt-4-vision-preview + - gpt-4 + - gpt-4-0314 + - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0314 + - gpt-4-32k-0613 + - gpt-3.5-turbo + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-1106 + - gpt-3.5-turbo-0125 + - gpt-3.5-turbo-16k-0613 + x-oaiTypeLabel: string + nullable: true + instructions: + description: >- + Override the default system message of the assistant. This is useful for modifying the behavior on + a per-run basis. + type: string + nullable: true + tools: + description: >- + Override the tools the assistant can use for this run. This is useful for modifying the behavior + on a per-run basis. + nullable: true + type: array + maxItems: 20 + items: + $ref: '#/components/schemas/AssistantTool' + tool_resources: + type: object + description: > + A set of resources that are used by the assistant's tools. The resources are specific to the type + of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the + `file_search` tool requires a list of vector store IDs. + properties: + code_interpreter: + type: object + properties: + file_ids: + type: array + description: > + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available + to the `code_interpreter` tool. There can be a maximum of 20 files associated with the + tool. + default: [] + maxItems: 20 + items: + type: string + file_search: + type: object + properties: + vector_store_ids: + type: array + description: > + The ID of the [vector + store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached to + this assistant. There can be a maximum of 1 vector store attached to the assistant. 
+ maxItems: 1 + items: + type: string + nullable: true + metadata: + $ref: '#/components/schemas/Metadata' + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: > + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: > + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the + top 10% probability mass are considered. + + + We generally recommend altering this or temperature but not both. + max_prompt_tokens: + type: integer + nullable: true + description: > + The maximum number of prompt tokens that may be used over the course of the run. The run will make + a best effort to use only the number of prompt tokens specified, across multiple turns of the run. + If the run exceeds the number of prompt tokens specified, the run will end with status + `incomplete`. See `incomplete_details` for more info. + minimum: 256 + max_completion_tokens: + type: integer + nullable: true + description: > + The maximum number of completion tokens that may be used over the course of the run. The run will + make a best effort to use only the number of completion tokens specified, across multiple turns of + the run. If the run exceeds the number of completion tokens specified, the run will end with + status `incomplete`. See `incomplete_details` for more info. + minimum: 256 + truncation_strategy: + allOf: + - $ref: '#/components/schemas/TruncationObject' + - nullable: true + tool_choice: + allOf: + - $ref: '#/components/schemas/AssistantsApiToolChoiceOption' + - nullable: true + parallel_tool_calls: + $ref: '#/components/schemas/ParallelToolCalls' + response_format: + $ref: '#/components/schemas/AssistantsApiResponseFormatOption' + nullable: true + required: *ref_0 + CreateRunRequestWithoutStream: + type: object + additionalProperties: false + properties: + assistant_id: + description: >- + The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + type: string + model: + description: >- + The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute + this run. If a value is provided here, it will override the model associated with the assistant. + If not, the model associated with the assistant will be used. + anyOf: + - type: string + - $ref: '#/components/schemas/AssistantSupportedModels' + x-oaiTypeLabel: string + nullable: true + reasoning_effort: + $ref: '#/components/schemas/ReasoningEffort' + instructions: + description: >- + Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) of the + assistant. This is useful for modifying the behavior on a per-run basis. + type: string + nullable: true + additional_instructions: + description: >- + Appends additional instructions at the end of the instructions for the run. This is useful for + modifying the behavior on a per-run basis without overriding other instructions. + type: string + nullable: true + additional_messages: + description: Adds additional messages to the thread before creating the run. 
+ type: array + items: + $ref: '#/components/schemas/CreateMessageRequest' + nullable: true + tools: + description: >- + Override the tools the assistant can use for this run. This is useful for modifying the behavior + on a per-run basis. + nullable: true + type: array + maxItems: 20 + items: + $ref: '#/components/schemas/AssistantTool' + metadata: + $ref: '#/components/schemas/Metadata' + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: > + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: > + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the + top 10% probability mass are considered. + + + We generally recommend altering this or temperature but not both. + max_prompt_tokens: + type: integer + nullable: true + description: > + The maximum number of prompt tokens that may be used over the course of the run. The run will make + a best effort to use only the number of prompt tokens specified, across multiple turns of the run. + If the run exceeds the number of prompt tokens specified, the run will end with status + `incomplete`. See `incomplete_details` for more info. + minimum: 256 + max_completion_tokens: + type: integer + nullable: true + description: > + The maximum number of completion tokens that may be used over the course of the run. The run will + make a best effort to use only the number of completion tokens specified, across multiple turns of + the run. If the run exceeds the number of completion tokens specified, the run will end with + status `incomplete`. See `incomplete_details` for more info. + minimum: 256 + truncation_strategy: + allOf: + - $ref: '#/components/schemas/TruncationObject' + - nullable: true + tool_choice: + allOf: + - $ref: '#/components/schemas/AssistantsApiToolChoiceOption' + - nullable: true + parallel_tool_calls: + $ref: '#/components/schemas/ParallelToolCalls' + response_format: + $ref: '#/components/schemas/AssistantsApiResponseFormatOption' + nullable: true + required: *ref_0 + SubmitToolOutputsRunRequestWithoutStream: + type: object + additionalProperties: false + properties: + tool_outputs: + description: A list of tools for which the outputs are being submitted. + type: array + items: + type: object + properties: + tool_call_id: + type: string + description: >- + The ID of the tool call in the `required_action` object within the run object the output is + being submitted for. + output: + type: string + description: The output of the tool call to be submitted to continue the run. + required: + - tool_outputs + RunStatus: + description: >- + The status of the run, which can be either `queued`, `in_progress`, `requires_action`, `cancelling`, + `cancelled`, `failed`, `completed`, `incomplete`, or `expired`. + type: string + enum: + - queued + - in_progress + - requires_action + - cancelling + - cancelled + - failed + - completed + - incomplete + - expired + RunStepDeltaObjectDelta: + description: The delta containing the fields that have changed on the run step. + type: object + properties: + step_details: + type: object + description: The details of the run step. 
+ anyOf: + - $ref: '#/components/schemas/RunStepDeltaStepDetailsMessageCreationObject' + - $ref: '#/components/schemas/RunStepDeltaStepDetailsToolCallsObject' + discriminator: + propertyName: type + CodeInterpreterContainerAuto: + properties: + type: + type: string + enum: + - auto + description: Always `auto`. + default: auto + x-stainless-const: true + file_ids: + items: + type: string + example: file-123 + type: array + maxItems: 50 + description: An optional list of uploaded files to make available to your code. + memory_limit: + anyOf: + - $ref: '#/components/schemas/ContainerMemoryLimit' + - type: 'null' + type: object + required: + - type + title: CodeInterpreterToolAuto + description: >- + Configuration for a code interpreter container. Optionally specify the IDs of the files to run the + code on. + x-stainless-naming: + go: + type_name: ToolCodeInterpreterContainerCodeInterpreterContainerAuto + securitySchemes: + ApiKeyAuth: + type: http + scheme: bearer +x-oaiMeta: + navigationGroups: + - id: responses + title: Responses API + - id: webhooks + title: Webhooks + - id: endpoints + title: Platform APIs + - id: vector_stores + title: Vector stores + - id: chatkit + title: ChatKit + beta: true + - id: containers + title: Containers + - id: realtime + title: Realtime + - id: chat + title: Chat Completions + - id: assistants + title: Assistants + beta: true + - id: administration + title: Administration + - id: legacy + title: Legacy + groups: + - id: responses + title: Responses + description: | + OpenAI's most advanced interface for generating model responses. Supports + text and image inputs, and text outputs. Create stateful interactions + with the model, using the output of previous responses as input. Extend + the model's capabilities with built-in tools for file search, web search, + computer use, and more. Allow the model access to external systems and data + using function calling. + + Related guides: + - [Quickstart](https://platform.openai.com/docs/quickstart?api-mode=responses) + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text?api-mode=responses) + - [Image inputs](https://platform.openai.com/docs/guides/images?api-mode=responses) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs?api-mode=responses) + - [Function calling](https://platform.openai.com/docs/guides/function-calling?api-mode=responses) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state?api-mode=responses) + - [Extend the models with tools](https://platform.openai.com/docs/guides/tools?api-mode=responses) + navigationGroup: responses + sections: + - type: endpoint + key: createResponse + path: create + - type: endpoint + key: getResponse + path: get + - type: endpoint + key: deleteResponse + path: delete + - type: endpoint + key: cancelResponse + path: cancel + - type: endpoint + key: listInputItems + path: input-items + - type: endpoint + key: Getinputtokencounts + path: input-tokens + - type: object + key: Response + path: object + - type: object + key: ResponseItemList + path: list + - id: conversations + title: Conversations + description: | + Create and manage conversations to store and retrieve conversation state across Response API calls. 
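The `TokenCountsBody` and `TokenCountsResource` schemas defined earlier pair with the `Getinputtokencounts` operation listed above (section path `input-tokens`). A minimal round-trip sketch follows, assuming an endpoint URL inferred from that section path, since the spec excerpt names only the operation key:

```python
"""Minimal sketch: POST a TokenCountsBody-shaped payload and read back a
TokenCountsResource. Every request field is optional (required: []); the URL
is an assumption inferred from the `input-tokens` section path."""
import json
import os
import urllib.request

body = {
    "model": "gpt-4o",                    # nullable string per TokenCountsBody
    "input": "How many tokens is this?",  # plain-text input variant (maxLength 10485760)
    "truncation": "disabled",             # TruncationEnum: `auto` or `disabled` (the default)
}

req = urllib.request.Request(
    "https://api.openai.com/v1/responses/input_tokens",  # assumed path
    data=json.dumps(body).encode(),
    headers={
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        "Content-Type": "application/json",
    },
)
with urllib.request.urlopen(req) as resp:
    counts = json.load(resp)

assert counts["object"] == "response.input_tokens"  # constant discriminator
print(counts["input_tokens"])  # e.g. 123, matching the spec's example
```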
+ navigationGroup: responses + sections: + - type: endpoint + key: createConversation + path: create + - type: endpoint + key: getConversation + path: retrieve + - type: endpoint + key: updateConversation + path: update + - type: endpoint + key: deleteConversation + path: delete + - type: endpoint + key: listConversationItems + path: list-items + - type: endpoint + key: createConversationItems + path: create-items + - type: endpoint + key: getConversationItem + path: get-item + - type: endpoint + key: deleteConversationItem + path: delete-item + - type: object + key: Conversation + path: object + - type: object + key: ConversationItemList + path: list-items-object + - id: responses-streaming + title: Streaming events + description: > + When you [create a Response](https://platform.openai.com/docs/api-reference/responses/create) with + + `stream` set to `true`, the server will emit server-sent events to the + + client as the Response is generated. This section contains the events that + + are emitted by the server. + + + [Learn more about streaming + responses](https://platform.openai.com/docs/guides/streaming-responses?api-mode=responses). + navigationGroup: responses + sections: + - type: object + key: ResponseCreatedEvent + path: + - type: object + key: ResponseInProgressEvent + path: + - type: object + key: ResponseCompletedEvent + path: + - type: object + key: ResponseFailedEvent + path: + - type: object + key: ResponseIncompleteEvent + path: + - type: object + key: ResponseOutputItemAddedEvent + path: + - type: object + key: ResponseOutputItemDoneEvent + path: + - type: object + key: ResponseContentPartAddedEvent + path: + - type: object + key: ResponseContentPartDoneEvent + path: + - type: object + key: ResponseTextDeltaEvent + path: response/output_text/delta + - type: object + key: ResponseTextDoneEvent + path: response/output_text/done + - type: object + key: ResponseRefusalDeltaEvent + path: + - type: object + key: ResponseRefusalDoneEvent + path: + - type: object + key: ResponseFunctionCallArgumentsDeltaEvent + path: + - type: object + key: ResponseFunctionCallArgumentsDoneEvent + path: + - type: object + key: ResponseFileSearchCallInProgressEvent + path: + - type: object + key: ResponseFileSearchCallSearchingEvent + path: + - type: object + key: ResponseFileSearchCallCompletedEvent + path: + - type: object + key: ResponseWebSearchCallInProgressEvent + path: + - type: object + key: ResponseWebSearchCallSearchingEvent + path: + - type: object + key: ResponseWebSearchCallCompletedEvent + path: + - type: object + key: ResponseReasoningSummaryPartAddedEvent + path: + - type: object + key: ResponseReasoningSummaryPartDoneEvent + path: + - type: object + key: ResponseReasoningSummaryTextDeltaEvent + path: + - type: object + key: ResponseReasoningSummaryTextDoneEvent + path: + - type: object + key: ResponseReasoningTextDeltaEvent + path: + - type: object + key: ResponseReasoningTextDoneEvent + path: + - type: object + key: ResponseImageGenCallCompletedEvent + path: + - type: object + key: ResponseImageGenCallGeneratingEvent + path: + - type: object + key: ResponseImageGenCallInProgressEvent + path: + - type: object + key: ResponseImageGenCallPartialImageEvent + path: + - type: object + key: ResponseMCPCallArgumentsDeltaEvent + path: + - type: object + key: ResponseMCPCallArgumentsDoneEvent + path: + - type: object + key: ResponseMCPCallCompletedEvent + path: + - type: object + key: ResponseMCPCallFailedEvent + path: + - type: object + key: ResponseMCPCallInProgressEvent + path: + - 
type: object + key: ResponseMCPListToolsCompletedEvent + path: + - type: object + key: ResponseMCPListToolsFailedEvent + path: + - type: object + key: ResponseMCPListToolsInProgressEvent + path: + - type: object + key: ResponseCodeInterpreterCallInProgressEvent + path: + - type: object + key: ResponseCodeInterpreterCallInterpretingEvent + path: + - type: object + key: ResponseCodeInterpreterCallCompletedEvent + path: + - type: object + key: ResponseCodeInterpreterCallCodeDeltaEvent + path: + - type: object + key: ResponseCodeInterpreterCallCodeDoneEvent + path: + - type: object + key: ResponseOutputTextAnnotationAddedEvent + path: + - type: object + key: ResponseQueuedEvent + path: + - type: object + key: ResponseCustomToolCallInputDeltaEvent + path: + - type: object + key: ResponseCustomToolCallInputDoneEvent + path: + - type: object + key: ResponseErrorEvent + path: + - id: webhook-events + title: Webhook Events + description: | + Webhooks are HTTP requests sent by OpenAI to a URL you specify when certain + events happen during the course of API usage. + + [Learn more about webhooks](https://platform.openai.com/docs/guides/webhooks). + navigationGroup: webhooks + sections: + - type: object + key: WebhookResponseCompleted + path: + - type: object + key: WebhookResponseCancelled + path: + - type: object + key: WebhookResponseFailed + path: + - type: object + key: WebhookResponseIncomplete + path: + - type: object + key: WebhookBatchCompleted + path: + - type: object + key: WebhookBatchCancelled + path: + - type: object + key: WebhookBatchExpired + path: + - type: object + key: WebhookBatchFailed + path: + - type: object + key: WebhookFineTuningJobSucceeded + path: + - type: object + key: WebhookFineTuningJobFailed + path: + - type: object + key: WebhookFineTuningJobCancelled + path: + - type: object + key: WebhookEvalRunSucceeded + path: + - type: object + key: WebhookEvalRunFailed + path: + - type: object + key: WebhookEvalRunCanceled + path: + - type: object + key: WebhookRealtimeCallIncoming + path: + - id: audio + title: Audio + description: | + Learn how to turn audio into text or text into audio. + + Related guide: [Speech to text](https://platform.openai.com/docs/guides/speech-to-text) + navigationGroup: endpoints + sections: + - type: endpoint + key: createSpeech + path: createSpeech + - type: endpoint + key: createTranscription + path: createTranscription + - type: endpoint + key: createTranslation + path: createTranslation + - type: object + key: CreateTranscriptionResponseJson + path: json-object + - type: object + key: CreateTranscriptionResponseDiarizedJson + path: diarized-json-object + - type: object + key: CreateTranscriptionResponseVerboseJson + path: verbose-json-object + - type: object + key: SpeechAudioDeltaEvent + path: speech-audio-delta-event + - type: object + key: SpeechAudioDoneEvent + path: speech-audio-done-event + - type: object + key: TranscriptTextDeltaEvent + path: transcript-text-delta-event + - type: object + key: TranscriptTextSegmentEvent + path: transcript-text-segment-event + - type: object + key: TranscriptTextDoneEvent + path: transcript-text-done-event + - id: videos + title: Videos + description: | + Generate videos. 
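The video endpoints grouped below accept the `CreateVideoBody` schema defined earlier: `prompt` is the only required field (1-32000 characters), while `seconds` and `size` default to 4 seconds and 720x1280. A hedged request sketch, assuming the URL from the endpoint list and representative literals for `VideoSeconds` and `VideoSize`, both of which are defined elsewhere in the spec:

```python
"""Illustrative only: request body shaped by CreateVideoBody. The URL is an
assumption inferred from the `createVideo` endpoint entry below, the
`seconds`/`size` literals are assumptions matching the documented defaults,
and `input_reference` (binary image guidance) is omitted to keep the body
plain JSON."""
import json
import os
import urllib.request

body = {
    "prompt": "A paper boat drifting down a rain-soaked street at dusk",
    "seconds": "4",      # default clip length; allowed values live in VideoSeconds
    "size": "720x1280",  # default width x height; allowed values live in VideoSize
}

req = urllib.request.Request(
    "https://api.openai.com/v1/videos",  # assumed path for the createVideo operation
    data=json.dumps(body).encode(),
    headers={
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        "Content-Type": "application/json",
    },
)
with urllib.request.urlopen(req) as resp:
    job = json.load(resp)  # fields follow VideoResource, defined elsewhere in the spec
print(job)
```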
+ navigationGroup: endpoints + sections: + - type: endpoint + key: createVideo + path: create + - type: endpoint + key: CreateVideoRemix + path: remix + - type: endpoint + key: ListVideos + path: list + - type: endpoint + key: GetVideo + path: retrieve + - type: endpoint + key: DeleteVideo + path: delete + - type: endpoint + key: RetrieveVideoContent + path: content + - type: object + key: VideoResource + path: object + - id: images + title: Images + description: | + Given a prompt and/or an input image, the model will generate a new image. + Related guide: [Image generation](https://platform.openai.com/docs/guides/images) + navigationGroup: endpoints + sections: + - type: endpoint + key: createImage + path: create + - type: endpoint + key: createImageEdit + path: createEdit + - type: endpoint + key: createImageVariation + path: createVariation + - type: object + key: ImagesResponse + path: object + - id: images-streaming + title: Image Streaming + description: | + Stream image generation and editing in real time with server-sent events. + [Learn more about image streaming](https://platform.openai.com/docs/guides/image-generation). + navigationGroup: endpoints + sections: + - type: object + key: ImageGenPartialImageEvent + path: + - type: object + key: ImageGenCompletedEvent + path: + - type: object + key: ImageEditPartialImageEvent + path: + - type: object + key: ImageEditCompletedEvent + path: + - id: embeddings + title: Embeddings + description: > + Get a vector representation of a given input that can be easily consumed by machine learning models + and algorithms. + + Related guide: [Embeddings](https://platform.openai.com/docs/guides/embeddings) + navigationGroup: endpoints + sections: + - type: endpoint + key: createEmbedding + path: create + - type: object + key: Embedding + path: object + - id: chatkit + title: ChatKit + beta: true + description: | + Manage ChatKit sessions, threads, and file uploads for internal integrations. + navigationGroup: chatkit + sections: + - type: endpoint + key: CreateChatSessionMethod + beta: true + path: sessions/create + - type: endpoint + key: CancelChatSessionMethod + beta: true + path: sessions/cancel + - type: endpoint + key: ListThreadsMethod + beta: true + path: threads/list + - type: endpoint + key: GetThreadMethod + beta: true + path: threads/retrieve + - type: endpoint + key: DeleteThreadMethod + beta: true + path: threads/delete + - type: endpoint + key: ListThreadItemsMethod + beta: true + path: threads/list-items + - type: object + key: ChatSessionResource + path: sessions/object + - type: object + key: ThreadResource + path: threads/object + - type: object + key: ThreadItemListResource + path: threads/item-list + - id: evals + title: Evals + description: | + Create, manage, and run evals in the OpenAI platform. 
+ Related guide: [Evals](https://platform.openai.com/docs/guides/evals) + navigationGroup: endpoints + sections: + - type: endpoint + key: createEval + path: create + - type: endpoint + key: getEval + path: get + - type: endpoint + key: updateEval + path: update + - type: endpoint + key: deleteEval + path: delete + - type: endpoint + key: listEvals + path: list + - type: endpoint + key: getEvalRuns + path: getRuns + - type: endpoint + key: getEvalRun + path: getRun + - type: endpoint + key: createEvalRun + path: createRun + - type: endpoint + key: cancelEvalRun + path: cancelRun + - type: endpoint + key: deleteEvalRun + path: deleteRun + - type: endpoint + key: getEvalRunOutputItem + path: getRunOutputItem + - type: endpoint + key: getEvalRunOutputItems + path: getRunOutputItems + - type: object + key: Eval + path: object + - type: object + key: EvalRun + path: run-object + - type: object + key: EvalRunOutputItem + path: run-output-item-object + - id: fine-tuning + title: Fine-tuning + description: | + Manage fine-tuning jobs to tailor a model to your specific training data. + Related guide: [Fine-tune models](https://platform.openai.com/docs/guides/fine-tuning) + navigationGroup: endpoints + sections: + - type: endpoint + key: createFineTuningJob + path: create + - type: endpoint + key: listPaginatedFineTuningJobs + path: list + - type: endpoint + key: listFineTuningEvents + path: list-events + - type: endpoint + key: listFineTuningJobCheckpoints + path: list-checkpoints + - type: endpoint + key: listFineTuningCheckpointPermissions + path: list-permissions + - type: endpoint + key: createFineTuningCheckpointPermission + path: create-permission + - type: endpoint + key: deleteFineTuningCheckpointPermission + path: delete-permission + - type: endpoint + key: retrieveFineTuningJob + path: retrieve + - type: endpoint + key: cancelFineTuningJob + path: cancel + - type: endpoint + key: resumeFineTuningJob + path: resume + - type: endpoint + key: pauseFineTuningJob + path: pause + - type: object + key: FineTuneChatRequestInput + path: chat-input + - type: object + key: FineTunePreferenceRequestInput + path: preference-input + - type: object + key: FineTuneReinforcementRequestInput + path: reinforcement-input + - type: object + key: FineTuningJob + path: object + - type: object + key: FineTuningJobEvent + path: event-object + - type: object + key: FineTuningJobCheckpoint + path: checkpoint-object + - type: object + key: FineTuningCheckpointPermission + path: permission-object + - id: graders + title: Graders + description: | + Manage and run graders in the OpenAI platform. + Related guide: [Graders](https://platform.openai.com/docs/guides/graders) + navigationGroup: endpoints + sections: + - type: object + key: GraderStringCheck + path: string-check + - type: object + key: GraderTextSimilarity + path: text-similarity + - type: object + key: GraderScoreModel + path: score-model + - type: object + key: GraderLabelModel + path: label-model + - type: object + key: GraderPython + path: python + - type: object + key: GraderMulti + path: multi + - type: endpoint + key: runGrader + path: run + - type: endpoint + key: validateGrader + path: validate + beta: true + - id: batch + title: Batch + description: > + Create large batches of API requests for asynchronous processing. The Batch API returns completions + within 24 hours for a 50% discount. 
+ + Related guide: [Batch](https://platform.openai.com/docs/guides/batch) + navigationGroup: endpoints + sections: + - type: endpoint + key: createBatch + path: create + - type: endpoint + key: retrieveBatch + path: retrieve + - type: endpoint + key: cancelBatch + path: cancel + - type: endpoint + key: listBatches + path: list + - type: object + key: Batch + path: object + - type: object + key: BatchRequestInput + path: request-input + - type: object + key: BatchRequestOutput + path: request-output + - id: files + title: Files + description: > + Files are used to upload documents that can be used with features like + [Assistants](https://platform.openai.com/docs/api-reference/assistants), + [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning), and [Batch + API](https://platform.openai.com/docs/guides/batch). + navigationGroup: endpoints + sections: + - type: endpoint + key: createFile + path: create + - type: endpoint + key: listFiles + path: list + - type: endpoint + key: retrieveFile + path: retrieve + - type: endpoint + key: deleteFile + path: delete + - type: endpoint + key: downloadFile + path: retrieve-contents + - type: object + key: OpenAIFile + path: object + - id: uploads + title: Uploads + description: | + Allows you to upload large files in multiple parts. + navigationGroup: endpoints + sections: + - type: endpoint + key: createUpload + path: create + - type: endpoint + key: addUploadPart + path: add-part + - type: endpoint + key: completeUpload + path: complete + - type: endpoint + key: cancelUpload + path: cancel + - type: object + key: Upload + path: object + - type: object + key: UploadPart + path: part-object + - id: models + title: Models + description: > + List and describe the various models available in the API. You can refer to the + [Models](https://platform.openai.com/docs/models) documentation to understand what models are + available and the differences between them. + navigationGroup: endpoints + sections: + - type: endpoint + key: listModels + path: list + - type: endpoint + key: retrieveModel + path: retrieve + - type: endpoint + key: deleteModel + path: delete + - type: object + key: Model + path: object + - id: moderations + title: Moderations + description: > + Given text and/or image inputs, classifies if those inputs are potentially harmful across several + categories. + + Related guide: [Moderations](https://platform.openai.com/docs/guides/moderation) + navigationGroup: endpoints + sections: + - type: endpoint + key: createModeration + path: create + - type: object + key: CreateModerationResponse + path: object + - id: vector-stores + title: Vector stores + description: > + Vector stores power semantic search for the Retrieval API and the `file_search` tool in the Responses + and Assistants APIs. + + + Related guide: [File Search](https://platform.openai.com/docs/assistants/tools/file-search) + navigationGroup: vector_stores + sections: + - type: endpoint + key: createVectorStore + path: create + - type: endpoint + key: listVectorStores + path: list + - type: endpoint + key: getVectorStore + path: retrieve + - type: endpoint + key: modifyVectorStore + path: modify + - type: endpoint + key: deleteVectorStore + path: delete + - type: endpoint + key: searchVectorStore + path: search + - type: object + key: VectorStoreObject + path: object + - id: vector-stores-files + title: Vector store files + description: | + Vector store files represent files inside a vector store. 
+ + Related guide: [File Search](https://platform.openai.com/docs/assistants/tools/file-search) + navigationGroup: vector_stores + sections: + - type: endpoint + key: createVectorStoreFile + path: createFile + - type: endpoint + key: listVectorStoreFiles + path: listFiles + - type: endpoint + key: getVectorStoreFile + path: getFile + - type: endpoint + key: retrieveVectorStoreFileContent + path: getContent + - type: endpoint + key: updateVectorStoreFileAttributes + path: updateAttributes + - type: endpoint + key: deleteVectorStoreFile + path: deleteFile + - type: object + key: VectorStoreFileObject + path: file-object + - id: vector-stores-file-batches + title: Vector store file batches + description: | + Vector store file batches represent operations to add multiple files to a vector store. + Related guide: [File Search](https://platform.openai.com/docs/assistants/tools/file-search) + navigationGroup: vector_stores + sections: + - type: endpoint + key: createVectorStoreFileBatch + path: createBatch + - type: endpoint + key: getVectorStoreFileBatch + path: getBatch + - type: endpoint + key: cancelVectorStoreFileBatch + path: cancelBatch + - type: endpoint + key: listFilesInVectorStoreBatch + path: listBatchFiles + - type: object + key: VectorStoreFileBatchObject + path: batch-object + - id: containers + title: Containers + description: | + Create and manage containers for use with the Code Interpreter tool. + navigationGroup: containers + sections: + - type: endpoint + key: CreateContainer + path: createContainers + - type: endpoint + key: ListContainers + path: listContainers + - type: endpoint + key: RetrieveContainer + path: retrieveContainer + - type: endpoint + key: DeleteContainer + path: deleteContainer + - type: object + key: ContainerResource + path: object + - id: container-files + title: Container Files + description: | + Create and manage container files for use with the Code Interpreter tool. + navigationGroup: containers + sections: + - type: endpoint + key: CreateContainerFile + path: createContainerFile + - type: endpoint + key: ListContainerFiles + path: listContainerFiles + - type: endpoint + key: RetrieveContainerFile + path: retrieveContainerFile + - type: endpoint + key: RetrieveContainerFileContent + path: retrieveContainerFileContent + - type: endpoint + key: DeleteContainerFile + path: deleteContainerFile + - type: object + key: ContainerFileResource + path: object + - id: realtime + title: Realtime + description: | + Communicate with a multimodal model in real time over low latency interfaces + like WebRTC, WebSocket, and SIP. Natively supports speech-to-speech + as well as text, image, and audio inputs and outputs. + + [Learn more about the Realtime API](https://platform.openai.com/docs/guides/realtime). + navigationGroup: realtime + sections: + - type: endpoint + key: create-realtime-call + path: create-call + - id: realtime-sessions + title: Client secrets + description: > + REST API endpoint to generate ephemeral client secrets for use in client-side + + applications. Client secrets are short-lived tokens that can be passed to a client app, + + such as a web frontend or mobile client, which grants access to the Realtime API without + + leaking your main API key. You can configure a custom TTL for each client secret. + + + You can also attach session configuration options to the client secret, which will be + + applied to any sessions created using that client secret, but these can also be overridden + + by the client connection. 
+ + + [Learn more about authentication with client secrets over + WebRTC](https://platform.openai.com/docs/guides/realtime-webrtc). + navigationGroup: realtime + sections: + - type: endpoint + key: create-realtime-client-secret + path: create-realtime-client-secret + - type: object + key: RealtimeCreateClientSecretResponse + path: create-secret-response + - id: realtime-calls + title: Calls + description: | + REST endpoints for controlling WebRTC or SIP calls with the Realtime API. + Accept or reject an incoming call, transfer it to another destination, or hang up the + call once you are finished. + navigationGroup: realtime + sections: + - type: endpoint + key: accept-realtime-call + path: accept-call + - type: endpoint + key: reject-realtime-call + path: reject-call + - type: endpoint + key: refer-realtime-call + path: refer-call + - type: endpoint + key: hangup-realtime-call + path: hangup-call + - id: realtime-client-events + title: Client events + description: | + These are events that the OpenAI Realtime WebSocket server will accept from the client. + navigationGroup: realtime + sections: + - type: object + key: RealtimeClientEventSessionUpdate + path: + - type: object + key: RealtimeClientEventInputAudioBufferAppend + path: + - type: object + key: RealtimeClientEventInputAudioBufferCommit + path: + - type: object + key: RealtimeClientEventInputAudioBufferClear + path: + - type: object + key: RealtimeClientEventConversationItemCreate + path: + - type: object + key: RealtimeClientEventConversationItemRetrieve + path: + - type: object + key: RealtimeClientEventConversationItemTruncate + path: + - type: object + key: RealtimeClientEventConversationItemDelete + path: + - type: object + key: RealtimeClientEventResponseCreate + path: + - type: object + key: RealtimeClientEventResponseCancel + path: + - type: object + key: RealtimeClientEventOutputAudioBufferClear + path: + - id: realtime-server-events + title: Server events + description: | + These are events emitted from the OpenAI Realtime WebSocket server to the client. 
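An illustrative aside on the client-secret flow the section above describes: a trusted backend mints a short-lived secret and hands it to the browser or mobile client, which then talks to the Realtime API without ever seeing the main API key. A minimal sketch in Python with `requests`; the endpoint path, payload shape, and response field are assumptions inferred from the `create-realtime-client-secret` section key and `RealtimeCreateClientSecretResponse` object, not anything this navigation file defines:

```python
import requests

def mint_client_secret(api_key: str) -> str:
    """Hypothetical backend handler: mint an ephemeral Realtime client secret
    to return to a web client. Path and payload are assumptions; check the
    create-realtime-client-secret endpoint docs for the real contract."""
    resp = requests.post(
        "https://api.openai.com/v1/realtime/client_secrets",  # assumed path
        headers={"Authorization": f"Bearer {api_key}"},
        json={
            # Optional session defaults attached to the secret; per the
            # description above, the client connection may override these.
            "session": {"type": "realtime"},
        },
    )
    resp.raise_for_status()
    return resp.json()["value"]  # assumed field name for the secret token
```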
+ navigationGroup: realtime + sections: + - type: object + key: RealtimeServerEventError + path: + - type: object + key: RealtimeServerEventSessionCreated + path: + - type: object + key: RealtimeServerEventSessionUpdated + path: + - type: object + key: RealtimeServerEventConversationItemAdded + path: + - type: object + key: RealtimeServerEventConversationItemDone + path: + - type: object + key: RealtimeServerEventConversationItemRetrieved + path: + - type: object + key: RealtimeServerEventConversationItemInputAudioTranscriptionCompleted + path: + - type: object + key: RealtimeServerEventConversationItemInputAudioTranscriptionDelta + path: + - type: object + key: RealtimeServerEventConversationItemInputAudioTranscriptionSegment + path: + - type: object + key: RealtimeServerEventConversationItemInputAudioTranscriptionFailed + path: + - type: object + key: RealtimeServerEventConversationItemTruncated + path: + - type: object + key: RealtimeServerEventConversationItemDeleted + path: + - type: object + key: RealtimeServerEventInputAudioBufferCommitted + path: + - type: object + key: RealtimeServerEventInputAudioBufferCleared + path: + - type: object + key: RealtimeServerEventInputAudioBufferSpeechStarted + path: + - type: object + key: RealtimeServerEventInputAudioBufferSpeechStopped + path: + - type: object + key: RealtimeServerEventInputAudioBufferTimeoutTriggered + path: + - type: object + key: RealtimeServerEventOutputAudioBufferStarted + path: + - type: object + key: RealtimeServerEventOutputAudioBufferStopped + path: + - type: object + key: RealtimeServerEventOutputAudioBufferCleared + path: + - type: object + key: RealtimeServerEventResponseCreated + path: + - type: object + key: RealtimeServerEventResponseDone + path: + - type: object + key: RealtimeServerEventResponseOutputItemAdded + path: + - type: object + key: RealtimeServerEventResponseOutputItemDone + path: + - type: object + key: RealtimeServerEventResponseContentPartAdded + path: + - type: object + key: RealtimeServerEventResponseContentPartDone + path: + - type: object + key: RealtimeServerEventResponseTextDelta + path: + - type: object + key: RealtimeServerEventResponseTextDone + path: + - type: object + key: RealtimeServerEventResponseAudioTranscriptDelta + path: + - type: object + key: RealtimeServerEventResponseAudioTranscriptDone + path: + - type: object + key: RealtimeServerEventResponseAudioDelta + path: + - type: object + key: RealtimeServerEventResponseAudioDone + path: + - type: object + key: RealtimeServerEventResponseFunctionCallArgumentsDelta + path: + - type: object + key: RealtimeServerEventResponseFunctionCallArgumentsDone + path: + - type: object + key: RealtimeServerEventResponseMCPCallArgumentsDelta + path: + - type: object + key: RealtimeServerEventResponseMCPCallArgumentsDone + path: + - type: object + key: RealtimeServerEventResponseMCPCallInProgress + path: + - type: object + key: RealtimeServerEventResponseMCPCallCompleted + path: + - type: object + key: RealtimeServerEventResponseMCPCallFailed + path: + - type: object + key: RealtimeServerEventMCPListToolsInProgress + path: + - type: object + key: RealtimeServerEventMCPListToolsCompleted + path: + - type: object + key: RealtimeServerEventMCPListToolsFailed + path: + - type: object + key: RealtimeServerEventRateLimitsUpdated + path: + - id: chat + title: Chat Completions + description: > + The Chat Completions API endpoint will generate a model response from a + + list of messages comprising a conversation. 
+ + + Related guides: + + - [Quickstart](https://platform.openai.com/docs/quickstart?api-mode=chat) + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text?api-mode=chat) + + - [Image inputs](https://platform.openai.com/docs/guides/images?api-mode=chat) + + - [Audio inputs and outputs](https://platform.openai.com/docs/guides/audio?api-mode=chat) + + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat) + + - [Function calling](https://platform.openai.com/docs/guides/function-calling?api-mode=chat) + + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state?api-mode=chat) + + + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) + + to take advantage of the latest OpenAI platform features. Compare + + [Chat Completions with + Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + navigationGroup: chat + sections: + - type: endpoint + key: createChatCompletion + path: create + - type: endpoint + key: getChatCompletion + path: get + - type: endpoint + key: getChatCompletionMessages + path: getMessages + - type: endpoint + key: listChatCompletions + path: list + - type: endpoint + key: updateChatCompletion + path: update + - type: endpoint + key: deleteChatCompletion + path: delete + - type: object + key: CreateChatCompletionResponse + path: object + - type: object + key: ChatCompletionList + path: list-object + - type: object + key: ChatCompletionMessageList + path: message-list + - id: chat-streaming + title: Streaming + description: | + Stream Chat Completions in real time. Receive chunks of completions + returned from the model using server-sent events. + [Learn more](https://platform.openai.com/docs/guides/streaming-responses?api-mode=chat). + navigationGroup: chat + sections: + - type: object + key: CreateChatCompletionStreamResponse + path: streaming + - id: assistants + title: Assistants + beta: true + description: | + Build assistants that can call models and use tools to perform tasks. + + [Get started with the Assistants API](https://platform.openai.com/docs/assistants) + navigationGroup: assistants + sections: + - type: endpoint + key: createAssistant + path: createAssistant + - type: endpoint + key: listAssistants + path: listAssistants + - type: endpoint + key: getAssistant + path: getAssistant + - type: endpoint + key: modifyAssistant + path: modifyAssistant + - type: endpoint + key: deleteAssistant + path: deleteAssistant + - type: object + key: AssistantObject + path: object + - id: threads + title: Threads + beta: true + description: | + Create threads that assistants can interact with. 
+ + Related guide: [Assistants](https://platform.openai.com/docs/assistants/overview) + navigationGroup: assistants + sections: + - type: endpoint + key: createThread + path: createThread + - type: endpoint + key: getThread + path: getThread + - type: endpoint + key: modifyThread + path: modifyThread + - type: endpoint + key: deleteThread + path: deleteThread + - type: object + key: ThreadObject + path: object + - id: messages + title: Messages + beta: true + description: | + Create messages within threads + + Related guide: [Assistants](https://platform.openai.com/docs/assistants/overview) + navigationGroup: assistants + sections: + - type: endpoint + key: createMessage + path: createMessage + - type: endpoint + key: listMessages + path: listMessages + - type: endpoint + key: getMessage + path: getMessage + - type: endpoint + key: modifyMessage + path: modifyMessage + - type: endpoint + key: deleteMessage + path: deleteMessage + - type: object + key: MessageObject + path: object + - id: runs + title: Runs + beta: true + description: | + Represents an execution run on a thread. + + Related guide: [Assistants](https://platform.openai.com/docs/assistants/overview) + navigationGroup: assistants + sections: + - type: endpoint + key: createRun + path: createRun + - type: endpoint + key: createThreadAndRun + path: createThreadAndRun + - type: endpoint + key: listRuns + path: listRuns + - type: endpoint + key: getRun + path: getRun + - type: endpoint + key: modifyRun + path: modifyRun + - type: endpoint + key: submitToolOuputsToRun + path: submitToolOutputs + - type: endpoint + key: cancelRun + path: cancelRun + - type: object + key: RunObject + path: object + - id: run-steps + title: Run steps + beta: true + description: | + Represents the steps (model and tool calls) taken during the run. + + Related guide: [Assistants](https://platform.openai.com/docs/assistants/overview) + navigationGroup: assistants + sections: + - type: endpoint + key: listRunSteps + path: listRunSteps + - type: endpoint + key: getRunStep + path: getRunStep + - type: object + key: RunStepObject + path: step-object + - id: assistants-streaming + title: Streaming + beta: true + description: > + Stream the result of executing a Run or resuming a Run after submitting tool outputs. + + You can stream events from the [Create Thread and + Run](https://platform.openai.com/docs/api-reference/runs/createThreadAndRun), + + [Create Run](https://platform.openai.com/docs/api-reference/runs/createRun), and [Submit Tool + Outputs](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) + + endpoints by passing `"stream": true`. The response will be a [Server-Sent + events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) stream. + + Our Node and Python SDKs provide helpful utilities to make streaming easy. Reference the + + [Assistants API quickstart](https://platform.openai.com/docs/assistants/overview) to learn more. + navigationGroup: assistants + sections: + - type: object + key: MessageDeltaObject + path: message-delta-object + - type: object + key: RunStepDeltaObject + path: run-step-delta-object + - type: object + key: AssistantStreamEvent + path: events + - id: administration + title: Administration + description: > + Programmatically manage your organization. + + The Audit Logs endpoint provides a log of all actions taken in the organization for security and + monitoring purposes. 
+
+ To access these endpoints please generate an Admin API Key through the [API Platform Organization
+ overview](/organization/admin-keys). Admin API keys cannot be used for non-administration endpoints.
+
+ For best practices on setting up your organization, please refer to this
+ [guide](https://platform.openai.com/docs/guides/production-best-practices#setting-up-your-organization)
+ navigationGroup: administration
+ - id: admin-api-keys
+ title: Admin API Keys
+ description: >
+ Admin API keys enable Organization Owners to programmatically manage various aspects of their
+ organization, including users, projects, and API keys. These keys provide administrative capabilities,
+ such as creating, updating, and deleting users; managing projects; and overseeing API key lifecycles.
+
+
+ Key Features of Admin API Keys:
+
+
+ - User Management: Invite new users, update roles, and remove users from the organization.
+
+
+ - Project Management: Create, update, archive projects, and manage user assignments within projects.
+
+
+ - API Key Oversight: List, retrieve, and delete API keys associated with projects.
+
+
+ Only Organization Owners have the authority to create and utilize Admin API keys. To manage these
+ keys, Organization Owners can navigate to the Admin Keys section of their API Platform dashboard.
+
+
+ For direct access to the Admin Keys management page, Organization Owners can use the following link:
+
+
+ [https://platform.openai.com/settings/organization/admin-keys](https://platform.openai.com/settings/organization/admin-keys)
+
+
+ It's crucial to handle Admin API keys with care due to their elevated permissions. Adhering to best
+ practices, such as regular key rotation and assigning appropriate permissions, enhances security and
+ ensures proper governance within the organization.
+ navigationGroup: administration
+ sections:
+ - type: endpoint
+ key: admin-api-keys-list
+ path: list
+ - type: endpoint
+ key: admin-api-keys-create
+ path: create
+ - type: endpoint
+ key: admin-api-keys-get
+ path: listget
+ - type: endpoint
+ key: admin-api-keys-delete
+ path: delete
+ - type: object
+ key: AdminApiKey
+ path: object
+ - id: invite
+ title: Invites
+ description: Invite and manage invitations for an organization.
+ navigationGroup: administration
+ sections:
+ - type: endpoint
+ key: list-invites
+ path: list
+ - type: endpoint
+ key: inviteUser
+ path: create
+ - type: endpoint
+ key: retrieve-invite
+ path: retrieve
+ - type: endpoint
+ key: delete-invite
+ path: delete
+ - type: object
+ key: Invite
+ path: object
+ - id: users
+ title: Users
+ description: |
+ Manage users and their role in an organization.
+ navigationGroup: administration
+ sections:
+ - type: endpoint
+ key: list-users
+ path: list
+ - type: endpoint
+ key: modify-user
+ path: modify
+ - type: endpoint
+ key: retrieve-user
+ path: retrieve
+ - type: endpoint
+ key: delete-user
+ path: delete
+ - type: object
+ key: User
+ path: object
+ - id: projects
+ title: Projects
+ description: |
+ Manage the projects within an organization, including creation, updating, and archiving of projects.
+ The Default project cannot be archived.
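The Admin API key lifecycle described above (list, create, retrieve, delete) maps onto a handful of HTTP calls. A minimal sketch with Python's `requests`; the `/v1/organization/admin_api_keys` path is an assumption based on OpenAI's organization Admin API, since this navigation file only names documentation sections, not URLs:

```python
import requests

ADMIN_KEY = "sk-admin-..."  # an Admin API key; regular project keys won't work here
HEADERS = {"Authorization": f"Bearer {ADMIN_KEY}"}

# Assumed base path for the admin-api-keys sections above.
base = "https://api.openai.com/v1/organization/admin_api_keys"

keys = requests.get(base, headers=HEADERS).json()                           # admin-api-keys-list
created = requests.post(base, headers=HEADERS, json={"name": "ci"}).json()  # admin-api-keys-create
requests.get(f"{base}/{created['id']}", headers=HEADERS)                    # admin-api-keys-get
requests.delete(f"{base}/{created['id']}", headers=HEADERS)                 # admin-api-keys-delete
```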
+ navigationGroup: administration + sections: + - type: endpoint + key: list-projects + path: list + - type: endpoint + key: create-project + path: create + - type: endpoint + key: retrieve-project + path: retrieve + - type: endpoint + key: modify-project + path: modify + - type: endpoint + key: archive-project + path: archive + - type: object + key: Project + path: object + - id: project-users + title: Project users + description: | + Manage users within a project, including adding, updating roles, and removing users. + navigationGroup: administration + sections: + - type: endpoint + key: list-project-users + path: list + - type: endpoint + key: create-project-user + path: create + - type: endpoint + key: retrieve-project-user + path: retrieve + - type: endpoint + key: modify-project-user + path: modify + - type: endpoint + key: delete-project-user + path: delete + - type: object + key: ProjectUser + path: object + - id: project-service-accounts + title: Project service accounts + description: > + Manage service accounts within a project. A service account is a bot user that is not associated with + a user. + + If a user leaves an organization, their keys and membership in projects will no longer work. Service + accounts + + do not have this limitation. However, service accounts can also be deleted from a project. + navigationGroup: administration + sections: + - type: endpoint + key: list-project-service-accounts + path: list + - type: endpoint + key: create-project-service-account + path: create + - type: endpoint + key: retrieve-project-service-account + path: retrieve + - type: endpoint + key: delete-project-service-account + path: delete + - type: object + key: ProjectServiceAccount + path: object + - id: project-api-keys + title: Project API keys + description: > + Manage API keys for a given project. Supports listing and deleting keys for users. + + This API does not allow issuing keys for users, as users need to authorize themselves to generate + keys. + navigationGroup: administration + sections: + - type: endpoint + key: list-project-api-keys + path: list + - type: endpoint + key: retrieve-project-api-key + path: retrieve + - type: endpoint + key: delete-project-api-key + path: delete + - type: object + key: ProjectApiKey + path: object + - id: project-rate-limits + title: Project rate limits + description: > + Manage rate limits per model for projects. Rate limits may be configured to be equal to or lower than + the organization's rate limits. + navigationGroup: administration + sections: + - type: endpoint + key: list-project-rate-limits + path: list + - type: endpoint + key: update-project-rate-limits + path: update + - type: object + key: ProjectRateLimit + path: object + - id: audit-logs + title: Audit logs + description: > + Logs of user actions and configuration changes within this organization. + + To log events, an Organization Owner must activate logging in the [Data Controls + Settings](/settings/organization/data-controls/data-retention). + + Once activated, for security reasons, logging cannot be deactivated. + navigationGroup: administration + sections: + - type: endpoint + key: list-audit-logs + path: list + - type: object + key: AuditLog + path: object + - id: usage + title: Usage + description: > + The **Usage API** provides detailed insights into your activity across the OpenAI API. 
It also + includes a separate [Costs endpoint](https://platform.openai.com/docs/api-reference/usage/costs), + which offers visibility into your spend, breaking down consumption by invoice line items and project + IDs. + + + While the Usage API delivers granular usage data, it may not always reconcile perfectly with the Costs + due to minor differences in how usage and spend are recorded. For financial purposes, we recommend + using the [Costs endpoint](https://platform.openai.com/docs/api-reference/usage/costs) or the [Costs + tab](/settings/organization/usage) in the Usage Dashboard, which will reconcile back to your billing + invoice. + navigationGroup: administration + sections: + - type: endpoint + key: usage-completions + path: completions + - type: object + key: UsageCompletionsResult + path: completions_object + - type: endpoint + key: usage-embeddings + path: embeddings + - type: object + key: UsageEmbeddingsResult + path: embeddings_object + - type: endpoint + key: usage-moderations + path: moderations + - type: object + key: UsageModerationsResult + path: moderations_object + - type: endpoint + key: usage-images + path: images + - type: object + key: UsageImagesResult + path: images_object + - type: endpoint + key: usage-audio-speeches + path: audio_speeches + - type: object + key: UsageAudioSpeechesResult + path: audio_speeches_object + - type: endpoint + key: usage-audio-transcriptions + path: audio_transcriptions + - type: object + key: UsageAudioTranscriptionsResult + path: audio_transcriptions_object + - type: endpoint + key: usage-vector-stores + path: vector_stores + - type: object + key: UsageVectorStoresResult + path: vector_stores_object + - type: endpoint + key: usage-code-interpreter-sessions + path: code_interpreter_sessions + - type: object + key: UsageCodeInterpreterSessionsResult + path: code_interpreter_sessions_object + - type: endpoint + key: usage-costs + path: costs + - type: object + key: CostsResult + path: costs_object + - id: certificates + beta: true + title: Certificates + description: > + Manage Mutual TLS certificates across your organization and projects. + + + [Learn more about Mutual + TLS.](https://help.openai.com/en/articles/10876024-openai-mutual-tls-beta-program) + navigationGroup: administration + sections: + - type: endpoint + key: uploadCertificate + path: uploadCertificate + - type: endpoint + key: getCertificate + path: getCertificate + - type: endpoint + key: modifyCertificate + path: modifyCertificate + - type: endpoint + key: deleteCertificate + path: deleteCertificate + - type: endpoint + key: listOrganizationCertificates + path: listOrganizationCertificates + - type: endpoint + key: listProjectCertificates + path: listProjectCertificates + - type: endpoint + key: activateOrganizationCertificates + path: activateOrganizationCertificates + - type: endpoint + key: deactivateOrganizationCertificates + path: deactivateOrganizationCertificates + - type: endpoint + key: activateProjectCertificates + path: activateProjectCertificates + - type: endpoint + key: deactivateProjectCertificates + path: deactivateProjectCertificates + - type: object + key: Certificate + path: object + - id: completions + title: Completions + legacy: true + navigationGroup: legacy + description: > + Given a prompt, the model will return one or more predicted completions along with the probabilities + of alternative tokens at each position. 
Most developers should use our [Chat Completions
+ API](https://platform.openai.com/docs/guides/text-generation#text-generation-models) to leverage our
+ best and newest models.
+ sections:
+ - type: endpoint
+ key: createCompletion
+ path: create
+ - type: object
+ key: CreateCompletionResponse
+ path: object
+ - id: realtime_beta
+ title: Realtime Beta
+ legacy: true
+ navigationGroup: legacy
+ description: >
+ Communicate with a multimodal model in real time over low latency interfaces like WebRTC, WebSocket,
+ and SIP. Natively supports speech-to-speech as well as text, image, and audio inputs and outputs.
+
+ [Learn more about the Realtime API](https://platform.openai.com/docs/guides/realtime).
+ - id: realtime-beta-sessions
+ title: Realtime Beta session tokens
+ description: |
+ REST API endpoint to generate ephemeral session tokens for use in client-side
+ applications.
+ navigationGroup: legacy
+ sections:
+ - type: endpoint
+ key: create-realtime-session
+ path: create
+ - type: endpoint
+ key: create-realtime-transcription-session
+ path: create-transcription
+ - type: object
+ key: RealtimeSessionCreateResponse
+ path: session_object
+ - type: object
+ key: RealtimeTranscriptionSessionCreateResponse
+ path: transcription_session_object
+ - id: realtime-beta-client-events
+ title: Realtime Beta client events
+ description: |
+ These are events that the OpenAI Realtime WebSocket server will accept from the client.
+ navigationGroup: legacy
+ sections:
+ - type: object
+ key: RealtimeBetaClientEventSessionUpdate
+ path:
+ - type: object
+ key: RealtimeBetaClientEventInputAudioBufferAppend
+ path:
+ - type: object
+ key: RealtimeBetaClientEventInputAudioBufferCommit
+ path:
+ - type: object
+ key: RealtimeBetaClientEventInputAudioBufferClear
+ path:
+ - type: object
+ key: RealtimeBetaClientEventConversationItemCreate
+ path:
+ - type: object
+ key: RealtimeBetaClientEventConversationItemRetrieve
+ path:
+ - type: object
+ key: RealtimeBetaClientEventConversationItemTruncate
+ path:
+ - type: object
+ key: RealtimeBetaClientEventConversationItemDelete
+ path:
+ - type: object
+ key: RealtimeBetaClientEventResponseCreate
+ path:
+ - type: object
+ key: RealtimeBetaClientEventResponseCancel
+ path:
+ - type: object
+ key: RealtimeBetaClientEventTranscriptionSessionUpdate
+ path:
+ - type: object
+ key: RealtimeBetaClientEventOutputAudioBufferClear
+ path:
+ - id: realtime-beta-server-events
+ title: Realtime Beta server events
+ description: |
+ These are events emitted from the OpenAI Realtime WebSocket server to the client.
+ navigationGroup: legacy + sections: + - type: object + key: RealtimeBetaServerEventError + path: + - type: object + key: RealtimeBetaServerEventSessionCreated + path: + - type: object + key: RealtimeBetaServerEventSessionUpdated + path: + - type: object + key: RealtimeBetaServerEventTranscriptionSessionCreated + path: + - type: object + key: RealtimeBetaServerEventTranscriptionSessionUpdated + path: + - type: object + key: RealtimeBetaServerEventConversationItemCreated + path: + - type: object + key: RealtimeBetaServerEventConversationItemRetrieved + path: + - type: object + key: RealtimeBetaServerEventConversationItemInputAudioTranscriptionCompleted + path: + - type: object + key: RealtimeBetaServerEventConversationItemInputAudioTranscriptionDelta + path: + - type: object + key: RealtimeBetaServerEventConversationItemInputAudioTranscriptionSegment + path: + - type: object + key: RealtimeBetaServerEventConversationItemInputAudioTranscriptionFailed + path: + - type: object + key: RealtimeBetaServerEventConversationItemTruncated + path: + - type: object + key: RealtimeBetaServerEventConversationItemDeleted + path: + - type: object + key: RealtimeBetaServerEventInputAudioBufferCommitted + path: + - type: object + key: RealtimeBetaServerEventInputAudioBufferCleared + path: + - type: object + key: RealtimeBetaServerEventInputAudioBufferSpeechStarted + path: + - type: object + key: RealtimeBetaServerEventInputAudioBufferSpeechStopped + path: + - type: object + key: RealtimeServerEventInputAudioBufferTimeoutTriggered + path: + - type: object + key: RealtimeBetaServerEventResponseCreated + path: + - type: object + key: RealtimeBetaServerEventResponseDone + path: + - type: object + key: RealtimeBetaServerEventResponseOutputItemAdded + path: + - type: object + key: RealtimeBetaServerEventResponseOutputItemDone + path: + - type: object + key: RealtimeBetaServerEventResponseContentPartAdded + path: + - type: object + key: RealtimeBetaServerEventResponseContentPartDone + path: + - type: object + key: RealtimeBetaServerEventResponseTextDelta + path: + - type: object + key: RealtimeBetaServerEventResponseTextDone + path: + - type: object + key: RealtimeBetaServerEventResponseAudioTranscriptDelta + path: + - type: object + key: RealtimeBetaServerEventResponseAudioTranscriptDone + path: + - type: object + key: RealtimeBetaServerEventResponseAudioDelta + path: + - type: object + key: RealtimeBetaServerEventResponseAudioDone + path: + - type: object + key: RealtimeBetaServerEventResponseFunctionCallArgumentsDelta + path: + - type: object + key: RealtimeBetaServerEventResponseFunctionCallArgumentsDone + path: + - type: object + key: RealtimeBetaServerEventResponseMCPCallArgumentsDelta + path: + - type: object + key: RealtimeBetaServerEventResponseMCPCallArgumentsDone + path: + - type: object + key: RealtimeBetaServerEventResponseMCPCallInProgress + path: + - type: object + key: RealtimeBetaServerEventResponseMCPCallCompleted + path: + - type: object + key: RealtimeBetaServerEventResponseMCPCallFailed + path: + - type: object + key: RealtimeBetaServerEventMCPListToolsInProgress + path: + - type: object + key: RealtimeBetaServerEventMCPListToolsCompleted + path: + - type: object + key: RealtimeBetaServerEventMCPListToolsFailed + path: + - type: object + key: RealtimeBetaServerEventRateLimitsUpdated + path: diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html deleted file mode 100644 index 38122ebc0..000000000 --- a/docs/static/stainless-llama-stack-spec.html 
+++ /dev/null @@ -1,18061 +0,0 @@ - - - - - - - OpenAPI specification - - - - - - - - - - - - - diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 93049a14a..51607d92d 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -1,20 +1,158 @@ openapi: 3.1.0 info: - title: >- - Llama Stack Specification - Stable & Experimental APIs - version: v1 - description: >- + title: Llama Stack Specification - Stable & Experimental APIs + description: |- This is the specification of the Llama Stack that provides - a set of endpoints and their corresponding interfaces that are - tailored to - best leverage Llama Models. + a set of endpoints and their corresponding interfaces that are + tailored to + best leverage Llama Models. - **🔗 COMBINED**: This specification includes both stable production-ready APIs - and experimental pre-release APIs. Use stable APIs for production deployments - and experimental APIs for testing new features. + **🔗 COMBINED**: This specification includes both stable production-ready APIs + and experimental pre-release APIs. Use stable APIs for production deployments + and experimental APIs for testing new features. + version: v1 servers: - - url: http://any-hosted-llama-stack.com +- url: http://any-hosted-llama-stack.com paths: + /v1/batches: + get: + responses: + '200': + description: A list of batch objects. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBatchesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Batches + summary: List Batches + description: List all batches for the current user. + operationId: list_batches_v1_batches_get + parameters: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + type: integer + default: 20 + title: Limit + post: + responses: + '200': + description: The created batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Batches + summary: Create Batch + description: Create a new batch for processing multiple API requests. + operationId: create_batch_v1_batches_post + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' + /v1/batches/{batch_id}: + get: + responses: + '200': + description: The batch object. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Retrieve Batch + description: Retrieve information about a specific batch. + operationId: retrieve_batch_v1_batches__batch_id__get + parameters: + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' + /v1/batches/{batch_id}/cancel: + post: + responses: + '200': + description: The updated batch object. + content: + application/json: + schema: + $ref: '#/components/schemas/Batch' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Batches + summary: Cancel Batch + description: Cancel a batch that is in progress. + operationId: cancel_batch_v1_batches__batch_id__cancel_post + parameters: + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/chat/completions: get: responses: @@ -26,48 +164,56 @@ paths: $ref: '#/components/schemas/ListOpenAIChatCompletionResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: List chat completions. + - Inference + summary: List Chat Completions description: List chat completions. + operationId: list_chat_completions_v1_chat_completions_get parameters: - - name: after - in: query - description: >- - The ID of the last chat completion to return. - required: false - schema: - type: string - - name: limit - in: query - description: >- - The maximum number of chat completions to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort the chat completions by: "asc" or "desc". Defaults to - "desc". 
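The new `/v1/batches` paths that conclude just above (list with `after`/`limit` pagination, create, retrieve, cancel) can be exercised with plain HTTP. A minimal sketch with Python's `requests`, assuming a Llama Stack server on `http://localhost:8321` (the spec's own server entry is a generic placeholder); the `CreateBatchRequest` body is only referenced by `$ref` in the diff, so the field names below follow the OpenAI Batch API shape and should be checked against that schema:

```python
import requests

BASE = "http://localhost:8321"  # assumed local server address

# GET /v1/batches supports cursor pagination via `after` and `limit` (default 20).
page = requests.get(f"{BASE}/v1/batches", params={"limit": 20}).json()

# POST /v1/batches takes a CreateBatchRequest body (fields assumed here).
batch = requests.post(
    f"{BASE}/v1/batches",
    json={
        "input_file_id": "file-abc123",        # hypothetical file ID
        "endpoint": "/v1/chat/completions",
        "completion_window": "24h",
    },
).json()

# Retrieve and cancel take the batch ID as a path parameter.
requests.get(f"{BASE}/v1/batches/{batch['id']}")
requests.post(f"{BASE}/v1/batches/{batch['id']}/cancel")
```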
- required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -75,35 +221,36 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletion' - - $ref: '#/components/schemas/OpenAIChatCompletionChunk' + $ref: '#/components/schemas/OpenAIChatCompletion' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIChatCompletionChunk' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inference - summary: Create chat completions. - description: >- + - Inference + summary: Openai Chat Completion + description: |- Create chat completions. - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. - parameters: [] + Generate an OpenAI-compatible chat completion for the given messages using the specified model. + operationId: openai_chat_completion_v1_chat_completions_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody' - required: true - deprecated: false /v1/chat/completions/{completion_id}: get: responses: @@ -114,30 +261,32 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletionWithInputMessages' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Get chat completion. - description: >- + - Inference + summary: Get Chat Completion + description: |- Get chat completion. Describe a chat completion by its ID. + operationId: get_chat_completion_v1_chat_completions__completion_id__get parameters: - - name: completion_id - in: path - description: ID of the chat completion. 
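The substantive schema change in the `/v1/chat/completions` POST hunk above is that streaming is no longer folded into a JSON `oneOf`: the 200 response now declares `OpenAIChatCompletion` under `application/json` and `OpenAIChatCompletionChunk` under `text/event-stream`. A minimal sketch of both modes with `requests`; the base URL and model ID are assumptions, and the body fields follow the OpenAI-compatible shape named by `OpenAIChatCompletionRequestWithExtraBody`:

```python
import requests

BASE = "http://localhost:8321"  # assumed local server address

# Non-streaming: the 200 response is a single OpenAIChatCompletion JSON object.
resp = requests.post(
    f"{BASE}/v1/chat/completions",
    json={
        "model": "llama3.2:3b",  # hypothetical model ID
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(resp.json()["choices"][0]["message"]["content"])

# Streaming: with "stream": true the server answers with text/event-stream,
# each event carrying an OpenAIChatCompletionChunk.
with requests.post(
    f"{BASE}/v1/chat/completions",
    json={
        "model": "llama3.2:3b",
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": True,
    },
    stream=True,
) as stream:
    for line in stream.iter_lines():
        if line:
            print(line.decode())
```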
- required: true - schema: - type: string - deprecated: false + - name: completion_id + in: path + required: true + schema: + type: string + description: 'Path parameter: completion_id' /v1/completions: post: responses: @@ -148,31 +297,31 @@ paths: schema: $ref: '#/components/schemas/OpenAICompletion' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create completion. - description: >- + - Inference + summary: Openai Completion + description: |- Create completion. - Generate an OpenAI-compatible completion for the given prompt using the specified - model. - parameters: [] + Generate an OpenAI-compatible completion for the given prompt using the specified model. + operationId: openai_completion_v1_completions_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody' required: true - deprecated: false /v1/conversations: post: responses: @@ -183,30 +332,31 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Create a conversation. - description: >- + - Conversations + summary: Create Conversation + description: |- Create a conversation. Create a conversation. - parameters: [] + operationId: create_conversation_v1_conversations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreateConversationRequest' required: true - deprecated: false /v1/conversations/{conversation_id}: get: responses: @@ -217,30 +367,32 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve a conversation. - description: >- + - Conversations + summary: Get Conversation + description: |- Retrieve a conversation. Get a conversation with the given ID. + operationId: get_conversation_v1_conversations__conversation_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. 
- required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' post: responses: '200': @@ -250,36 +402,38 @@ paths: schema: $ref: '#/components/schemas/Conversation' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Update a conversation. - description: >- + - Conversations + summary: Update Conversation + description: |- Update a conversation. Update a conversation's metadata with the given ID. + operationId: update_conversation_v1_conversations__conversation_id__post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/UpdateConversationRequest' required: true - deprecated: false delete: responses: '200': @@ -289,30 +443,32 @@ paths: schema: $ref: '#/components/schemas/ConversationDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete a conversation. - description: >- + - Conversations + summary: Openai Delete Conversation + description: |- Delete a conversation. Delete a conversation with the given ID. + operationId: openai_delete_conversation_v1_conversations__conversation_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' /v1/conversations/{conversation_id}/items: get: responses: @@ -324,173 +480,68 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: List items. - description: >- + - Conversations + summary: List Items + description: |- List items. List items in the conversation. 
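With the four conversation operations above in place (create, retrieve, update, delete), plus the item endpoints that follow, the CRUD flow is short. A minimal sketch with `requests`; the base URL is assumed, and since `CreateConversationRequest` and `UpdateConversationRequest` are only referenced by `$ref`, the `metadata` field is an illustrative assumption:

```python
import requests

BASE = "http://localhost:8321"  # assumed local server address

# POST /v1/conversations — body fields assumed from CreateConversationRequest.
conv = requests.post(f"{BASE}/v1/conversations", json={"metadata": {"topic": "demo"}}).json()

# GET / POST / DELETE /v1/conversations/{conversation_id}
requests.get(f"{BASE}/v1/conversations/{conv['id']}")
requests.post(f"{BASE}/v1/conversations/{conv['id']}", json={"metadata": {"topic": "updated"}})
requests.delete(f"{BASE}/v1/conversations/{conv['id']}")
```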
+ operationId: list_items_v1_conversations__conversation_id__items_get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - enum: + - asc + - desc type: string - - name: after - in: query - description: >- - An item ID to list items after, used in pagination. - required: true - schema: - oneOf: - - type: string - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: include - in: query - description: >- - Specify additional output data to include in the response. - required: true - schema: - oneOf: - - type: array - items: - type: string - enum: - - code_interpreter_call.outputs - - computer_call_output.output.image_url - - file_search_call.results - - message.input_image.image_url - - message.output_text.logprobs - - reasoning.encrypted_content - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: limit - in: query - description: >- - A limit on the number of objects to be returned (1-100, default 20). - required: true - schema: - oneOf: - - type: integer - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. - - ``` - - name: order - in: query - description: >- - The order to return items in (asc or desc, default desc). - required: true - schema: - oneOf: - - type: string - enum: - - asc - - desc - - type: object - title: NotGiven - description: >- - A sentinel singleton class used to distinguish omitted keyword arguments - from those passed in with the value None (which may have different - behavior). - - For example: - - - ```py - - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... - - - - get(timeout=1) # 1s timeout - - get(timeout=None) # No timeout - - get() # Default timeout behavior, which may not be statically known - at the method definition. 
- - ``` - deprecated: false + - type: 'null' + title: Order + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array + items: + $ref: '#/components/schemas/ConversationItemInclude' + - type: 'null' + title: Include post: responses: '200': @@ -501,35 +552,37 @@ paths: $ref: '#/components/schemas/ConversationItemList' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Conversations - summary: Create items. - description: >- + - Conversations + summary: Add Items + description: |- Create items. Create items in the conversation. + operationId: add_items_v1_conversations__conversation_id__items_post parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/AddItemsRequest' - required: true - deprecated: false /v1/conversations/{conversation_id}/items/{item_id}: get: responses: @@ -538,38 +591,40 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ConversationItem' + $ref: '#/components/schemas/OpenAIResponseMessage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Retrieve an item. - description: >- + - Conversations + summary: Retrieve + description: |- Retrieve an item. Retrieve a conversation item. + operationId: retrieve_v1_conversations__conversation_id__items__item_id__get parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. 
- required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' delete: responses: '200': @@ -579,375 +634,378 @@ paths: schema: $ref: '#/components/schemas/ConversationItemDeletedResource' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Conversations - summary: Delete an item. - description: >- + - Conversations + summary: Openai Delete Conversation Item + description: |- Delete an item. Delete a conversation item. + operationId: openai_delete_conversation_item_v1_conversations__conversation_id__items__item_id__delete parameters: - - name: conversation_id - in: path - description: The conversation identifier. - required: true - schema: - type: string - - name: item_id - in: path - description: The item identifier. - required: true - schema: - type: string - deprecated: false + - name: conversation_id + in: path + required: true + schema: + type: string + description: 'Path parameter: conversation_id' + - name: item_id + in: path + required: true + schema: + type: string + description: 'Path parameter: item_id' /v1/embeddings: post: responses: '200': - description: >- - An OpenAIEmbeddingsResponse containing the embeddings. + description: An OpenAIEmbeddingsResponse containing the embeddings. content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: Create embeddings. - description: >- + - Inference + summary: Openai Embeddings + description: |- Create embeddings. - Generate OpenAI-compatible embeddings for the given input using the specified - model. - parameters: [] + Generate OpenAI-compatible embeddings for the given input using the specified model. + operationId: openai_embeddings_v1_embeddings_post requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAIEmbeddingsRequestWithExtraBody' required: true - deprecated: false /v1/files: get: responses: '200': - description: >- - An ListOpenAIFileResponse containing the list of files. + description: An ListOpenAIFileResponse containing the list of files. 
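The `/v1/embeddings` hunk above keeps the OpenAI-compatible contract while tightening the response descriptions. A minimal sketch with `requests`; the base URL and model ID are assumptions, and the body fields follow the OpenAI embeddings shape named by `OpenAIEmbeddingsRequestWithExtraBody`:

```python
import requests

BASE = "http://localhost:8321"  # assumed local server address

resp = requests.post(
    f"{BASE}/v1/embeddings",
    json={
        "model": "all-MiniLM-L6-v2",         # hypothetical embedding model
        "input": ["first text", "second text"],
    },
).json()

# The OpenAIEmbeddingsResponse carries one embedding per input (field names
# assumed from the OpenAI-compatible shape).
vectors = [item["embedding"] for item in resp["data"]]
```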
content: application/json: schema: $ref: '#/components/schemas/ListOpenAIFileResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: List files. - description: >- + - Files + summary: Openai List Files + description: |- List files. Returns a list of files that belong to the user's organization. + operationId: openai_list_files_v1_files_get parameters: - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. For instance, if you make a list request and receive - 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo - in order to fetch the next page of the list. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 10,000, and the default is 10,000. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - required: false - schema: - $ref: '#/components/schemas/Order' - - name: purpose - in: query - description: >- - Only return files with the given purpose. - required: false - schema: - $ref: '#/components/schemas/OpenAIFilePurpose' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 10000 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: purpose + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/OpenAIFilePurpose' + - type: 'null' + title: Purpose post: responses: '200': - description: >- - An OpenAIFileObject representing the uploaded file. + description: An OpenAIFileObject representing the uploaded file. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Files - summary: Upload file. - description: >- + - Files + summary: Openai Upload File + description: |- Upload file. Upload a file that can be used across various endpoints. - The file upload should be a multipart form request with: - - file: The File object (not file name) to be uploaded. - - purpose: The intended purpose of the uploaded file. - - expires_after: Optional form values describing expiration for the file. 
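Since the hunks above only show the spec plumbing for `/v1/files`, here is a minimal usage sketch of the two operations they describe: the multipart upload (whose `file` / `purpose` / `expires_after` form fields are listed in the removed description) and the paginated listing with the `after`/`limit`/`order`/`purpose` query parameters. The base URL and the `purpose` value are assumptions, not part of this diff.

```python
# Hedged sketch of POST /v1/files and GET /v1/files as described above.
import requests

BASE = "http://localhost:8321/v1"  # assumed local Llama Stack address

# Multipart upload: "file" is the File object (not the file name),
# "purpose" is an OpenAIFilePurpose value ("assistants" is assumed here).
with open("notes.txt", "rb") as fh:
    resp = requests.post(
        f"{BASE}/files",
        files={"file": fh},
        data={"purpose": "assistants"},
    )
resp.raise_for_status()
uploaded = resp.json()  # an OpenAIFileObject
print(uploaded["id"])

# Listing uses the query parameters shown in the hunk above; "desc" matches
# the default the new spec declares for "order".
listing = requests.get(f"{BASE}/files", params={"limit": 10, "order": "desc"})
listing.raise_for_status()
```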
- parameters: [] + operationId: openai_upload_file_v1_files_post requestBody: + required: true content: multipart/form-data: schema: - type: object - properties: - file: - type: string - format: binary - purpose: - $ref: '#/components/schemas/OpenAIFilePurpose' - expires_after: - $ref: '#/components/schemas/ExpiresAfter' - required: - - file - - purpose - required: true - deprecated: false + $ref: '#/components/schemas/Body_openai_upload_file_v1_files_post' /v1/files/{file_id}: get: responses: '200': - description: >- - An OpenAIFileObject containing file information. + description: An OpenAIFileObject containing file information. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file. - description: >- + - Files + summary: Openai Retrieve File + description: |- Retrieve file. Returns information about a specific file. + operationId: openai_retrieve_file_v1_files__file_id__get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' delete: responses: '200': - description: >- - An OpenAIFileDeleteResponse indicating successful deletion. + description: An OpenAIFileDeleteResponse indicating successful deletion. content: application/json: schema: $ref: '#/components/schemas/OpenAIFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Delete file. + - Files + summary: Openai Delete File description: Delete file. + operationId: openai_delete_file_v1_files__file_id__delete parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/files/{file_id}/content: get: responses: '200': - description: >- - The raw file content as a binary response. + description: The raw file content as a binary response. 
content: application/json: schema: $ref: '#/components/schemas/Response' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Files - summary: Retrieve file content. - description: >- + - Files + summary: Openai Retrieve File Content + description: |- Retrieve file content. Returns the contents of the specified file. + operationId: openai_retrieve_file_content_v1_files__file_id__content_get parameters: - - name: file_id - in: path - description: >- - The ID of the file to use for this request. - required: true - schema: - type: string - deprecated: false + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/health: get: responses: '200': - description: >- - Health information indicating if the service is operational. + description: Health information indicating if the service is operational. content: application/json: schema: $ref: '#/components/schemas/HealthInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get health status. - description: >- + - Inspect + summary: Health + description: |- Get health status. Get the current health status of the service. - parameters: [] - deprecated: false + operationId: health_v1_health_get /v1/inspect/routes: get: responses: '200': - description: >- - Response containing information about all available routes. + description: Response containing information about all available routes. content: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Inspect - summary: List routes. - description: >- + - Inspect + summary: List Routes + description: |- List routes. List all available API routes with their methods and implementing providers. - parameters: [] - deprecated: false + operationId: list_routes_v1_inspect_routes_get + parameters: + - name: api_filter + in: query + required: false + schema: + anyOf: + - enum: + - v1 + - v1alpha + - v1beta + - deprecated + type: string + - type: 'null' + title: Api Filter /v1/models: get: responses: '200': - description: A ListModelsResponse. + description: A OpenAIListModelsResponse. 
content: application/json: schema: - $ref: '#/components/schemas/ListModelsResponse' + $ref: '#/components/schemas/OpenAIListModelsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: List all models. - description: List all models. - parameters: [] - deprecated: false + - Models + summary: Openai List Models + description: List models using the OpenAI API. + operationId: openai_list_models_v1_models_get post: responses: '200': @@ -957,30 +1015,32 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Register model. - description: >- + - Models + summary: Register Model + description: |- Register model. Register a model. - parameters: [] + operationId: register_model_v1_models_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RegisterModelRequest' required: true - deprecated: false + deprecated: true /v1/models/{model_id}: get: responses: @@ -991,60 +1051,64 @@ paths: schema: $ref: '#/components/schemas/Model' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Models - summary: Get model. - description: >- + - Models + summary: Get Model + description: |- Get model. Get a model by its identifier. + operationId: get_model_v1_models__model_id__get parameters: - - name: model_id - in: path - description: The identifier of the model to get. - required: true - schema: - type: string - deprecated: false + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Models - summary: Unregister model. - description: >- + - Models + summary: Unregister Model + description: |- Unregister model. 
Unregister a model. + operationId: unregister_model_v1_models__model_id__delete parameters: - - name: model_id - in: path - description: >- - The identifier of the model to unregister. - required: true - schema: - type: string - deprecated: false + - name: model_id + in: path + required: true + schema: + type: string + description: 'Path parameter: model_id' + deprecated: true /v1/moderations: post: responses: @@ -1055,56 +1119,57 @@ paths: schema: $ref: '#/components/schemas/ModerationObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Create moderation. - description: >- + - Safety + summary: Run Moderation + description: |- Create moderation. Classifies if text and/or image inputs are potentially harmful. - parameters: [] + operationId: run_moderation_v1_moderations_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RunModerationRequest' required: true - deprecated: false /v1/prompts: get: responses: '200': - description: >- - A ListPromptsResponse containing all prompts. + description: A ListPromptsResponse containing all prompts. content: application/json: schema: $ref: '#/components/schemas/ListPromptsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: List all prompts. + - Prompts + summary: List Prompts description: List all prompts. - parameters: [] - deprecated: false + operationId: list_prompts_v1_prompts_get post: responses: '200': @@ -1114,30 +1179,31 @@ paths: schema: $ref: '#/components/schemas/Prompt' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: Create prompt. - description: >- + - Prompts + summary: Create Prompt + description: |- Create prompt. Create a new prompt. 
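A hedged sketch of the prompt endpoints just above: create a prompt, then list them. The request field names (`prompt`, `variables`) are assumptions about `CreatePromptRequest`, which is defined elsewhere in this spec; treat them as placeholders, and the base URL is likewise assumed.

```python
# Hypothetical call shape for POST /v1/prompts and GET /v1/prompts.
import requests

BASE = "http://localhost:8321/v1"  # assumed local server address

created = requests.post(
    f"{BASE}/prompts",
    # "prompt"/"variables" are assumed field names, not confirmed by this diff
    json={"prompt": "Summarize: {{ text }}", "variables": ["text"]},
)
created.raise_for_status()
print(created.json())  # a Prompt resource

prompts = requests.get(f"{BASE}/prompts")  # a ListPromptsResponse
prompts.raise_for_status()
```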
- parameters: [] + operationId: create_prompt_v1_prompts_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CreatePromptRequest' required: true - deprecated: false /v1/prompts/{prompt_id}: get: responses: @@ -1149,246 +1215,254 @@ paths: $ref: '#/components/schemas/Prompt' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Prompts - summary: Get prompt. - description: >- + - Prompts + summary: Get Prompt + description: |- Get prompt. Get a prompt by its identifier and optional version. + operationId: get_prompt_v1_prompts__prompt_id__get parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to get. - required: true - schema: - type: string - - name: version - in: query - description: >- - The version of the prompt to get (defaults to latest). - required: false - schema: - type: integer - deprecated: false + - name: version + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Version + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' post: responses: '200': - description: >- - The updated Prompt resource with incremented version. + description: The updated Prompt resource with incremented version. content: application/json: schema: $ref: '#/components/schemas/Prompt' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Prompts - summary: Update prompt. - description: >- + - Prompts + summary: Update Prompt + description: |- Update prompt. Update an existing prompt (increments version). + operationId: update_prompt_v1_prompts__prompt_id__post parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to update. - required: true - schema: - type: string + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/UpdatePromptRequest' - required: true - deprecated: false delete: responses: - '200': - description: OK '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response + '204': + description: Successful Response tags: - - Prompts - summary: Delete prompt. 
- description: >- + - Prompts + summary: Delete Prompt + description: |- Delete prompt. Delete a prompt. + operationId: delete_prompt_v1_prompts__prompt_id__delete parameters: - - name: prompt_id - in: path - description: The identifier of the prompt to delete. - required: true - schema: - type: string - deprecated: false + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' /v1/prompts/{prompt_id}/set-default-version: post: responses: '200': - description: >- - The prompt with the specified version now set as default. + description: The prompt with the specified version now set as default. content: application/json: schema: $ref: '#/components/schemas/Prompt' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: Set prompt version. - description: >- + - Prompts + summary: Set Default Version + description: |- Set prompt version. Set which version of a prompt should be the default in get_prompt (latest). + operationId: set_default_version_v1_prompts__prompt_id__set_default_version_post parameters: - - name: prompt_id - in: path - description: The identifier of the prompt. - required: true - schema: - type: string + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/SetDefaultVersionRequest' required: true - deprecated: false /v1/prompts/{prompt_id}/versions: get: responses: '200': - description: >- - A ListPromptsResponse containing all versions of the prompt. + description: A ListPromptsResponse containing all versions of the prompt. content: application/json: schema: $ref: '#/components/schemas/ListPromptsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Prompts - summary: List prompt versions. - description: >- + - Prompts + summary: List Prompt Versions + description: |- List prompt versions. List all versions of a specific prompt. + operationId: list_prompt_versions_v1_prompts__prompt_id__versions_get parameters: - - name: prompt_id - in: path - description: >- - The identifier of the prompt to list versions for. - required: true - schema: - type: string - deprecated: false + - name: prompt_id + in: path + required: true + schema: + type: string + description: 'Path parameter: prompt_id' /v1/providers: get: responses: '200': - description: >- - A ListProvidersResponse containing information about all providers. + description: A ListProvidersResponse containing information about all providers. 
content: application/json: schema: $ref: '#/components/schemas/ListProvidersResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Providers - summary: List providers. - description: >- + - Providers + summary: List Providers + description: |- List providers. List all available providers. - parameters: [] - deprecated: false + operationId: list_providers_v1_providers_get /v1/providers/{provider_id}: get: responses: '200': - description: >- - A ProviderInfo object containing the provider's details. + description: A ProviderInfo object containing the provider's details. content: application/json: schema: $ref: '#/components/schemas/ProviderInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Providers - summary: Get provider. - description: >- + - Providers + summary: Inspect Provider + description: |- Get provider. Get detailed information about a specific provider. + operationId: inspect_provider_v1_providers__provider_id__get parameters: - - name: provider_id - in: path - description: The ID of the provider to inspect. - required: true - schema: - type: string - deprecated: false + - name: provider_id + in: path + required: true + schema: + type: string + description: 'Path parameter: provider_id' /v1/responses: get: responses: @@ -1400,45 +1474,56 @@ paths: $ref: '#/components/schemas/ListOpenAIResponseObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List all responses. + - Agents + summary: List Openai Responses description: List all responses. + operationId: list_openai_responses_v1_responses_get parameters: - - name: after - in: query - description: The ID of the last response to return. - required: false - schema: - type: string - - name: limit - in: query - description: The number of responses to return. - required: false - schema: - type: integer - - name: model - in: query - description: The model to filter responses by. - required: false - schema: - type: string - - name: order - in: query - description: >- - The order to sort responses by when sorted by created_at ('asc' or 'desc'). 
- required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 50 + title: Limit + - name: model + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Model + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order post: responses: '200': @@ -1452,38 +1537,51 @@ paths: $ref: '#/components/schemas/OpenAIResponseObjectStream' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: Create a model response. + - Agents + summary: Create Openai Response description: Create a model response. - parameters: [] + operationId: create_openai_response_v1_responses_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/CreateOpenaiResponseRequest' - required: true - deprecated: false - x-llama-stack-extra-body-params: - - name: guardrails - schema: - type: array - items: - oneOf: + x-llama-stack-extra-body-params: + guardrails: + $defs: + ResponseGuardrailSpec: + description: |- + Specification for a guardrail to apply during response generation. + + :param type: The type/identifier of the guardrail. + properties: + type: + title: Type + type: string + required: + - type + title: ResponseGuardrailSpec + type: object + anyOf: + - items: + anyOf: - type: string - $ref: '#/components/schemas/ResponseGuardrailSpec' - description: >- - List of guardrails to apply during response generation. Guardrails provide - safety and content moderation. - required: false + type: array + - type: 'null' + description: List of guardrails to apply during response generation. Guardrails provide safety and content moderation. /v1/responses/{response_id}: get: responses: @@ -1494,28 +1592,29 @@ paths: schema: $ref: '#/components/schemas/OpenAIResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Get a model response. + - Agents + summary: Get Openai Response description: Get a model response. + operationId: get_openai_response_v1_responses__response_id__get parameters: - - name: response_id - in: path - description: >- - The ID of the OpenAI response to retrieve. 
- required: true - schema: - type: string - deprecated: false + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' delete: responses: '200': @@ -1525,27 +1624,29 @@ paths: schema: $ref: '#/components/schemas/OpenAIDeleteResponseObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Agents - summary: Delete a response. + - Agents + summary: Delete Openai Response description: Delete a response. + operationId: delete_openai_response_v1_responses__response_id__delete parameters: - - name: response_id - in: path - description: The ID of the OpenAI response to delete. - required: true - schema: - type: string - deprecated: false + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' /v1/responses/{response_id}/input_items: get: responses: @@ -1557,65 +1658,72 @@ paths: $ref: '#/components/schemas/ListOpenAIResponseInputItem' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - Agents - summary: List input items. + - Agents + summary: List Openai Response Input Items description: List input items. + operationId: list_openai_response_input_items_v1_responses__response_id__input_items_get parameters: - - name: response_id - in: path - description: >- - The ID of the response to retrieve input items for. - required: true - schema: - type: string - - name: after - in: query - description: >- - An item ID to list items after, used for pagination. - required: false - schema: - type: string - - name: before - in: query - description: >- - An item ID to list items before, used for pagination. - required: false - schema: - type: string - - name: include - in: query - description: >- - Additional fields to include in the response. - required: false - schema: - type: array + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/Order' + - type: 'null' + default: desc + title: Order + - name: response_id + in: path + required: true + schema: + type: string + description: 'Path parameter: response_id' + - name: include + in: query + required: false + schema: + anyOf: + - type: array items: type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. 
Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - The order to return the input items in. Default is desc. - required: false - schema: - $ref: '#/components/schemas/Order' - deprecated: false + - type: 'null' + title: Include /v1/safety/run-shield: post: responses: @@ -1626,30 +1734,31 @@ paths: schema: $ref: '#/components/schemas/RunShieldResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Safety - summary: Run shield. - description: >- + - Safety + summary: Run Shield + description: |- Run shield. Run a shield. - parameters: [] + operationId: run_shield_v1_safety_run_shield_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RunShieldRequest' required: true - deprecated: false /v1/scoring-functions: get: responses: @@ -1660,47 +1769,50 @@ paths: schema: $ref: '#/components/schemas/ListScoringFunctionsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: List all scoring functions. + - Scoring Functions + summary: List Scoring Functions description: List all scoring functions. - parameters: [] - deprecated: false + operationId: list_scoring_functions_v1_scoring_functions_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ScoringFunctions - summary: Register a scoring function. + - Scoring Functions + summary: Register Scoring Function description: Register a scoring function. 
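Note that the hunk above marks scoring-function registration as `deprecated: true` in the regenerated spec, so a client would typically only read. A small sketch of the listing call; the `data` envelope key and the printed fields are assumptions about `ListScoringFunctionsResponse`, and the server address is assumed.

```python
# Sketch of GET /v1/scoring-functions from the hunk above (read-only).
import requests

resp = requests.get("http://localhost:8321/v1/scoring-functions")  # address assumed
resp.raise_for_status()
for fn in resp.json().get("data", []):  # "data" envelope key assumed
    print(fn.get("identifier"), fn.get("description"))
```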
- parameters: [] + operationId: register_scoring_function_v1_scoring_functions_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RegisterScoringFunctionRequest' required: true - deprecated: false + deprecated: true /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -1711,86 +1823,90 @@ paths: schema: $ref: '#/components/schemas/ScoringFn' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ScoringFunctions - summary: Get a scoring function by its ID. + - Scoring Functions + summary: Get Scoring Function description: Get a scoring function by its ID. + operationId: get_scoring_function_v1_scoring_functions__scoring_fn_id__get parameters: - - name: scoring_fn_id - in: path - description: The ID of the scoring function to get. - required: true - schema: - type: string - deprecated: false + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ScoringFunctions - summary: Unregister a scoring function. + - Scoring Functions + summary: Unregister Scoring Function description: Unregister a scoring function. + operationId: unregister_scoring_function_v1_scoring_functions__scoring_fn_id__delete parameters: - - name: scoring_fn_id - in: path - description: >- - The ID of the scoring function to unregister. - required: true - schema: - type: string - deprecated: false + - name: scoring_fn_id + in: path + required: true + schema: + type: string + description: 'Path parameter: scoring_fn_id' + deprecated: true /v1/scoring/score: post: responses: '200': - description: >- - A ScoreResponse object containing rows and aggregated results. + description: A ScoreResponse object containing rows and aggregated results. content: application/json: schema: $ref: '#/components/schemas/ScoreResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Scoring - summary: Score a list of rows. + - Scoring + summary: Score description: Score a list of rows. 
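A hedged sketch of the scoring call above. The `input_rows`/`scoring_functions` field names are assumptions about `ScoreRequest` (defined elsewhere in this spec), and the scoring-function identifier is a placeholder.

```python
# Hypothetical call shape for POST /v1/scoring/score.
import requests

payload = {
    # assumed ScoreRequest fields; the scoring-function id is a placeholder
    "input_rows": [{"question": "What is 2 + 2?", "generated_answer": "4"}],
    "scoring_functions": {"basic::equality": None},
}
resp = requests.post("http://localhost:8321/v1/scoring/score", json=payload)
resp.raise_for_status()
print(resp.json())  # a ScoreResponse with per-row and aggregated results
```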
- parameters: [] + operationId: score_v1_scoring_score_post requestBody: content: application/json: schema: $ref: '#/components/schemas/ScoreRequest' required: true - deprecated: false /v1/scoring/score-batch: post: responses: @@ -1801,27 +1917,28 @@ paths: schema: $ref: '#/components/schemas/ScoreBatchResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Scoring - summary: Score a batch of rows. + - Scoring + summary: Score Batch description: Score a batch of rows. - parameters: [] + operationId: score_batch_v1_scoring_score_batch_post requestBody: content: application/json: schema: $ref: '#/components/schemas/ScoreBatchRequest' required: true - deprecated: false /v1/shields: get: responses: @@ -1832,21 +1949,22 @@ paths: schema: $ref: '#/components/schemas/ListShieldsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: List all shields. + - Shields + summary: List Shields description: List all shields. - parameters: [] - deprecated: false + operationId: list_shields_v1_shields_get post: responses: '200': @@ -1856,27 +1974,29 @@ paths: schema: $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: Register a shield. + - Shields + summary: Register Shield description: Register a shield. - parameters: [] + operationId: register_shield_v1_shields_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RegisterShieldRequest' required: true - deprecated: false + deprecated: true /v1/shields/{identifier}: get: responses: @@ -1887,88 +2007,58 @@ paths: schema: $ref: '#/components/schemas/Shield' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Shields - summary: Get a shield by its identifier. + - Shields + summary: Get Shield description: Get a shield by its identifier. 
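The shield endpoints above are read/list/unregister; a sketch of the two read-only calls, fetching each shield by the `identifier` path parameter shown in the hunk. The `data` envelope and `identifier` response keys are assumptions, as is the base URL.

```python
# Sketch of GET /v1/shields and GET /v1/shields/{identifier} from above.
import requests

BASE = "http://localhost:8321/v1"  # assumed local server address

listing = requests.get(f"{BASE}/shields")  # a ListShieldsResponse
listing.raise_for_status()
for s in listing.json().get("data", []):  # "data" envelope key assumed
    detail = requests.get(f"{BASE}/shields/{s['identifier']}")  # key assumed
    detail.raise_for_status()
    print(detail.json())  # a Shield
```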
+ operationId: get_shield_v1_shields__identifier__get parameters: - - name: identifier - in: path - description: The identifier of the shield to get. - required: true - schema: - type: string - deprecated: false + - name: identifier + in: path + required: true + schema: + type: string + description: 'Path parameter: identifier' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Shields - summary: Unregister a shield. + - Shields + summary: Unregister Shield description: Unregister a shield. + operationId: unregister_shield_v1_shields__identifier__delete parameters: - - name: identifier - in: path - description: >- - The identifier of the shield to unregister. - required: true - schema: - type: string - deprecated: false - /v1/synthetic-data-generation/generate: - post: - responses: - '200': - description: >- - Response containing filtered synthetic data samples and optional statistics - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenerationResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - SyntheticDataGeneration (Coming Soon) - summary: >- - Generate synthetic data based on input dialogs and apply filtering. - description: >- - Generate synthetic data based on input dialogs and apply filtering. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenerateRequest' + - name: identifier + in: path required: true - deprecated: false + schema: + type: string + description: 'Path parameter: identifier' + deprecated: true /v1/tool-runtime/invoke: post: responses: @@ -1979,27 +2069,29 @@ paths: schema: $ref: '#/components/schemas/ToolInvocationResult' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolRuntime - summary: Run a tool with the given arguments. + - Tool Runtime + summary: Invoke Tool description: Run a tool with the given arguments. 
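The tool-runtime invoke hunk below is marked `deprecated: true` in the regenerated spec, but for completeness, a hedged sketch of the call. The `tool_name`/`kwargs` field names are assumptions about `InvokeToolRequest` (defined elsewhere in this spec), and the tool identifier is a placeholder.

```python
# Hypothetical call shape for POST /v1/tool-runtime/invoke.
import requests

resp = requests.post(
    "http://localhost:8321/v1/tool-runtime/invoke",  # address assumed
    # "tool_name"/"kwargs" are assumed field names; "my_tool" is a placeholder
    json={"tool_name": "my_tool", "kwargs": {"query": "llama stack"}},
)
resp.raise_for_status()
print(resp.json())  # a ToolInvocationResult
```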
- parameters: [] + operationId: invoke_tool_v1_tool_runtime_invoke_post requestBody: content: application/json: schema: $ref: '#/components/schemas/InvokeToolRequest' required: true - deprecated: false + deprecated: true /v1/tool-runtime/list-tools: get: responses: @@ -2011,97 +2103,47 @@ paths: $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - ToolRuntime - summary: List all tools in the runtime. + - Tool Runtime + summary: List Runtime Tools description: List all tools in the runtime. + operationId: list_runtime_tools_v1_tool_runtime_list_tools_get parameters: - - name: tool_group_id - in: query - description: >- - The ID of the tool group to list tools for. - required: false - schema: - type: string - - name: mcp_endpoint - in: query - description: >- - The MCP endpoint to use for the tool group. - required: false - schema: - $ref: '#/components/schemas/URL' - deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false + - name: authorization + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Authorization + - name: tool_group_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Tool Group Id + - name: mcp_endpoint + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/URL' + - type: 'null' + title: Mcp Endpoint + deprecated: true /v1/toolgroups: get: responses: @@ -2112,47 +2154,51 @@ paths: schema: $ref: '#/components/schemas/ListToolGroupsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: List tool groups with optional provider. + - Tool Groups + summary: List Tool Groups description: List tool groups with optional provider. - parameters: [] - deprecated: false + operationId: list_tool_groups_v1_toolgroups_get + deprecated: true post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ToolGroups - summary: Register a tool group. + - Tool Groups + summary: Register Tool Group description: Register a tool group. - parameters: [] + operationId: register_tool_group_v1_toolgroups_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RegisterToolGroupRequest' required: true - deprecated: false + deprecated: true /v1/toolgroups/{toolgroup_id}: get: responses: @@ -2163,53 +2209,59 @@ paths: schema: $ref: '#/components/schemas/ToolGroup' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Get a tool group by its ID. + - Tool Groups + summary: Get Tool Group description: Get a tool group by its ID. + operationId: get_tool_group_v1_toolgroups__toolgroup_id__get parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to get. 
- required: true - schema: - type: string - deprecated: false + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' + deprecated: true delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - ToolGroups - summary: Unregister a tool group. + - Tool Groups + summary: Unregister Toolgroup description: Unregister a tool group. + operationId: unregister_toolgroup_v1_toolgroups__toolgroup_id__delete parameters: - - name: toolgroup_id - in: path - description: The ID of the tool group to unregister. - required: true - schema: - type: string - deprecated: false + - name: toolgroup_id + in: path + required: true + schema: + type: string + description: 'Path parameter: toolgroup_id' + deprecated: true /v1/tools: get: responses: @@ -2221,27 +2273,31 @@ paths: $ref: '#/components/schemas/ListToolDefsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - ToolGroups - summary: List tools with optional tool group. + - Tool Groups + summary: List Tools description: List tools with optional tool group. + operationId: list_tools_v1_tools_get parameters: - - name: toolgroup_id - in: query - description: >- - The ID of the tool group to list tools for. - required: false - schema: - type: string - deprecated: false + - name: toolgroup_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Toolgroup Id + deprecated: true /v1/tools/{tool_name}: get: responses: @@ -2252,54 +2308,58 @@ paths: schema: $ref: '#/components/schemas/ToolDef' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - ToolGroups - summary: Get a tool by its name. + - Tool Groups + summary: Get Tool description: Get a tool by its name. + operationId: get_tool_v1_tools__tool_name__get parameters: - - name: tool_name - in: path - description: The name of the tool to get. 
- required: true - schema: - type: string - deprecated: false + - name: tool_name + in: path + required: true + schema: + type: string + description: 'Path parameter: tool_name' + deprecated: true /v1/vector-io/insert: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - VectorIO - summary: Insert chunks into a vector database. + - Vector Io + summary: Insert Chunks description: Insert chunks into a vector database. - parameters: [] + operationId: insert_chunks_v1_vector_io_insert_post requestBody: content: application/json: schema: $ref: '#/components/schemas/InsertChunksRequest' required: true - deprecated: false /v1/vector-io/query: post: responses: @@ -2310,800 +2370,829 @@ paths: schema: $ref: '#/components/schemas/QueryChunksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Query chunks from a vector database. + - Vector Io + summary: Query Chunks description: Query chunks from a vector database. - parameters: [] + operationId: query_chunks_v1_vector_io_query_post requestBody: content: application/json: schema: $ref: '#/components/schemas/QueryChunksRequest' required: true - deprecated: false /v1/vector_stores: get: responses: '200': - description: >- - A VectorStoreListResponse containing the list of vector stores. + description: A VectorStoreListResponse containing the list of vector stores. content: application/json: schema: $ref: '#/components/schemas/VectorStoreListResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Returns a list of vector stores. + - Vector Io + summary: Openai List Vector Stores description: Returns a list of vector stores. + operationId: openai_list_vector_stores_v1_vector_stores_get parameters: - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
- required: false - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order post: responses: '200': - description: >- - A VectorStoreObject representing the created vector store. + description: A VectorStoreObject representing the created vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Creates a vector store. - description: >- + - Vector Io + summary: Openai Create Vector Store + description: |- Creates a vector store. Generate an OpenAI-compatible vector store with the given parameters. - parameters: [] + operationId: openai_create_vector_store_v1_vector_stores_post requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenAICreateVectorStoreRequestWithExtraBody' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}: get: responses: '200': - description: >- - A VectorStoreObject representing the vector store. + description: A VectorStoreObject representing the vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store. + - Vector Io + summary: Openai Retrieve Vector Store description: Retrieves a vector store. + operationId: openai_retrieve_vector_store_v1_vector_stores__vector_store_id__get parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreObject representing the updated vector store. 
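The list route above swaps the hand-written parameter docs for generated anyOf schemas but keeps OpenAI-style cursor pagination (`after`, `before`, `limit` defaulting to 20, `order` defaulting to `desc`). A minimal sketch of walking the whole collection over plain HTTP, assuming a local server on the default port and that `VectorStoreListResponse` follows the OpenAI list shape (`data`, `has_more`, per-object `id`), none of which is spelled out in this hunk:

    import requests

    BASE = "http://localhost:8321"  # assumed local llama-stack server

    def list_all_vector_stores():
        """Page through GET /v1/vector_stores using the `after` cursor."""
        after = None
        while True:
            params = {"limit": 20, "order": "desc"}
            if after:
                params["after"] = after
            page = requests.get(f"{BASE}/v1/vector_stores", params=params).json()
            yield from page["data"]            # assumed OpenAI-style list payload
            if not page.get("has_more"):
                break
            after = page["data"][-1]["id"]     # cursor = last object ID on the page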
+ description: A VectorStoreObject representing the updated vector store. content: application/json: schema: $ref: '#/components/schemas/VectorStoreObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store. + - Vector Io + summary: Openai Update Vector Store description: Updates a vector store. + operationId: openai_update_vector_store_v1_vector_stores__vector_store_id__post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreDeleteResponse indicating the deletion status. + description: A VectorStoreDeleteResponse indicating the deletion status. content: application/json: schema: $ref: '#/components/schemas/VectorStoreDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store. + - Vector Io + summary: Openai Delete Vector Store description: Delete a vector store. + operationId: openai_delete_vector_store_v1_vector_stores__vector_store_id__delete parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to delete. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' /v1/vector_stores/{vector_store_id}/file_batches: post: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the created file batch. + description: A VectorStoreFileBatchObject representing the created file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Create a vector store file batch. - description: >- + - Vector Io + summary: Openai Create Vector Store File Batch + description: |- Create a vector store file batch. 
- Generate an OpenAI-compatible vector store file batch for the given vector - store. + Generate an OpenAI-compatible vector store file batch for the given vector store. + operationId: openai_create_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to create the file batch for. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenAICreateVectorStoreFileBatchRequestWithExtraBody' required: true - deprecated: false /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}: get: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the file batch. + description: A VectorStoreFileBatchObject representing the file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieve a vector store file batch. + - Vector Io + summary: Openai Retrieve Vector Store File Batch description: Retrieve a vector store file batch. + operationId: openai_retrieve_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__get parameters: - - name: batch_id - in: path - description: The ID of the file batch to retrieve. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel: post: responses: '200': - description: >- - A VectorStoreFileBatchObject representing the cancelled file batch. + description: A VectorStoreFileBatchObject representing the cancelled file batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileBatchObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Cancels a vector store file batch. + - Vector Io + summary: Openai Cancel Vector Store File Batch description: Cancels a vector store file batch. 
+ operationId: openai_cancel_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__cancel_post parameters: - - name: batch_id - in: path - description: The ID of the file batch to cancel. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files: get: responses: '200': - description: >- - A VectorStoreFilesListInBatchResponse containing the list of files in - the batch. + description: A VectorStoreFilesListInBatchResponse containing the list of files in the batch. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFilesListInBatchResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Returns a list of vector store files in a batch. - description: >- - Returns a list of vector store files in a batch. + - Vector Io + summary: Openai List Files In Vector Store File Batch + description: Returns a list of vector store files in a batch. + operationId: openai_list_files_in_vector_store_file_batch_v1_vector_stores__vector_store_id__file_batches__batch_id__files_get parameters: - - name: batch_id - in: path - description: >- - The ID of the file batch to list files from. - required: true - schema: - type: string - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file batch. - required: true - schema: - type: string - - name: after - in: query - description: >- - A cursor for use in pagination. `after` is an object ID that defines your - place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - A cursor for use in pagination. `before` is an object ID that defines - your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - Filter by file status. One of in_progress, completed, failed, cancelled. - required: false - schema: - type: string - - name: limit - in: query - description: >- - A limit on the number of objects to be returned. Limit can range between - 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
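The three file-batch routes above (create, retrieve, cancel) form a small lifecycle. A sketch of driving it with plain HTTP; the `file_ids` body field and the `status` values are assumptions drawn from the OpenAI-compatible schema this spec mirrors, and the IDs are hypothetical:

    import requests

    BASE = "http://localhost:8321"  # assumed local server
    vs = "vs_123"                   # hypothetical vector store ID

    # Create a file batch; `file_ids` is assumed from the OpenAI-compatible schema.
    batch = requests.post(
        f"{BASE}/v1/vector_stores/{vs}/file_batches",
        json={"file_ids": ["file_abc", "file_def"]},
    ).json()

    # Retrieve its status, then cancel it if it is still running.
    batch = requests.get(
        f"{BASE}/v1/vector_stores/{vs}/file_batches/{batch['id']}"
    ).json()
    if batch.get("status") == "in_progress":   # status values assumed, cf. the file `filter` enum
        requests.post(f"{BASE}/v1/vector_stores/{vs}/file_batches/{batch['id']}/cancel")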
- required: false - schema: - type: string - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Filter + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: batch_id + in: path + required: true + schema: + type: string + description: 'Path parameter: batch_id' /v1/vector_stores/{vector_store_id}/files: get: responses: '200': - description: >- - A VectorStoreListFilesResponse containing the list of files. + description: A VectorStoreListFilesResponse containing the list of files. content: application/json: schema: $ref: '#/components/schemas/VectorStoreListFilesResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: List files in a vector store. + - Vector Io + summary: Openai List Files In Vector Store description: List files in a vector store. + operationId: openai_list_files_in_vector_store_v1_vector_stores__vector_store_id__files_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to list files from. - required: true - schema: - type: string - - name: limit - in: query - description: >- - (Optional) A limit on the number of objects to be returned. Limit can - range between 1 and 100, and the default is 20. - required: false - schema: - type: integer - - name: order - in: query - description: >- - (Optional) Sort order by the `created_at` timestamp of the objects. `asc` - for ascending order and `desc` for descending order. - required: false - schema: - type: string - - name: after - in: query - description: >- - (Optional) A cursor for use in pagination. `after` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: before - in: query - description: >- - (Optional) A cursor for use in pagination. `before` is an object ID that - defines your place in the list. - required: false - schema: - type: string - - name: filter - in: query - description: >- - (Optional) Filter by file status to only return files with the specified - status. 
- required: false - schema: - $ref: '#/components/schemas/VectorStoreFileStatus' - deprecated: false + - name: after + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: After + - name: before + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Before + - name: filter + in: query + required: false + schema: + title: Filter + type: string + enum: + - completed + - in_progress + - cancelled + - failed + default: completed + nullable: true + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + default: 20 + title: Limit + - name: order + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + default: desc + title: Order + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the attached file. + description: A VectorStoreFileObject representing the attached file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: Attach a file to a vector store. + - Vector Io + summary: Openai Attach File To Vector Store description: Attach a file to a vector store. + operationId: openai_attach_file_to_vector_store_v1_vector_stores__vector_store_id__files_post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store to attach the file to. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: + required: true content: application/json: schema: $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest' - required: true - deprecated: false /v1/vector_stores/{vector_store_id}/files/{file_id}: get: responses: '200': - description: >- - A VectorStoreFileObject representing the file. + description: A VectorStoreFileObject representing the file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Retrieves a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File description: Retrieves a vector store file. + operationId: openai_retrieve_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. 
- required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' post: responses: '200': - description: >- - A VectorStoreFileObject representing the updated file. + description: A VectorStoreFileObject representing the updated file. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileObject' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Updates a vector store file. + - Vector Io + summary: Openai Update Vector Store File description: Updates a vector store file. + operationId: openai_update_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__post parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to update. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to update. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiUpdateVectorStoreFileRequest' required: true - deprecated: false delete: responses: '200': - description: >- - A VectorStoreFileDeleteResponse indicating the deletion status. + description: A VectorStoreFileDeleteResponse indicating the deletion status. content: application/json: schema: $ref: '#/components/schemas/VectorStoreFileDeleteResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Delete a vector store file. + - Vector Io + summary: Openai Delete Vector Store File description: Delete a vector store file. + operationId: openai_delete_vector_store_file_v1_vector_stores__vector_store_id__files__file_id__delete parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to delete. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to delete. 
- required: true - schema: - type: string - deprecated: false + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/files/{file_id}/content: get: responses: '200': - description: >- - A list of InterleavedContent representing the file contents. + description: File contents, optionally with embeddings and metadata based on query parameters. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - VectorIO - summary: >- - Retrieves the contents of a vector store file. - description: >- - Retrieves the contents of a vector store file. + - Vector Io + summary: Openai Retrieve Vector Store File Contents + description: Retrieves the contents of a vector store file. + operationId: openai_retrieve_vector_store_file_contents_v1_vector_stores__vector_store_id__files__file_id__content_get parameters: - - name: vector_store_id - in: path - description: >- - The ID of the vector store containing the file to retrieve. - required: true - schema: - type: string - - name: file_id - in: path - description: The ID of the file to retrieve. - required: true - schema: - type: string - deprecated: false + - name: include_embeddings + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Embeddings + - name: include_metadata + in: query + required: false + schema: + anyOf: + - type: boolean + - type: 'null' + default: false + title: Include Metadata + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' + - name: file_id + in: path + required: true + schema: + type: string + description: 'Path parameter: file_id' /v1/vector_stores/{vector_store_id}/search: post: responses: '200': - description: >- - A VectorStoreSearchResponse containing the search results. + description: A VectorStoreSearchResponse containing the search results. content: application/json: schema: $ref: '#/components/schemas/VectorStoreSearchResponsePage' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - VectorIO - summary: Search for chunks in a vector store. - description: >- + - Vector Io + summary: Openai Search Vector Store + description: |- Search for chunks in a vector store. - Searches a vector store for relevant chunks based on a query and optional - file attribute filters. 
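The content route above now takes `include_embeddings` and `include_metadata` query flags (both defaulting to false) and returns a `VectorStoreFileContentResponse` in place of the old `VectorStoreFileContentsResponse`. A minimal retrieval sketch, assuming a local server and hypothetical IDs:

    import requests

    BASE = "http://localhost:8321"          # assumed local server
    vs, f = "vs_123", "file_abc"            # hypothetical IDs

    resp = requests.get(
        f"{BASE}/v1/vector_stores/{vs}/files/{f}/content",
        params={"include_embeddings": True, "include_metadata": True},
    )
    resp.raise_for_status()
    content = resp.json()                   # VectorStoreFileContentResponse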
+ Searches a vector store for relevant chunks based on a query and optional file attribute filters. + operationId: openai_search_vector_store_v1_vector_stores__vector_store_id__search_post parameters: - - name: vector_store_id - in: path - description: The ID of the vector store to search. - required: true - schema: - type: string + - name: vector_store_id + in: path + required: true + schema: + type: string + description: 'Path parameter: vector_store_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' required: true - deprecated: false /v1/version: get: responses: '200': - description: >- - Version information containing the service version number. + description: Version information containing the service version number. content: application/json: schema: $ref: '#/components/schemas/VersionInfo' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inspect - summary: Get version. - description: >- + - Inspect + summary: Version + description: |- Get version. Get the version of the service. - parameters: [] - deprecated: false + operationId: version_v1_version_get /v1beta/datasetio/append-rows/{dataset_id}: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - DatasetIO - summary: Append rows to a dataset. + - Datasetio + summary: Append Rows description: Append rows to a dataset. + operationId: append_rows_v1beta_datasetio_append_rows__dataset_id__post parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to append the rows to. - required: true - schema: - type: string + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/AppendRowsRequest' required: true - deprecated: false /v1beta/datasetio/iterrows/{dataset_id}: get: responses: @@ -3115,55 +3204,53 @@ paths: $ref: '#/components/schemas/PaginatedResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - DatasetIO - summary: >- - Get a paginated list of rows from a dataset. 
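Since the search route is documented as OpenAI-compatible, the `OpenaiSearchVectorStoreRequest` body presumably follows the OpenAI vector-store search shape; a sketch under that assumption (the `query` field and the `data` key on the response page are not shown in this hunk):

    import requests

    BASE = "http://localhost:8321"          # assumed local server

    page = requests.post(
        f"{BASE}/v1/vector_stores/vs_123/search",          # hypothetical store ID
        json={"query": "how do I rotate my API key?"},     # field assumed from the OpenAI-compatible schema
    ).json()                                # VectorStoreSearchResponsePage
    for hit in page.get("data", []):        # `data` assumed from the OpenAI page shape
        print(hit)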
- description: >- + - Datasetio + summary: Iterrows + description: |- Get a paginated list of rows from a dataset. Uses offset-based pagination where: - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - The response includes: - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. + operationId: iterrows_v1beta_datasetio_iterrows__dataset_id__get parameters: - - name: dataset_id - in: path - description: >- - The ID of the dataset to get the rows from. - required: true - schema: - type: string - - name: start_index - in: query - description: >- - Index into dataset for the first row to get. Get all rows if None. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of rows to get. - required: false - schema: - type: integer - deprecated: false + - name: limit + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Limit + - name: start_index + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Start Index + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' /v1beta/datasets: get: responses: @@ -3174,21 +3261,22 @@ paths: schema: $ref: '#/components/schemas/ListDatasetsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: List all datasets. + - Datasets + summary: List Datasets description: List all datasets. - parameters: [] - deprecated: false + operationId: list_datasets_v1beta_datasets_get post: responses: '200': @@ -3198,27 +3286,29 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Register a new dataset. + - Datasets + summary: Register Dataset description: Register a new dataset. 
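Per the pagination notes in the description above (start_index is 0-based and None starts from the beginning; a limit of None or -1 returns all items; the PaginatedResponse carries `data` plus `has_more`), iterating a full dataset might look like this sketch against an assumed local server:

    import requests

    BASE = "http://localhost:8321"          # assumed local server

    def iter_rows(dataset_id, page_size=100):
        """Walk GET /v1beta/datasetio/iterrows/{dataset_id} with offset pagination."""
        start = 0
        while True:
            page = requests.get(
                f"{BASE}/v1beta/datasetio/iterrows/{dataset_id}",
                params={"start_index": start, "limit": page_size},
            ).json()
            yield from page["data"]
            if not page.get("has_more"):
                break
            start += len(page["data"])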
- parameters: [] + operationId: register_dataset_v1beta_datasets_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RegisterDatasetRequest' required: true - deprecated: false + deprecated: true /v1beta/datasets/{dataset_id}: get: responses: @@ -3229,550 +3319,58 @@ paths: schema: $ref: '#/components/schemas/Dataset' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Datasets - summary: Get a dataset by its ID. + - Datasets + summary: Get Dataset description: Get a dataset by its ID. + operationId: get_dataset_v1beta_datasets__dataset_id__get parameters: - - name: dataset_id - in: path - description: The ID of the dataset to get. - required: true - schema: - type: string - deprecated: false + - name: dataset_id + in: path + required: true + schema: + type: string + description: 'Path parameter: dataset_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Datasets - summary: Unregister a dataset by its ID. + - Datasets + summary: Unregister Dataset description: Unregister a dataset by its ID. + operationId: unregister_dataset_v1beta_datasets__dataset_id__delete parameters: - - name: dataset_id - in: path - description: The ID of the dataset to unregister. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all agents. - description: List all agents. - parameters: - - name: start_index - in: query - description: The index to start the pagination from. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of agents to return. - required: false - schema: - type: integer - deprecated: false - post: - responses: - '200': - description: >- - An AgentCreateResponse with the agent ID. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentCreateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Create an agent with the given configuration. 
- description: >- - Create an agent with the given configuration. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentRequest' + - name: dataset_id + in: path required: true - deprecated: false - /v1alpha/agents/{agent_id}: - get: - responses: - '200': - description: An Agent of the agent. - content: - application/json: - schema: - $ref: '#/components/schemas/Agent' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Describe an agent by its ID. - description: Describe an agent by its ID. - parameters: - - name: agent_id - in: path - description: ID of the agent. - required: true - schema: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Delete an agent by its ID and its associated sessions and turns. - description: >- - Delete an agent by its ID and its associated sessions and turns. - parameters: - - name: agent_id - in: path - description: The ID of the agent to delete. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session: - post: - responses: - '200': - description: An AgentSessionCreateResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentSessionCreateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new session for an agent. - description: Create a new session for an agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to create the session for. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentSessionRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}: - get: - responses: - '200': - description: A Session. - content: - application/json: - schema: - $ref: '#/components/schemas/Session' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent session by its ID. - description: Retrieve an agent session by its ID. - parameters: - - name: session_id - in: path - description: The ID of the session to get. - required: true - schema: - type: string - - name: agent_id - in: path - description: >- - The ID of the agent to get the session for. - required: true - schema: - type: string - - name: turn_ids - in: query - description: >- - (Optional) List of turn IDs to filter the session by. 
- required: false - schema: - type: array - items: - type: string - deprecated: false - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Delete an agent session by its ID and its associated turns. - description: >- - Delete an agent session by its ID and its associated turns. - parameters: - - name: session_id - in: path - description: The ID of the session to delete. - required: true - schema: - type: string - - name: agent_id - in: path - description: >- - The ID of the agent to delete the session for. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn: - post: - responses: - '200': - description: >- - If stream=False, returns a Turn object. If stream=True, returns an SSE - event stream of AgentTurnResponseStreamChunk. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Create a new turn for an agent. - description: Create a new turn for an agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to create the turn for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to create the turn for. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateAgentTurnRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}: - get: - responses: - '200': - description: A Turn. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent turn by its ID. - description: Retrieve an agent turn by its ID. - parameters: - - name: agent_id - in: path - description: The ID of the agent to get the turn for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to get the turn for. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume: - post: - responses: - '200': - description: >- - A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk - objects. 
- content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: >- - Resume an agent turn with executed tool call responses. - description: >- - Resume an agent turn with executed tool call responses. - - When a Turn has the status `awaiting_input` due to pending input from client - side tool calls, this endpoint can be used to submit the outputs from the - tool calls once they are ready. - parameters: - - name: agent_id - in: path - description: The ID of the agent to resume. - required: true - schema: - type: string - - name: session_id - in: path - description: The ID of the session to resume. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to resume. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ResumeAgentTurnRequest' - required: true - deprecated: false - /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: - get: - responses: - '200': - description: An AgentStepResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/AgentStepResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: Retrieve an agent step by its ID. - description: Retrieve an agent step by its ID. - parameters: - - name: agent_id - in: path - description: The ID of the agent to get the step for. - required: true - schema: - type: string - - name: session_id - in: path - description: >- - The ID of the session to get the step for. - required: true - schema: - type: string - - name: turn_id - in: path - description: The ID of the turn to get the step for. - required: true - schema: - type: string - - name: step_id - in: path - description: The ID of the step to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/agents/{agent_id}/sessions: - get: - responses: - '200': - description: A PaginatedResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/PaginatedResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Agents - summary: List all session(s) of a given agent. - description: List all session(s) of a given agent. - parameters: - - name: agent_id - in: path - description: >- - The ID of the agent to list sessions for. - required: true - schema: - type: string - - name: start_index - in: query - description: The index to start the pagination from. - required: false - schema: - type: integer - - name: limit - in: query - description: The number of sessions to return. 
- required: false - schema: - type: integer - deprecated: false + schema: + type: string + description: 'Path parameter: dataset_id' + deprecated: true /v1alpha/eval/benchmarks: get: responses: @@ -3783,47 +3381,50 @@ paths: schema: $ref: '#/components/schemas/ListBenchmarksResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: List all benchmarks. + - Benchmarks + summary: List Benchmarks description: List all benchmarks. - parameters: [] - deprecated: false + operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Benchmarks - summary: Register a benchmark. + - Benchmarks + summary: Register Benchmark description: Register a benchmark. - parameters: [] + operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RegisterBenchmarkRequest' required: true - deprecated: false + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}: get: responses: @@ -3834,131 +3435,136 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Benchmarks - summary: Get a benchmark by its ID. + - Benchmarks + summary: Get Benchmark description: Get a benchmark by its ID. + operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to get. 
- required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Benchmarks - summary: Unregister a benchmark. + - Benchmarks + summary: Unregister Benchmark description: Unregister a benchmark. + operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: - - name: benchmark_id - in: path - description: The ID of the benchmark to unregister. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: '200': - description: >- - EvaluateResponse object containing generations and scores. + description: EvaluateResponse object containing generations and scores. content: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Evaluate a list of rows on a benchmark. + - Eval + summary: Evaluate Rows description: Evaluate a list of rows on a benchmark. + operationId: evaluate_rows_v1alpha_eval_benchmarks__benchmark_id__evaluations_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/EvaluateRowsRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs: post: responses: '200': - description: >- - The job that was created to run the evaluation. + description: The job that was created to run the evaluation. content: application/json: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Run an evaluation on a benchmark. 
+ - Eval + summary: Run Eval description: Run an evaluation on a benchmark. + operationId: run_eval_v1alpha_eval_benchmarks__benchmark_id__jobs_post parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' requestBody: content: application/json: schema: $ref: '#/components/schemas/RunEvalRequest' required: true - deprecated: false /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -3969,67 +3575,69 @@ paths: schema: $ref: '#/components/schemas/Job' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the status of a job. + - Eval + summary: Job Status description: Get the status of a job. + operationId: job_status_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' delete: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - Eval - summary: Cancel a job. + - Eval + summary: Job Cancel description: Cancel a job. + operationId: job_cancel_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__delete parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. 
- required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: get: responses: @@ -4040,68 +3648,67 @@ paths: schema: $ref: '#/components/schemas/EvaluateResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Eval - summary: Get the result of a job. + - Eval + summary: Job Result description: Get the result of a job. + operationId: job_result_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__result_get parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. - required: true - schema: - type: string - deprecated: false + - name: benchmark_id + in: path + required: true + schema: + type: string + description: 'Path parameter: benchmark_id' + - name: job_id + in: path + required: true + schema: + type: string + description: 'Path parameter: job_id' /v1alpha/inference/rerank: post: responses: '200': - description: >- - RerankResponse with indices sorted by relevance score (descending). + description: RerankResponse with indices sorted by relevance score (descending). content: application/json: schema: $ref: '#/components/schemas/RerankResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - Inference - summary: >- - Rerank a list of documents based on their relevance to a query. - description: >- - Rerank a list of documents based on their relevance to a query. - parameters: [] + - Inference + summary: Rerank + description: Rerank a list of documents based on their relevance to a query. 
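Tying the four benchmark-job routes above together (run, status, cancel, result), a polling sketch; the benchmark ID is hypothetical, the RunEvalRequest payload is elided because its schema is not shown here, and the Job `job_id`/`status` field names and terminal states are assumptions:

    import time
    import requests

    BASE = "http://localhost:8321"          # assumed local server
    bench = "my-benchmark"                  # hypothetical benchmark ID

    job = requests.post(
        f"{BASE}/v1alpha/eval/benchmarks/{bench}/jobs",
        json={},                            # RunEvalRequest payload elided; see its schema
    ).json()

    while True:
        status = requests.get(
            f"{BASE}/v1alpha/eval/benchmarks/{bench}/jobs/{job['job_id']}"
        ).json()
        if status.get("status") in ("completed", "failed"):  # assumed terminal states
            break
        time.sleep(5)

    result = requests.get(
        f"{BASE}/v1alpha/eval/benchmarks/{bench}/jobs/{job['job_id']}/result"
    ).json()                                # EvaluateResponse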
+ operationId: rerank_v1alpha_inference_rerank_post requestBody: content: application/json: schema: $ref: '#/components/schemas/RerankRequest' required: true - deprecated: false /v1alpha/post-training/job/artifacts: get: responses: @@ -4113,54 +3720,56 @@ paths: $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the artifacts of a training job. + - Post Training + summary: Get Training Job Artifacts description: Get the artifacts of a training job. + operationId: get_training_job_artifacts_v1alpha_post_training_job_artifacts_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the artifacts of. - required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/job/cancel: post: responses: - '200': - description: OK '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' + '204': + description: Successful Response tags: - - PostTraining (Coming Soon) - summary: Cancel a training job. + - Post Training + summary: Cancel Training Job description: Cancel a training job. - parameters: [] + operationId: cancel_training_job_v1alpha_post_training_job_cancel_post requestBody: content: application/json: schema: $ref: '#/components/schemas/CancelTrainingJobRequest' required: true - deprecated: false /v1alpha/post-training/job/status: get: responses: @@ -4172,27 +3781,28 @@ paths: $ref: '#/components/schemas/PostTrainingJobStatusResponse' '400': $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - $ref: >- - #/components/responses/TooManyRequests429 + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - $ref: >- - #/components/responses/InternalServerError500 + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: $ref: '#/components/responses/DefaultError' + description: Default Response tags: - - PostTraining (Coming Soon) - summary: Get the status of a training job. + - Post Training + summary: Get Training Job Status description: Get the status of a training job. + operationId: get_training_job_status_v1alpha_post_training_job_status_get parameters: - - name: job_uuid - in: query - description: >- - The UUID of the job to get the status of. 
- required: true - schema: - type: string - deprecated: false + - name: job_uuid + in: query + required: true + schema: + type: string + title: Job Uuid /v1alpha/post-training/jobs: get: responses: @@ -4203,21 +3813,22 @@ paths: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Get all training jobs. + - Post Training + summary: Get Training Jobs description: Get all training jobs. - parameters: [] - deprecated: false + operationId: get_training_jobs_v1alpha_post_training_jobs_get /v1alpha/post-training/preference-optimize: post: responses: @@ -4228,27 +3839,28 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run preference optimization of a model. + - Post Training + summary: Preference Optimize description: Run preference optimization of a model. - parameters: [] + operationId: preference_optimize_v1alpha_post_training_preference_optimize_post requestBody: content: application/json: schema: $ref: '#/components/schemas/PreferenceOptimizeRequest' required: true - deprecated: false /v1alpha/post-training/supervised-fine-tune: post: responses: @@ -4259,1148 +3871,1277 @@ paths: schema: $ref: '#/components/schemas/PostTrainingJob' '400': + description: Bad Request $ref: '#/components/responses/BadRequest400' '429': - $ref: >- - #/components/responses/TooManyRequests429 + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' '500': - $ref: >- - #/components/responses/InternalServerError500 + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' default: + description: Default Response $ref: '#/components/responses/DefaultError' tags: - - PostTraining (Coming Soon) - summary: Run supervised fine-tuning of a model. + - Post Training + summary: Supervised Fine Tune description: Run supervised fine-tuning of a model. - parameters: [] + operationId: supervised_fine_tune_v1alpha_post_training_supervised_fine_tune_post requestBody: content: application/json: schema: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true - deprecated: false -jsonSchemaDialect: >- - https://json-schema.org/draft/2020-12/schema components: schemas: Error: - type: object + description: Error response from the API. Roughly follows RFC 7807. 
properties: status: + title: Status type: integer - description: HTTP status code title: + title: Title type: string - description: >- - Error title, a short summary of the error which is invariant for an error - type detail: + title: Detail type: string - description: >- - Error detail, a longer human-readable description of the error instance: - type: string - description: >- - (Optional) A URL which can be used to retrieve more information about - the specific occurrence of the error - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - status - - title - - detail + - status + - title + - detail title: Error - description: >- - Error response from the API. Roughly follows RFC 7807. - Order: - type: string - enum: - - asc - - desc - title: Order - description: Sort order for paginated responses. - ListOpenAIChatCompletionResponse: type: object + ListBatchesResponse: properties: - data: - type: array - items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages - has_more: - type: boolean - description: >- - Whether there are more completions available beyond this list - first_id: - type: string - description: ID of the first completion in this list - last_id: - type: string - description: ID of the last completion in this list object: type: string const: list + title: Object default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - OpenAIAssistantMessageParam: + data: + items: + $ref: '#/components/schemas/Batch' + type: array + title: Data + description: List of batch objects + first_id: + anyOf: + - type: string + - type: 'null' + description: ID of the first batch in the list + last_id: + anyOf: + - type: string + - type: 'null' + description: ID of the last batch in the list + has_more: + type: boolean + title: Has More + description: Whether there are more batches available + default: false type: object + required: + - data + title: ListBatchesResponse + description: Response containing a list of batch objects. 
+ CreateBatchRequest: + properties: + input_file_id: + type: string + title: Input File Id + endpoint: + type: string + title: Endpoint + completion_window: + type: string + const: 24h + title: Completion Window + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + idempotency_key: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_file_id + - endpoint + - completion_window + title: CreateBatchRequest + Batch: + properties: + id: + type: string + title: Id + completion_window: + type: string + title: Completion Window + created_at: + type: integer + title: Created At + endpoint: + type: string + title: Endpoint + input_file_id: + type: string + title: Input File Id + object: + type: string + const: batch + title: Object + status: + type: string + enum: + - validating + - failed + - in_progress + - finalizing + - completed + - expired + - cancelling + - cancelled + title: Status + cancelled_at: + anyOf: + - type: integer + - type: 'null' + cancelling_at: + anyOf: + - type: integer + - type: 'null' + completed_at: + anyOf: + - type: integer + - type: 'null' + error_file_id: + anyOf: + - type: string + - type: 'null' + errors: + anyOf: + - $ref: '#/components/schemas/Errors' + title: Errors + - type: 'null' + title: Errors + expired_at: + anyOf: + - type: integer + - type: 'null' + expires_at: + anyOf: + - type: integer + - type: 'null' + failed_at: + anyOf: + - type: integer + - type: 'null' + finalizing_at: + anyOf: + - type: integer + - type: 'null' + in_progress_at: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + model: + anyOf: + - type: string + - type: 'null' + output_file_id: + anyOf: + - type: string + - type: 'null' + request_counts: + anyOf: + - $ref: '#/components/schemas/BatchRequestCounts' + title: BatchRequestCounts + - type: 'null' + title: BatchRequestCounts + usage: + anyOf: + - $ref: '#/components/schemas/BatchUsage' + title: BatchUsage + - type: 'null' + title: BatchUsage + additionalProperties: true + type: object + required: + - id + - completion_window + - created_at + - endpoint + - input_file_id + - object + - status + title: Batch + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. + ListOpenAIChatCompletionResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAICompletionWithInputMessages' + type: array + title: Data + has_more: + type: boolean + title: Has More + first_id: + type: string + title: First Id + last_id: + type: string + title: Last Id + object: + type: string + const: list + title: Object + default: list + type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIChatCompletionResponse + description: Response from listing OpenAI-compatible chat completions. + OpenAIAssistantMessageParam: + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. properties: role: - type: string const: assistant default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the model's response - name: + title: Role type: string - description: >- - (Optional) The name of the assistant message participant. 
+ content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + nullable: true + name: + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: >- - List of tool calls. Each tool call is an OpenAIChatCompletionToolCall - object. - additionalProperties: false - required: - - role + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true title: OpenAIAssistantMessageParam - description: >- - A message containing the model's (assistant) response in an OpenAI-compatible - chat completion request. - "OpenAIChatCompletionContentPartImageParam": type: object + OpenAIChatCompletionContentPartImageParam: properties: type: type: string const: image_url + title: Type default: image_url - description: >- - Must be "image_url" to identify this as image content image_url: $ref: '#/components/schemas/OpenAIImageURL' - description: >- - Image URL specification and processing details - additionalProperties: false - required: - - type - - image_url - title: >- - OpenAIChatCompletionContentPartImageParam - description: >- - Image content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionContentPartParam: - oneOf: - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - - $ref: '#/components/schemas/OpenAIFile' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - file: '#/components/schemas/OpenAIFile' - OpenAIChatCompletionContentPartTextParam: type: object + required: + - image_url + title: OpenAIChatCompletionContentPartImageParam + description: Image content part for OpenAI-compatible chat completion messages. + OpenAIChatCompletionContentPartParam: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + OpenAIChatCompletionContentPartTextParam: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to identify this as text content text: type: string - description: The text content of the message - additionalProperties: false - required: - - type - - text - title: OpenAIChatCompletionContentPartTextParam - description: >- - Text content part for OpenAI-compatible chat completion messages. - OpenAIChatCompletionToolCall: + title: Text type: object + required: + - text + title: OpenAIChatCompletionContentPartTextParam + description: Text content part for OpenAI-compatible chat completion messages. 
+ OpenAIChatCompletionToolCall: properties: index: - type: integer - description: >- - (Optional) Index of the tool call in the list + anyOf: + - type: integer + - type: 'null' id: - type: string - description: >- - (Optional) Unique identifier for the tool call + anyOf: + - type: string + - type: 'null' type: type: string const: function + title: Type default: function - description: >- - Must be "function" to identify this as a function call function: - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' - description: (Optional) Function call details - additionalProperties: false - required: - - type - title: OpenAIChatCompletionToolCall - description: >- - Tool call specification for OpenAI-compatible chat completion responses. - OpenAIChatCompletionToolCallFunction: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' + title: OpenAIChatCompletionToolCallFunction + - type: 'null' + title: OpenAIChatCompletionToolCallFunction type: object + title: OpenAIChatCompletionToolCall + description: Tool call specification for OpenAI-compatible chat completion responses. + OpenAIChatCompletionToolCallFunction: properties: name: - type: string - description: (Optional) Name of the function to call + anyOf: + - type: string + - type: 'null' arguments: - type: string - description: >- - (Optional) Arguments to pass to the function as a JSON string - additionalProperties: false - title: OpenAIChatCompletionToolCallFunction - description: >- - Function call details for OpenAI-compatible tool calls. - OpenAIChatCompletionUsage: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIChatCompletionToolCallFunction + description: Function call details for OpenAI-compatible tool calls. + OpenAIChatCompletionUsage: properties: prompt_tokens: type: integer - description: Number of tokens in the prompt + title: Prompt Tokens completion_tokens: type: integer - description: Number of tokens in the completion + title: Completion Tokens total_tokens: type: integer - description: Total tokens used (prompt + completion) + title: Total Tokens prompt_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - title: >- - OpenAIChatCompletionUsagePromptTokensDetails - description: >- - Token details for prompt tokens in OpenAI chat completion usage. + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsagePromptTokensDetails' + title: OpenAIChatCompletionUsagePromptTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsagePromptTokensDetails completion_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - title: >- - OpenAIChatCompletionUsageCompletionTokensDetails - description: >- - Token details for output tokens in OpenAI chat completion usage. - additionalProperties: false - required: - - prompt_tokens - - completion_tokens - - total_tokens - title: OpenAIChatCompletionUsage - description: >- - Usage information for OpenAI chat completion. 
- OpenAIChoice: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsageCompletionTokensDetails' + title: OpenAIChatCompletionUsageCompletionTokensDetails + - type: 'null' + title: OpenAIChatCompletionUsageCompletionTokensDetails type: object + required: + - prompt_tokens + - completion_tokens + - total_tokens + title: OpenAIChatCompletionUsage + description: Usage information for OpenAI chat completion. + OpenAIChoice: properties: message: oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam-Output | ... (5 variants) discriminator: propertyName: role mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' developer: '#/components/schemas/OpenAIDeveloperMessageParam' - description: The message from the model + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' finish_reason: type: string - description: The reason the model stopped generating + title: Finish Reason index: type: integer - description: The index of the choice + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - required: - - message - - finish_reason - - index - title: OpenAIChoice - description: >- - A choice from an OpenAI-compatible chat completion response. - OpenAIChoiceLogprobs: + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs type: object + required: + - message + - finish_reason + - index + title: OpenAIChoice + description: A choice from an OpenAI-compatible chat completion response. + OpenAIChoiceLogprobs: properties: content: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' refusal: - type: array - items: - $ref: '#/components/schemas/OpenAITokenLogProb' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false - title: OpenAIChoiceLogprobs - description: >- - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. 
- OpenAIDeveloperMessageParam: + anyOf: + - items: + $ref: '#/components/schemas/OpenAITokenLogProb' + type: array + - type: 'null' type: object + title: OpenAIChoiceLogprobs + description: The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + OpenAIDeveloperMessageParam: properties: role: type: string const: developer + title: Role default: developer - description: >- - Must be "developer" to identify this as a developer message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The content of the developer message + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the developer message participant. - additionalProperties: false - required: - - role - - content - title: OpenAIDeveloperMessageParam - description: >- - A message from the developer in an OpenAI-compatible chat completion request. - OpenAIFile: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAIDeveloperMessageParam + description: A message from the developer in an OpenAI-compatible chat completion request. + OpenAIFile: properties: type: type: string const: file + title: Type default: file file: $ref: '#/components/schemas/OpenAIFileFile' - additionalProperties: false + type: object required: - - type - - file + - file title: OpenAIFile OpenAIFileFile: - type: object properties: file_data: - type: string + anyOf: + - type: string + - type: 'null' file_id: - type: string + anyOf: + - type: string + - type: 'null' filename: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object title: OpenAIFileFile OpenAIImageURL: - type: object properties: url: type: string - description: >- - URL of the image to include in the message + title: Url detail: - type: string - description: >- - (Optional) Level of detail for image processing. Can be "low", "high", - or "auto" - additionalProperties: false - required: - - url - title: OpenAIImageURL - description: >- - Image URL specification for OpenAI-compatible chat completion messages. - OpenAIMessageParam: - oneOf: - - $ref: '#/components/schemas/OpenAIUserMessageParam' - - $ref: '#/components/schemas/OpenAISystemMessageParam' - - $ref: '#/components/schemas/OpenAIAssistantMessageParam' - - $ref: '#/components/schemas/OpenAIToolMessageParam' - - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/OpenAIUserMessageParam' - system: '#/components/schemas/OpenAISystemMessageParam' - assistant: '#/components/schemas/OpenAIAssistantMessageParam' - tool: '#/components/schemas/OpenAIToolMessageParam' - developer: '#/components/schemas/OpenAIDeveloperMessageParam' - OpenAISystemMessageParam: + anyOf: + - type: string + - type: 'null' type: object + required: + - url + title: OpenAIImageURL + description: Image URL specification for OpenAI-compatible chat completion messages. 
+ OpenAIMessageParam: + discriminator: + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam' + propertyName: role + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam' + title: OpenAIUserMessageParam + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam' + title: OpenAIAssistantMessageParam + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + title: OpenAIUserMessageParam | ... (5 variants) + OpenAISystemMessageParam: properties: role: type: string const: system + title: Role default: system - description: >- - Must be "system" to identify this as a system message content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: >- - The content of the "system prompt". If multiple system messages are provided, - they are concatenated. The underlying Llama Stack code may also add other - system messages (for example, for formatting tool definitions). + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] name: - type: string - description: >- - (Optional) The name of the system message participant. - additionalProperties: false - required: - - role - - content - title: OpenAISystemMessageParam - description: >- - A system message providing instructions or context to the model. - OpenAITokenLogProb: + anyOf: + - type: string + - type: 'null' type: object + required: + - content + title: OpenAISystemMessageParam + description: A system message providing instructions or context to the model. + OpenAITokenLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number + title: Logprob top_logprobs: - type: array items: $ref: '#/components/schemas/OpenAITopLogProb' - additionalProperties: false - required: - - token - - logprob - - top_logprobs - title: OpenAITokenLogProb - description: >- - The log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIToolMessageParam: + type: array + title: Top Logprobs type: object + required: + - token + - logprob + - top_logprobs + title: OpenAITokenLogProb + description: |- + The log probability for a token from an OpenAI-compatible chat completion response. 
+ + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + OpenAIToolMessageParam: properties: role: type: string const: tool + title: Role default: tool - description: >- - Must be "tool" to identify this as a tool response tool_call_id: type: string - description: >- - Unique identifier for the tool call this response is for + title: Tool Call Id content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - description: The response content from the tool - additionalProperties: false - required: - - role - - tool_call_id - - content - title: OpenAIToolMessageParam - description: >- - A message representing the result of a tool invocation in an OpenAI-compatible - chat completion request. - OpenAITopLogProb: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + title: string | list[OpenAIChatCompletionContentPartTextParam] type: object + required: + - tool_call_id + - content + title: OpenAIToolMessageParam + description: A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + OpenAITopLogProb: properties: token: type: string + title: Token bytes: - type: array - items: - type: integer + anyOf: + - items: + type: integer + type: array + - type: 'null' logprob: type: number - additionalProperties: false - required: - - token - - logprob - title: OpenAITopLogProb - description: >- - The top log probability for a token from an OpenAI-compatible chat completion - response. - OpenAIUserMessageParam: + title: Logprob type: object + required: + - token + - logprob + title: OpenAITopLogProb + description: |- + The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + OpenAIUserMessageParam: + description: A message from the user in an OpenAI-compatible chat completion request. properties: role: - type: string const: user default: user - description: >- - Must be "user" to identify this as a user message - content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam' - description: >- - The content of the message, which can include text and other media - name: + title: Role type: string - description: >- - (Optional) The name of the user message participant. 
- additionalProperties: false + content: + anyOf: + - type: string + - items: + discriminator: + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + nullable: true required: - - role - - content + - content title: OpenAIUserMessageParam - description: >- - A message from the user in an OpenAI-compatible chat completion request. - OpenAIJSONSchema: type: object + OpenAIJSONSchema: properties: name: type: string - description: Name of the schema + title: Name description: - type: string - description: (Optional) Description of the schema + anyOf: + - type: string + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict adherence to the schema + anyOf: + - type: boolean + - type: 'null' schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The JSON schema definition - additionalProperties: false - required: - - name - title: OpenAIJSONSchema - description: >- - JSON schema specification for OpenAI-compatible structured response format. - OpenAIResponseFormatJSONObject: + anyOf: + - additionalProperties: true + type: object + - type: 'null' type: object + title: OpenAIJSONSchema + description: JSON schema specification for OpenAI-compatible structured response format. + OpenAIResponseFormatJSONObject: properties: type: type: string const: json_object + title: Type default: json_object - description: >- - Must be "json_object" to indicate generic JSON object response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatJSONObject - description: >- - JSON object response format for OpenAI-compatible chat completion requests. - OpenAIResponseFormatJSONSchema: type: object + title: OpenAIResponseFormatJSONObject + description: JSON object response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatJSONSchema: properties: type: type: string const: json_schema + title: Type default: json_schema - description: >- - Must be "json_schema" to indicate structured JSON response format json_schema: $ref: '#/components/schemas/OpenAIJSONSchema' - description: >- - The JSON schema specification for the response - additionalProperties: false - required: - - type - - json_schema - title: OpenAIResponseFormatJSONSchema - description: >- - JSON schema response format for OpenAI-compatible chat completion requests. 
- OpenAIResponseFormatParam: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseFormatText' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' - - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' - discriminator: - propertyName: type - mapping: - text: '#/components/schemas/OpenAIResponseFormatText' - json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' - json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' - OpenAIResponseFormatText: type: object + required: + - json_schema + title: OpenAIResponseFormatJSONSchema + description: JSON schema response format for OpenAI-compatible chat completion requests. + OpenAIResponseFormatParam: + discriminator: + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + OpenAIResponseFormatText: properties: type: type: string const: text + title: Type default: text - description: >- - Must be "text" to indicate plain text response format - additionalProperties: false - required: - - type - title: OpenAIResponseFormatText - description: >- - Text response format for OpenAI-compatible chat completion requests. - OpenAIChatCompletionRequestWithExtraBody: type: object + title: OpenAIResponseFormatText + description: Text response format for OpenAI-compatible chat completion requests. + OpenAIChatCompletionRequestWithExtraBody: properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - description: List of messages in the conversation. - frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. - function_call: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The function call to use. - functions: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) List of functions to use. - logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. - logprobs: - type: boolean - description: (Optional) The log probabilities to use. - max_completion_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. - n: - type: integer - description: >- - (Optional) The number of completions to generate. - parallel_tool_calls: - type: boolean - description: >- - (Optional) Whether to parallelize tool calls. - presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. 
- response_format: - $ref: '#/components/schemas/OpenAIResponseFormatParam' - description: (Optional) The response format to use. - seed: - type: integer - description: (Optional) The seed to use. - stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. - stream: - type: boolean - description: >- - (Optional) Whether to stream the response. - stream_options: - type: object - additionalProperties: oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. - temperature: - type: number - description: (Optional) The temperature to use. - tool_choice: - oneOf: - - type: string - - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tool choice to use. - tools: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input' + title: OpenAIAssistantMessageParam-Input + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Input' + title: OpenAIUserMessageParam-Input | ... (5 variants) type: array - items: + minItems: 1 + title: Messages + frequency_penalty: + anyOf: + - type: number + - type: 'null' + function_call: + anyOf: + - type: string + - additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The tools to use. 
+ - type: 'null' + title: string | object + functions: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + logit_bias: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + logprobs: + anyOf: + - type: boolean + - type: 'null' + max_completion_tokens: + anyOf: + - type: integer + - type: 'null' + max_tokens: + anyOf: + - type: integer + - type: 'null' + n: + anyOf: + - type: integer + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + presence_penalty: + anyOf: + - type: number + - type: 'null' + response_format: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText + - $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema' + title: OpenAIResponseFormatJSONSchema + - $ref: '#/components/schemas/OpenAIResponseFormatJSONObject' + title: OpenAIResponseFormatJSONObject + discriminator: + propertyName: type + mapping: + json_object: '#/components/schemas/OpenAIResponseFormatJSONObject' + json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema' + text: '#/components/schemas/OpenAIResponseFormatText' + title: OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject + - type: 'null' + title: Response Format + seed: + anyOf: + - type: integer + - type: 'null' + stop: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] + stream: + anyOf: + - type: boolean + - type: 'null' + stream_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + temperature: + anyOf: + - type: number + - type: 'null' + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + - type: 'null' + title: string | object + tools: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' top_logprobs: - type: integer - description: >- - (Optional) The top log probabilities to use. + anyOf: + - type: integer + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. - additionalProperties: false - required: - - model - - messages - title: OpenAIChatCompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible chat completion endpoint. - OpenAIChatCompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - messages + title: OpenAIChatCompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible chat completion endpoint. 
+ OpenAIChatCompletion: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion - additionalProperties: false - required: - - id - - choices - - object - - created - - model - title: OpenAIChatCompletion - description: >- - Response from an OpenAI-compatible chat completion request. - OpenAIChatCompletionChunk: + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage type: object + required: + - id + - choices + - created + - model + title: OpenAIChatCompletion + description: Response from an OpenAI-compatible chat completion request. + OpenAIChatCompletionChunk: + description: Chunk from a streaming response to an OpenAI-compatible chat completion request. properties: id: + title: Id type: string - description: The ID of the chat completion choices: - type: array items: $ref: '#/components/schemas/OpenAIChunkChoice' - description: List of choices + title: Choices + type: array object: - type: string const: chat.completion.chunk default: chat.completion.chunk - description: >- - The object type, which will be "chat.completion.chunk" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: + title: Object + type: string + created: + title: Created + type: integer + model: + title: Model type: string - description: >- - The model that was used to generate the chat completion usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information (typically included in final chunk with stream_options) - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + nullable: true + title: OpenAIChatCompletionUsage required: - - id - - choices - - object - - created - - model + - id + - choices + - created + - model title: OpenAIChatCompletionChunk - description: >- - Chunk from a streaming response to an OpenAI-compatible chat completion request. - OpenAIChoiceDelta: type: object + OpenAIChoiceDelta: + description: A delta from an OpenAI-compatible chat completion streaming response. 
properties: content: - type: string - description: (Optional) The content of the delta + anyOf: + - type: string + - type: 'null' + nullable: true refusal: - type: string - description: (Optional) The refusal of the delta + anyOf: + - type: string + - type: 'null' + nullable: true role: - type: string - description: (Optional) The role of the delta + anyOf: + - type: string + - type: 'null' + nullable: true tool_calls: - type: array - items: - $ref: '#/components/schemas/OpenAIChatCompletionToolCall' - description: (Optional) The tool calls of the delta + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + nullable: true reasoning_content: - type: string - description: >- - (Optional) The reasoning content from the model (non-standard, for o1/o3 - models) - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true title: OpenAIChoiceDelta - description: >- - A delta from an OpenAI-compatible chat completion streaming response. - OpenAIChunkChoice: type: object + OpenAIChunkChoice: + description: A chunk choice from an OpenAI-compatible chat completion streaming response. properties: delta: $ref: '#/components/schemas/OpenAIChoiceDelta' - description: The delta from the chunk finish_reason: + title: Finish Reason type: string - description: The reason the model stopped generating index: + title: Index type: integer - description: The index of the choice logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - description: >- - (Optional) The log probabilities for the tokens in the message - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + nullable: true + title: OpenAIChoiceLogprobs required: - - delta - - finish_reason - - index + - delta + - finish_reason + - index title: OpenAIChunkChoice - description: >- - A chunk choice from an OpenAI-compatible chat completion streaming response. 
- OpenAICompletionWithInputMessages: type: object + OpenAICompletionWithInputMessages: properties: id: type: string - description: The ID of the chat completion + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAIChoice' - description: List of choices + type: array + title: Choices object: type: string const: chat.completion + title: Object default: chat.completion - description: >- - The object type, which will be "chat.completion" created: type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created + title: Created model: type: string - description: >- - The model that was used to generate the chat completion + title: Model usage: - $ref: '#/components/schemas/OpenAIChatCompletionUsage' - description: >- - Token usage information for the completion + anyOf: + - $ref: '#/components/schemas/OpenAIChatCompletionUsage' + title: OpenAIChatCompletionUsage + - type: 'null' + title: OpenAIChatCompletionUsage input_messages: - type: array items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output + - $ref: '#/components/schemas/OpenAISystemMessageParam' + title: OpenAISystemMessageParam + - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Output' + title: OpenAIAssistantMessageParam-Output + - $ref: '#/components/schemas/OpenAIToolMessageParam' + title: OpenAIToolMessageParam + - $ref: '#/components/schemas/OpenAIDeveloperMessageParam' + title: OpenAIDeveloperMessageParam + discriminator: + propertyName: role + mapping: + assistant: '#/components/schemas/OpenAIAssistantMessageParam-Output' + developer: '#/components/schemas/OpenAIDeveloperMessageParam' + system: '#/components/schemas/OpenAISystemMessageParam' + tool: '#/components/schemas/OpenAIToolMessageParam' + user: '#/components/schemas/OpenAIUserMessageParam-Output' + title: OpenAIUserMessageParam-Output | ... (5 variants) + type: array + title: Input Messages + type: object required: - - id - - choices - - object - - created - - model - - input_messages + - id + - choices + - created + - model + - input_messages title: OpenAICompletionWithInputMessages OpenAICompletionRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. + title: Model prompt: - oneOf: - - type: string - - type: array - items: - type: string - - type: array + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - items: + type: integer + type: array + title: list[integer] + - items: items: type: integer - - type: array - items: - type: array - items: - type: integer - description: The prompt to generate a completion for. + type: array + type: array + title: list[array] + title: string | ... (4 variants) best_of: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' echo: - type: boolean - description: (Optional) Whether to echo the prompt. + anyOf: + - type: boolean + - type: 'null' frequency_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' logit_bias: - type: object - additionalProperties: - type: number - description: (Optional) The logit bias to use. 
+ anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' logprobs: - type: boolean - description: (Optional) The log probabilities to use. + anyOf: + - type: boolean + - type: 'null' max_tokens: - type: integer - description: >- - (Optional) The maximum number of tokens to generate. + anyOf: + - type: integer + - type: 'null' n: - type: integer - description: >- - (Optional) The number of completions to generate. + anyOf: + - type: integer + - type: 'null' presence_penalty: - type: number - description: >- - (Optional) The penalty for repeated tokens. + anyOf: + - type: number + - type: 'null' seed: - type: integer - description: (Optional) The seed to use. + anyOf: + - type: integer + - type: 'null' stop: - oneOf: - - type: string - - type: array - items: - type: string - description: (Optional) The stop tokens to use. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + - type: 'null' + title: string | list[string] stream: - type: boolean - description: >- - (Optional) Whether to stream the response. + anyOf: + - type: boolean + - type: 'null' stream_options: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) The stream options to use. + anyOf: + - additionalProperties: true + type: object + - type: 'null' temperature: - type: number - description: (Optional) The temperature to use. + anyOf: + - type: number + - type: 'null' top_p: - type: number - description: (Optional) The top p to use. + anyOf: + - type: number + - type: 'null' user: - type: string - description: (Optional) The user to use. + anyOf: + - type: string + - type: 'null' suffix: - type: string - description: >- - (Optional) The suffix that should be appended to the completion. - additionalProperties: false - required: - - model - - prompt - title: OpenAICompletionRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible completion endpoint. - OpenAICompletion: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - prompt + title: OpenAICompletionRequestWithExtraBody + description: Request parameters for OpenAI-compatible completion endpoint. + OpenAICompletion: properties: id: type: string + title: Id choices: - type: array items: $ref: '#/components/schemas/OpenAICompletionChoice' + type: array + title: Choices created: type: integer + title: Created model: type: string + title: Model object: type: string const: text_completion + title: Object default: text_completion - additionalProperties: false - required: - - id - - choices - - created - - model - - object - title: OpenAICompletion - description: >- - Response from an OpenAI-compatible completion request. - OpenAICompletionChoice: type: object + required: + - id + - choices + - created + - model + title: OpenAICompletion + description: |- + Response from an OpenAI-compatible completion request. 
+ + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + OpenAICompletionChoice: properties: finish_reason: type: string + title: Finish Reason text: type: string + title: Text index: type: integer + title: Index logprobs: - $ref: '#/components/schemas/OpenAIChoiceLogprobs' - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIChoiceLogprobs' + title: OpenAIChoiceLogprobs + - type: 'null' + title: OpenAIChoiceLogprobs + type: object required: - - finish_reason - - text - - index + - finish_reason + - text + - index title: OpenAICompletionChoice - description: >- + description: |- A choice from an OpenAI-compatible completion response. + + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice ConversationItem: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' @@ -5408,8106 +5149,7885 @@ components: mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: 
OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) OpenAIResponseAnnotationCitation: - type: object properties: type: type: string const: url_citation + title: Type default: url_citation - description: >- - Annotation type identifier, always "url_citation" end_index: type: integer - description: >- - End position of the citation span in the content + title: End Index start_index: type: integer - description: >- - Start position of the citation span in the content + title: Start Index title: type: string - description: Title of the referenced web resource + title: Title url: type: string - description: URL of the referenced web resource - additionalProperties: false - required: - - type - - end_index - - start_index - - title - - url - title: OpenAIResponseAnnotationCitation - description: >- - URL citation annotation for referencing external web resources. - "OpenAIResponseAnnotationContainerFileCitation": + title: Url type: object + required: + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + description: URL citation annotation for referencing external web resources. + OpenAIResponseAnnotationContainerFileCitation: properties: type: type: string const: container_file_citation + title: Type default: container_file_citation container_id: type: string + title: Container Id end_index: type: integer + title: End Index file_id: type: string + title: File Id filename: type: string + title: Filename start_index: type: integer - additionalProperties: false - required: - - type - - container_id - - end_index - - file_id - - filename - - start_index - title: >- - OpenAIResponseAnnotationContainerFileCitation - OpenAIResponseAnnotationFileCitation: + title: Start Index type: object + required: + - container_id + - end_index + - file_id + - filename + - start_index + title: OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: properties: type: type: string const: file_citation + title: Type default: file_citation - description: >- - Annotation type identifier, always "file_citation" file_id: type: string - description: Unique identifier of the referenced file + title: File Id filename: type: string - description: Name of the referenced file + title: Filename index: type: integer - description: >- - Position index of the citation within the content - additionalProperties: false - required: - - type - - file_id - - filename - - index - title: OpenAIResponseAnnotationFileCitation - description: >- - File citation annotation for referencing specific files in response content. - OpenAIResponseAnnotationFilePath: + title: Index type: object + required: + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + description: File citation annotation for referencing specific files in response content. 
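The four annotation schemas above are discriminated on `type`. A minimal sketch of payloads that would validate against `OpenAIResponseAnnotationFileCitation` and `OpenAIResponseAnnotationCitation` — the field names follow the schemas in this hunk, the concrete values are illustrative:

```python
# Illustrative annotation payloads; values are made up.

file_citation = {
    "type": "file_citation",      # discriminator value
    "file_id": "file-abc123",     # required
    "filename": "report.pdf",     # required
    "index": 42,                  # required: position within the content
}

url_citation = {
    "type": "url_citation",
    "start_index": 97,            # citation span start in the content
    "end_index": 120,             # citation span end
    "title": "Example source",
    "url": "https://example.com/article",
}

# The discriminator mapping in the spec routes validation by "type":
def annotation_schema_for(payload: dict) -> str:
    mapping = {
        "file_citation": "OpenAIResponseAnnotationFileCitation",
        "url_citation": "OpenAIResponseAnnotationCitation",
        "container_file_citation": "OpenAIResponseAnnotationContainerFileCitation",
        "file_path": "OpenAIResponseAnnotationFilePath",
    }
    return mapping[payload["type"]]

assert annotation_schema_for(url_citation) == "OpenAIResponseAnnotationCitation"
```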
+ OpenAIResponseAnnotationFilePath: properties: type: type: string const: file_path + title: Type default: file_path file_id: type: string + title: File Id index: type: integer - additionalProperties: false + title: Index + type: object required: - - type - - file_id - - index + - file_id + - index title: OpenAIResponseAnnotationFilePath OpenAIResponseAnnotations: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' - - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' discriminator: - propertyName: type mapping: - file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' - url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) OpenAIResponseContentPartRefusal: - type: object properties: type: type: string const: refusal + title: Type default: refusal - description: >- - Content part type identifier, always "refusal" refusal: type: string - description: Refusal text supplied by the model - additionalProperties: false - required: - - type - - refusal - title: OpenAIResponseContentPartRefusal - description: >- - Refusal content within a streamed response part. - "OpenAIResponseInputFunctionToolCallOutput": + title: Refusal type: object + required: + - refusal + title: OpenAIResponseContentPartRefusal + description: Refusal content within a streamed response part. + OpenAIResponseInputFunctionToolCallOutput: properties: call_id: type: string + title: Call Id output: type: string + title: Output type: type: string const: function_call_output + title: Type default: function_call_output id: - type: string + anyOf: + - type: string + - type: 'null' status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. 
- OpenAIResponseInputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - discriminator: - propertyName: type - mapping: - input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' - input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' - OpenAIResponseInputMessageContentImage: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - output + title: OpenAIResponseInputFunctionToolCallOutput + description: This represents the output of a function call that gets passed back to the model. + OpenAIResponseInputMessageContent: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + OpenAIResponseInputMessageContentFile: + properties: + type: + type: string + const: input_file + title: Type + default: input_file + file_data: + anyOf: + - type: string + - type: 'null' + file_id: + anyOf: + - type: string + - type: 'null' + file_url: + anyOf: + - type: string + - type: 'null' + filename: + anyOf: + - type: string + - type: 'null' + type: object + title: OpenAIResponseInputMessageContentFile + description: File content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentImage: properties: detail: - oneOf: - - type: string - const: low - - type: string - const: high - - type: string - const: auto + title: Detail default: auto - description: >- - Level of detail for image processing, can be "low", "high", or "auto" + type: string + enum: + - low + - high + - auto type: type: string const: input_image + title: Type default: input_image - description: >- - Content type identifier, always "input_image" + file_id: + anyOf: + - type: string + - type: 'null' image_url: - type: string - description: (Optional) URL of the image content - additionalProperties: false - required: - - detail - - type - title: OpenAIResponseInputMessageContentImage - description: >- - Image content for input messages in OpenAI response format. - OpenAIResponseInputMessageContentText: + anyOf: + - type: string + - type: 'null' type: object + title: OpenAIResponseInputMessageContentImage + description: Image content for input messages in OpenAI response format. + OpenAIResponseInputMessageContentText: properties: text: type: string - description: The text content of the input message + title: Text type: type: string const: input_text + title: Type default: input_text - description: >- - Content type identifier, always "input_text" - additionalProperties: false - required: - - text - - type - title: OpenAIResponseInputMessageContentText - description: >- - Text content for input messages in OpenAI response format. 
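`OpenAIResponseInputMessageContentFile` is new in this revision and joins the `input_text`/`input_image` variants under the same discriminator. All four `file_*` fields are nullable, so a file part can point at an uploaded file by ID, a URL, or inline data; the sketch below assumes (as in OpenAI's Responses API) that `file_data` carries base64-encoded bytes:

```python
import base64

# Hypothetical content parts for an input message. The schema requires
# none of the file_* fields; in practice you set exactly one reference.

by_id = {"type": "input_file", "file_id": "file-abc123"}

by_url = {
    "type": "input_file",
    "file_url": "https://example.com/spec.pdf",
    "filename": "spec.pdf",
}

# Inline upload: file_data carries the bytes, base64-encoded (assumption).
pdf_bytes = b"%PDF-1.4 minimal placeholder"  # stand-in for real file bytes
inline = {
    "type": "input_file",
    "file_data": base64.b64encode(pdf_bytes).decode("ascii"),
    "filename": "spec.pdf",
}
```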
- OpenAIResponseMCPApprovalRequest: type: object + required: + - text + title: OpenAIResponseInputMessageContentText + description: Text content for input messages in OpenAI response format. + OpenAIResponseMCPApprovalRequest: properties: arguments: type: string + title: Arguments id: type: string + title: Id name: type: string + title: Name server_label: type: string + title: Server Label type: type: string const: mcp_approval_request + title: Type default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: type: object + required: + - arguments + - id + - name + - server_label + title: OpenAIResponseMCPApprovalRequest + description: A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: properties: approval_request_id: type: string + title: Approval Request Id approve: type: boolean + title: Approve type: type: string const: mcp_approval_response + title: Type default: mcp_approval_response id: - type: string + anyOf: + - type: string + - type: 'null' reason: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - approval_request_id - - approve - - type + - approval_request_id + - approve title: OpenAIResponseMCPApprovalResponse description: A response to an MCP approval request. OpenAIResponseMessage: - type: object + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
properties: content: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputMessageContent' - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + anyOf: + - type: string + - items: + discriminator: + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] role: - oneOf: - - type: string - const: system - - type: string - const: developer - - type: string - const: user - - type: string - const: assistant - type: + title: Role type: string + enum: + - system + - developer + - user + - assistant + default: system + type: const: message default: message + title: Type + type: string id: - type: string + anyOf: + - type: string + - type: 'null' + nullable: true status: - type: string - additionalProperties: false + anyOf: + - type: string + - type: 'null' + nullable: true required: - - content - - role - - type + - content + - role title: OpenAIResponseMessage - description: >- - Corresponds to the various Message types in the Responses API. They are all - under one type because the Responses API gives them all the same "type" value, - and there is no way to tell them apart in certain scenarios. 
+ type: object OpenAIResponseOutputMessageContent: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' - - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' discriminator: - propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' - "OpenAIResponseOutputMessageContentOutputText": - type: object + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + OpenAIResponseOutputMessageContentOutputText: properties: text: type: string + title: Text type: type: string const: output_text + title: Type default: output_text annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - additionalProperties: false - required: - - text - - type - - annotations - title: >- - OpenAIResponseOutputMessageContentOutputText - "OpenAIResponseOutputMessageFileSearchToolCall": + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + discriminator: + propertyName: type + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationFileCitation | ... 
(4 variants) + type: array + title: Annotations type: object + required: + - text + title: OpenAIResponseOutputMessageContentOutputText + OpenAIResponseOutputMessageFileSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id queries: - type: array items: type: string - description: List of search queries executed + type: array + title: Queries status: type: string - description: >- - Current status of the file search operation + title: Status type: type: string const: file_search_call + title: Type default: file_search_call - description: >- - Tool call type identifier, always "file_search_call" results: - type: array - items: - type: object - properties: - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value attributes associated with the file - file_id: - type: string - description: >- - Unique identifier of the file containing the result - filename: - type: string - description: Name of the file containing the result - score: - type: number - description: >- - Relevance score for this search result (between 0 and 1) - text: - type: string - description: Text content of the search result - additionalProperties: false - required: - - attributes - - file_id - - filename - - score - - text - title: >- - OpenAIResponseOutputMessageFileSearchToolCallResults - description: >- - Search results returned by the file search operation. - description: >- - (Optional) Search results returned by the file search operation - additionalProperties: false - required: - - id - - queries - - status - - type - title: >- - OpenAIResponseOutputMessageFileSearchToolCall - description: >- - File search tool call output message for OpenAI responses. - "OpenAIResponseOutputMessageFunctionToolCall": + anyOf: + - items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults' + type: array + - type: 'null' type: object + required: + - id + - queries + - status + title: OpenAIResponseOutputMessageFileSearchToolCall + description: File search tool call output message for OpenAI responses. + OpenAIResponseOutputMessageFunctionToolCall: properties: call_id: type: string - description: Unique identifier for the function call + title: Call Id name: type: string - description: Name of the function being called + title: Name arguments: type: string - description: >- - JSON string containing the function arguments + title: Arguments type: type: string const: function_call + title: Type default: function_call - description: >- - Tool call type identifier, always "function_call" id: - type: string - description: >- - (Optional) Additional identifier for the tool call + anyOf: + - type: string + - type: 'null' status: - type: string - description: >- - (Optional) Current status of the function call execution - additionalProperties: false - required: - - call_id - - name - - arguments - - type - title: >- - OpenAIResponseOutputMessageFunctionToolCall - description: >- - Function tool call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPCall: + anyOf: + - type: string + - type: 'null' type: object + required: + - call_id + - name + - arguments + title: OpenAIResponseOutputMessageFunctionToolCall + description: Function tool call output message for OpenAI responses. 
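`OpenAIResponseOutputMessageFunctionToolCall` (emitted by the model) and the earlier `OpenAIResponseInputFunctionToolCallOutput` (supplied by the caller) are paired through `call_id`, with `arguments` as a JSON-encoded string. A hedged sketch of the round trip, using hypothetical tool names:

```python
import json

# What the model emits (OpenAIResponseOutputMessageFunctionToolCall):
tool_call = {
    "type": "function_call",
    "call_id": "call_123",                        # required
    "name": "get_weather",                        # required
    "arguments": json.dumps({"city": "Paris"}),   # JSON string, per schema
}

# Execute the call locally, then hand the result back as a
# function_call_output item (OpenAIResponseInputFunctionToolCallOutput):
args = json.loads(tool_call["arguments"])
result = f"Sunny in {args['city']}"               # stand-in for a real tool

tool_output = {
    "type": "function_call_output",
    "call_id": tool_call["call_id"],              # must match the call
    "output": result,                             # required, a string
}
```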
+ OpenAIResponseOutputMessageMCPCall: properties: id: type: string - description: Unique identifier for this MCP call + title: Id type: type: string const: mcp_call + title: Type default: mcp_call - description: >- - Tool call type identifier, always "mcp_call" arguments: type: string - description: >- - JSON string containing the MCP call arguments + title: Arguments name: type: string - description: Name of the MCP method being called + title: Name server_label: type: string - description: >- - Label identifying the MCP server handling the call + title: Server Label error: - type: string - description: >- - (Optional) Error message if the MCP call failed + anyOf: + - type: string + - type: 'null' output: - type: string - description: >- - (Optional) Output result from the successful MCP call - additionalProperties: false - required: - - id - - type - - arguments - - name - - server_label - title: OpenAIResponseOutputMessageMCPCall - description: >- - Model Context Protocol (MCP) call output message for OpenAI responses. - OpenAIResponseOutputMessageMCPListTools: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + - arguments + - name + - server_label + title: OpenAIResponseOutputMessageMCPCall + description: Model Context Protocol (MCP) call output message for OpenAI responses. + OpenAIResponseOutputMessageMCPListTools: properties: id: type: string - description: >- - Unique identifier for this MCP list tools operation + title: Id type: type: string const: mcp_list_tools + title: Type default: mcp_list_tools - description: >- - Tool call type identifier, always "mcp_list_tools" server_label: type: string - description: >- - Label identifying the MCP server providing the tools + title: Server Label tools: - type: array items: - type: object - properties: - input_schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - JSON schema defining the tool's input parameters - name: - type: string - description: Name of the tool - description: - type: string - description: >- - (Optional) Description of what the tool does - additionalProperties: false - required: - - input_schema - - name - title: MCPListToolsTool - description: >- - Tool definition returned by MCP list tools operation. - description: >- - List of available tools provided by the MCP server - additionalProperties: false - required: - - id - - type - - server_label - - tools - title: OpenAIResponseOutputMessageMCPListTools - description: >- - MCP list tools output message containing available tools from an MCP server. - "OpenAIResponseOutputMessageWebSearchToolCall": + $ref: '#/components/schemas/MCPListToolsTool' + type: array + title: Tools type: object + required: + - id + - server_label + - tools + title: OpenAIResponseOutputMessageMCPListTools + description: MCP list tools output message containing available tools from an MCP server. 
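`OpenAIResponseOutputMessageMCPListTools.tools` now `$ref`s a standalone `MCPListToolsTool` schema instead of inlining it; per the removed inline definition, each entry requires `input_schema` and `name`. An illustrative item:

```python
# Illustrative mcp_list_tools item; names and schema are made up.
mcp_list_tools = {
    "type": "mcp_list_tools",
    "id": "mcpl_001",                 # required
    "server_label": "docs-server",    # required
    "tools": [                        # required list of MCPListToolsTool
        {
            "name": "search_docs",
            "description": "Full-text search over the documentation",
            "input_schema": {         # JSON schema for the tool's arguments
                "type": "object",
                "properties": {"query": {"type": "string"}},
                "required": ["query"],
            },
        }
    ],
}
```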
+ OpenAIResponseOutputMessageWebSearchToolCall: properties: id: type: string - description: Unique identifier for this tool call + title: Id status: type: string - description: >- - Current status of the web search operation + title: Status type: type: string const: web_search_call + title: Type default: web_search_call - description: >- - Tool call type identifier, always "web_search_call" - additionalProperties: false - required: - - id - - status - - type - title: >- - OpenAIResponseOutputMessageWebSearchToolCall - description: >- - Web search tool call output message for OpenAI responses. - CreateConversationRequest: type: object + required: + - id + - status + title: OpenAIResponseOutputMessageWebSearchToolCall + description: Web search tool call output message for OpenAI responses. + CreateConversationRequest: properties: items: - type: array - items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Initial items to include in the conversation context. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + - type: 'null' metadata: - type: object - additionalProperties: - type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object title: CreateConversationRequest Conversation: - type: object properties: id: type: string + title: Id + description: The unique ID of the conversation. object: type: string const: conversation + title: Object + description: The object type, which is always conversation. 
default: conversation created_at: type: integer + title: Created At + description: The time at which the conversation was created, measured in seconds since the Unix epoch. metadata: - type: object - additionalProperties: - type: string - items: - type: array - items: + anyOf: + - additionalProperties: + type: string type: object - title: dict - description: >- - dict() -> new empty dictionary dict(mapping) -> new dictionary initialized - from a mapping object's (key, value) pairs dict(iterable) -> new - dictionary initialized as if via: d = {} for k, v in iterable: d[k] - = v dict(**kwargs) -> new dictionary initialized with the name=value - pairs in the keyword argument list. For example: dict(one=1, two=2) - additionalProperties: false + - type: 'null' + description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. + items: + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + description: Initial items to include in the conversation context. You may add up to 20 items at a time. + type: object required: - - id - - object - - created_at + - id + - created_at title: Conversation description: OpenAI-compatible conversation object. UpdateConversationRequest: - type: object properties: metadata: - type: object additionalProperties: type: string - description: >- - Set of key-value pairs that can be attached to an object. - additionalProperties: false + type: object + title: Metadata + type: object required: - - metadata + - metadata title: UpdateConversationRequest ConversationDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted conversation identifier object: type: string + title: Object + description: Object type default: conversation.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationDeletedResource description: Response for deleted conversation. 
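Both fields of `CreateConversationRequest` are nullable, and `ConversationDeletedResource` now requires only `id` (with `object` and `deleted` defaulted). A sketch of the round trip over HTTP, assuming llama-stack's default port and the usual OpenAI-style `/v1/conversations` routes, which are not part of this hunk:

```python
import requests

BASE = "http://localhost:8321/v1"  # assumed llama-stack base URL

# Create: items and metadata are both nullable in CreateConversationRequest.
resp = requests.post(f"{BASE}/conversations", json={
    "items": [
        {"type": "message", "role": "user",
         "content": [{"type": "input_text", "text": "Hello!"}]}
    ],
    "metadata": {"topic": "greeting"},   # string-to-string map, per schema
})
conv = resp.json()
assert conv["object"] == "conversation"  # const in the Conversation schema

# Delete: the body should validate as ConversationDeletedResource,
# where object defaults to "conversation.deleted" and deleted to true.
deleted = requests.delete(f"{BASE}/conversations/{conv['id']}").json()
print(deleted.get("deleted", True))
```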
ConversationItemList: - type: object properties: object: type: string + title: Object + description: Object type default: list data: - type: array items: - $ref: '#/components/schemas/ConversationItem' + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (9 variants) + type: array + title: Data + description: List of conversation items first_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the first item in the list last_id: - type: string + anyOf: + - type: string + - type: 'null' + description: The ID of the last item in the list has_more: type: boolean + title: Has More + description: Whether there are more items available default: false - additionalProperties: false - required: - - object - - data - - has_more - title: ConversationItemList - description: >- - List of conversation items with pagination. - AddItemsRequest: type: object + required: + - data + title: ConversationItemList + description: List of conversation items with pagination. + AddItemsRequest: properties: items: - type: array items: - $ref: '#/components/schemas/ConversationItem' - description: >- - Items to include in the conversation context. 
- additionalProperties: false + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (9 variants) + type: array + title: Items + type: object required: - - items + - items title: AddItemsRequest ConversationItemDeletedResource: - type: object properties: id: type: string + title: Id + description: The deleted item identifier object: type: string + title: Object + description: Object type default: conversation.item.deleted deleted: type: boolean + title: Deleted + description: Whether the object was deleted default: true - additionalProperties: false + type: object required: - - id - - object - - deleted + - id title: ConversationItemDeletedResource description: Response for deleted conversation item. OpenAIEmbeddingsRequestWithExtraBody: - type: object properties: model: type: string - description: >- - The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. + title: Model input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input text to embed, encoded as a string or array of strings. To embed - multiple inputs in a single request, pass an array of strings. + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] encoding_format: - type: string + anyOf: + - type: string + - type: 'null' default: float - description: >- - (Optional) The format to return the embeddings in. Can be either "float" - or "base64". Defaults to "float". dimensions: - type: integer - description: >- - (Optional) The number of dimensions the resulting output embeddings should - have. 
Only supported in text-embedding-3 and later models. + anyOf: + - type: integer + - type: 'null' user: - type: string - description: >- - (Optional) A unique identifier representing your end-user, which can help - OpenAI to monitor and detect abuse. - additionalProperties: false - required: - - model - - input - title: OpenAIEmbeddingsRequestWithExtraBody - description: >- - Request parameters for OpenAI-compatible embeddings endpoint. - OpenAIEmbeddingData: + anyOf: + - type: string + - type: 'null' + additionalProperties: true type: object + required: + - model + - input + title: OpenAIEmbeddingsRequestWithExtraBody + description: Request parameters for OpenAI-compatible embeddings endpoint. + OpenAIEmbeddingData: properties: object: type: string const: embedding + title: Object default: embedding - description: >- - The object type, which will be "embedding" embedding: - oneOf: - - type: array - items: - type: number - - type: string - description: >- - The embedding vector as a list of floats (when encoding_format="float") - or as a base64-encoded string (when encoding_format="base64") + anyOf: + - items: + type: number + type: array + title: list[number] + - type: string + title: list[number] | string index: type: integer - description: >- - The index of the embedding in the input list - additionalProperties: false - required: - - object - - embedding - - index - title: OpenAIEmbeddingData - description: >- - A single embedding data object from an OpenAI-compatible embeddings response. - OpenAIEmbeddingUsage: + title: Index type: object + required: + - embedding + - index + title: OpenAIEmbeddingData + description: A single embedding data object from an OpenAI-compatible embeddings response. + OpenAIEmbeddingUsage: properties: prompt_tokens: type: integer - description: The number of tokens in the input + title: Prompt Tokens total_tokens: type: integer - description: The total number of tokens used - additionalProperties: false - required: - - prompt_tokens - - total_tokens - title: OpenAIEmbeddingUsage - description: >- - Usage information for an OpenAI-compatible embeddings response. - OpenAIEmbeddingsResponse: + title: Total Tokens type: object + required: + - prompt_tokens + - total_tokens + title: OpenAIEmbeddingUsage + description: Usage information for an OpenAI-compatible embeddings response. + OpenAIEmbeddingsResponse: properties: object: type: string const: list + title: Object default: list - description: The object type, which will be "list" data: - type: array items: $ref: '#/components/schemas/OpenAIEmbeddingData' - description: List of embedding data objects + type: array + title: Data model: type: string - description: >- - The model that was used to generate the embeddings + title: Model usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' - description: Usage information - additionalProperties: false + type: object required: - - object - - data - - model - - usage + - data + - model + - usage title: OpenAIEmbeddingsResponse - description: >- - Response from an OpenAI-compatible embeddings request. + description: Response from an OpenAI-compatible embeddings request. OpenAIFilePurpose: type: string enum: - - assistants - - batch + - assistants + - batch title: OpenAIFilePurpose - description: >- - Valid purpose values for OpenAI Files API. + description: Valid purpose values for OpenAI Files API. 
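`OpenAIEmbeddingsRequestWithExtraBody` requires only `model` and `input`, and `additionalProperties: true` lets provider-specific extra-body keys pass through untouched. A hedged sketch against an assumed `/v1/embeddings` route; the model id and the `truncate` extra are placeholders:

```python
import requests

BASE = "http://localhost:8321/v1"  # assumed base URL; path mirrors OpenAI's

resp = requests.post(f"{BASE}/embeddings", json={
    "model": "nomic-embed-text",             # placeholder model id
    "input": ["first text", "second text"],  # string | list[string]
    "encoding_format": "float",              # nullable, defaults to "float"
    # additionalProperties: true permits provider-specific extras:
    "truncate": True,                        # hypothetical extra-body field
})
body = resp.json()

# OpenAIEmbeddingsResponse requires data, model and usage:
for item in body["data"]:
    assert item["object"] == "embedding"
    print(item["index"], len(item["embedding"]))  # list[float] or base64 str
print(body["usage"]["prompt_tokens"], body["usage"]["total_tokens"])
```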
ListOpenAIFileResponse: - type: object properties: data: - type: array items: $ref: '#/components/schemas/OpenAIFileObject' - description: List of file objects + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more files available beyond this page + title: Has More first_id: type: string - description: >- - ID of the first file in the list for pagination + title: First Id last_id: type: string - description: >- - ID of the last file in the list for pagination + title: Last Id object: type: string const: list + title: Object default: list - description: The object type, which is always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIFileResponse - description: >- - Response for listing files in OpenAI Files API. - OpenAIFileObject: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIFileResponse + description: Response for listing files in OpenAI Files API. + OpenAIFileObject: properties: object: type: string const: file + title: Object default: file - description: The object type, which is always "file" id: type: string - description: >- - The file identifier, which can be referenced in the API endpoints + title: Id bytes: type: integer - description: The size of the file, in bytes + title: Bytes created_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file was created + title: Created At expires_at: type: integer - description: >- - The Unix timestamp (in seconds) for when the file expires + title: Expires At filename: type: string - description: The name of the file + title: Filename purpose: - type: string - enum: - - assistants - - batch - description: The intended purpose of the file - additionalProperties: false - required: - - object - - id - - bytes - - created_at - - expires_at - - filename - - purpose - title: OpenAIFileObject - description: >- - OpenAI File object as defined in the OpenAI Files API. - ExpiresAfter: + $ref: '#/components/schemas/OpenAIFilePurpose' type: object + required: + - id + - bytes + - created_at + - expires_at + - filename + - purpose + title: OpenAIFileObject + description: OpenAI File object as defined in the OpenAI Files API. + ExpiresAfter: properties: anchor: type: string const: created_at + title: Anchor seconds: type: integer - additionalProperties: false + maximum: 2592000.0 + minimum: 3600.0 + title: Seconds + type: object required: - - anchor - - seconds + - anchor + - seconds title: ExpiresAfter - description: >- + description: |- Control expiration of uploaded files. Params: - anchor, must be "created_at" - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) OpenAIFileDeleteResponse: - type: object properties: id: type: string - description: The file identifier that was deleted + title: Id object: type: string const: file + title: Object default: file - description: The object type, which is always "file" deleted: type: boolean - description: >- - Whether the file was successfully deleted - additionalProperties: false + title: Deleted + type: object required: - - id - - object - - deleted + - id + - deleted title: OpenAIFileDeleteResponse - description: >- - Response for deleting a file in OpenAI Files API. + description: Response for deleting a file in OpenAI Files API. 
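`ExpiresAfter` now carries its bounds in-schema (`seconds` must lie in [3600, 2592000]); previously the range was stated only in the description. A sketch of a multipart upload, assuming an OpenAI-style `/v1/files` route and the bracketed-key flattening OpenAI uses for the nested `expires_after` object in form data:

```python
import requests

BASE = "http://localhost:8321/v1"  # assumed base URL

# Assumes a local training.jsonl exists.
with open("training.jsonl", "rb") as f:
    resp = requests.post(
        f"{BASE}/files",
        files={"file": ("training.jsonl", f)},
        data={
            "purpose": "batch",                 # OpenAIFilePurpose enum value
            # ExpiresAfter: anchor is const "created_at",
            # seconds must lie within [3600, 2592000]:
            "expires_after[anchor]": "created_at",
            "expires_after[seconds]": "86400",  # one day
        },
    )
obj = resp.json()

# OpenAIFileObject requires id, bytes, created_at, expires_at,
# filename and purpose:
print(obj["id"], obj["bytes"], obj["expires_at"])
```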
Response: - type: object title: Response - HealthInfo: type: object + HealthInfo: properties: status: - type: string - enum: - - OK - - Error - - Not Implemented - description: Current health status of the service - additionalProperties: false - required: - - status - title: HealthInfo - description: >- - Health status information for the service. - RouteInfo: + $ref: '#/components/schemas/HealthStatus' type: object + required: + - status + title: HealthInfo + description: Health status information for the service. + RouteInfo: properties: route: type: string - description: The API endpoint path + title: Route method: type: string - description: HTTP method for the route + title: Method provider_types: - type: array items: type: string - description: >- - List of provider types that implement this route - additionalProperties: false - required: - - route - - method - - provider_types - title: RouteInfo - description: >- - Information about an API route including its path, method, and implementing - providers. - ListRoutesResponse: + type: array + title: Provider Types type: object + required: + - route + - method + - provider_types + title: RouteInfo + description: Information about an API route including its path, method, and implementing providers. + ListRoutesResponse: properties: data: - type: array items: $ref: '#/components/schemas/RouteInfo' - description: >- - List of available route information objects - additionalProperties: false - required: - - data - title: ListRoutesResponse - description: >- - Response containing a list of all available API routes. - Model: + type: array + title: Data type: object + required: + - data + title: ListRoutesResponse + description: Response containing a list of all available API routes. + OpenAIModel: + properties: + id: + type: string + title: Id + object: + type: string + const: model + title: Object + default: model + created: + type: integer + title: Created + owned_by: + type: string + title: Owned By + custom_metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - id + - created + - owned_by + title: OpenAIModel + description: |- + A model from OpenAI. 
+ + :id: The ID of the model + :object: The object type, which will be "model" + :created: The Unix timestamp in seconds when the model was created + :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata + OpenAIListModelsResponse: + properties: + data: + items: + $ref: '#/components/schemas/OpenAIModel' + type: array + title: Data + type: object + required: + - data + title: OpenAIListModelsResponse + Model: properties: identifier: type: string - description: >- - Unique identifier for this resource in llama stack + title: Identifier + description: Unique identifier for this resource in llama stack provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider + anyOf: + - type: string + - type: 'null' + description: Unique identifier for this resource in the provider provider_id: type: string - description: >- - ID of the provider that owns this resource + title: Provider Id + description: ID of the provider that owns this resource type: type: string - enum: - - model - - shield - - vector_store - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt const: model + title: Type default: model - description: >- - The resource type, always 'model' for model resources metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + title: Metadata description: Any additional metadata for this model model_type: $ref: '#/components/schemas/ModelType' default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false + type: object required: - - identifier - - provider_id - - type - - metadata - - model_type + - identifier + - provider_id title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. + description: A model resource representing an AI model registered in Llama Stack. ModelType: type: string enum: - - llm - - embedding + - llm + - embedding + - rerank title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - ListModelsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Model' - additionalProperties: false - required: - - data - title: ListModelsResponse - RegisterModelRequest: - type: object - properties: - model_id: - type: string - description: The identifier of the model to register. - provider_model_id: - type: string - description: >- - The identifier of the model in the provider. - provider_id: - type: string - description: The identifier of the provider. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model. - model_type: - $ref: '#/components/schemas/ModelType' - description: The type of model to register. - additionalProperties: false - required: - - model_id - title: RegisterModelRequest + description: Enumeration of supported model types in Llama Stack. RunModerationRequest: - type: object properties: input: - oneOf: - - type: string - - type: array - items: - type: string - description: >- - Input (or inputs) to classify. Can be a single string, an array of strings, - or an array of multi-modal input objects similar to other models. 
+ anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] model: - type: string - description: >- - The content moderation model you would like to use. - additionalProperties: false + anyOf: + - type: string + - type: 'null' + type: object required: - - input - - model + - input title: RunModerationRequest ModerationObject: - type: object properties: id: type: string - description: >- - The unique identifier for the moderation request. + title: Id model: type: string - description: >- - The model used to generate the moderation results. + title: Model results: - type: array items: $ref: '#/components/schemas/ModerationObjectResults' - description: A list of moderation objects - additionalProperties: false + type: array + title: Results + type: object required: - - id - - model - - results + - id + - model + - results title: ModerationObject description: A moderation object. ModerationObjectResults: - type: object properties: flagged: type: boolean - description: >- - Whether any of the below categories are flagged. + title: Flagged categories: - type: object - additionalProperties: - type: boolean - description: >- - A list of the categories, and whether they are flagged or not. + anyOf: + - additionalProperties: + type: boolean + type: object + - type: 'null' category_applied_input_types: - type: object - additionalProperties: - type: array - items: - type: string - description: >- - A list of the categories along with the input type(s) that the score applies - to. + anyOf: + - additionalProperties: + items: + type: string + type: array + type: object + - type: 'null' category_scores: - type: object - additionalProperties: - type: number - description: >- - A list of the categories along with their scores as predicted by model. + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' user_message: - type: string + anyOf: + - type: string + - type: 'null' metadata: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false + title: Metadata + type: object required: - - flagged - - metadata + - flagged title: ModerationObjectResults description: A moderation object. Prompt: - type: object properties: prompt: - type: string - description: >- - The system prompt text with variable placeholders. Variables are only - supported when using the Responses API. 
+ anyOf: + - type: string + - type: 'null' + description: The system prompt with variable placeholders version: type: integer - description: >- - Version (integer starting at 1, incremented on save) + minimum: 1.0 + title: Version + description: Version (integer starting at 1, incremented on save) prompt_id: type: string - description: >- - Unique identifier formatted as 'pmpt_<48-digit-hash>' + title: Prompt Id + description: Unique identifier in format 'pmpt_<48-digit-hash>' variables: - type: array items: type: string - description: >- - List of prompt variable names that can be used in the prompt template + type: array + title: Variables + description: List of variable names that can be used in the prompt template is_default: type: boolean + title: Is Default + description: Boolean indicating whether this version is the default version default: false - description: >- - Boolean indicating whether this version is the default version for this - prompt - additionalProperties: false - required: - - version - - prompt_id - - variables - - is_default - title: Prompt - description: >- - A prompt resource representing a stored OpenAI Compatible prompt template - in Llama Stack. - ListPromptsResponse: type: object + required: + - version + - prompt_id + title: Prompt + description: A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. + ListPromptsResponse: properties: data: - type: array items: $ref: '#/components/schemas/Prompt' - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: ListPromptsResponse description: Response model to list prompts. CreatePromptRequest: - type: object properties: prompt: type: string - description: >- - The prompt text content with variable placeholders. + title: Prompt variables: - type: array - items: - type: string - description: >- - List of variable names that can be used in the prompt template. - additionalProperties: false + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object required: - - prompt + - prompt title: CreatePromptRequest UpdatePromptRequest: - type: object properties: prompt: type: string - description: The updated prompt text content. + title: Prompt version: type: integer - description: >- - The current version of the prompt being updated. + title: Version variables: - type: array - items: - type: string - description: >- - Updated list of variable names that can be used in the prompt template. + anyOf: + - items: + type: string + type: array + - type: 'null' set_as_default: type: boolean - description: >- - Set the new version as the default (default=True). - additionalProperties: false + title: Set As Default + default: true + type: object required: - - prompt - - version - - set_as_default + - prompt + - version title: UpdatePromptRequest SetDefaultVersionRequest: - type: object properties: version: type: integer - description: The version to set as default. 
- additionalProperties: false + title: Version + type: object required: - - version + - version title: SetDefaultVersionRequest ProviderInfo: - type: object properties: api: type: string - description: The API name this provider implements + title: Api provider_id: type: string - description: Unique identifier for the provider + title: Provider Id provider_type: type: string - description: The type of provider implementation + title: Provider Type config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Configuration parameters for the provider + title: Config health: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Current health status of the provider - additionalProperties: false - required: - - api - - provider_id - - provider_type - - config - - health - title: ProviderInfo - description: >- - Information about a registered provider including its configuration and health - status. - ListProvidersResponse: + title: Health type: object + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: Information about a registered provider including its configuration and health status. + ListProvidersResponse: properties: data: - type: array items: $ref: '#/components/schemas/ProviderInfo' - description: List of provider information objects - additionalProperties: false - required: - - data - title: ListProvidersResponse - description: >- - Response containing a list of all available providers. - ListOpenAIResponseObject: + type: array + title: Data type: object + required: + - data + title: ListProvidersResponse + description: Response containing a list of all available providers. + ListOpenAIResponseObject: properties: data: - type: array items: $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context + type: array + title: Data has_more: type: boolean - description: >- - Whether there are more results available beyond this page + title: Has More first_id: type: string - description: >- - Identifier of the first item in this page + title: First Id last_id: type: string - description: Identifier of the last item in this page + title: Last Id object: type: string const: list + title: Object default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. - OpenAIResponseError: type: object + required: + - data + - has_more + - first_id + - last_id + title: ListOpenAIResponseObject + description: Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseError: properties: code: type: string - description: >- - Error code identifying the type of failure + title: Code message: type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: OpenAIResponseError - description: >- - Error details for failed OpenAI response requests. 
- OpenAIResponseInput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMessage' - OpenAIResponseInputToolFileSearch: + title: Message type: object + required: + - code + - message + title: OpenAIResponseError + description: Error details for failed OpenAI response requests. + OpenAIResponseInput: + anyOf: + - discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... 
(7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage + OpenAIResponseInputToolFileSearch: properties: type: type: string const: file_search + title: Type default: file_search - description: >- - Tool type identifier, always "file_search" vector_store_ids: - type: array items: type: string - description: >- - List of vector store identifiers to search within + type: array + title: Vector Store Ids filters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional filters to apply to the search + anyOf: + - additionalProperties: true + type: object + - type: 'null' max_num_results: - type: integer + anyOf: + - type: integer + maximum: 50.0 + minimum: 1.0 + - type: 'null' default: 10 - description: >- - (Optional) Maximum number of search results to return (1-50) ranking_options: - type: object - properties: - ranker: - type: string - description: >- - (Optional) Name of the ranking algorithm to use - score_threshold: - type: number - default: 0.0 - description: >- - (Optional) Minimum relevance score threshold for results - additionalProperties: false - description: >- - (Optional) Options for ranking and scoring search results - additionalProperties: false - required: - - type - - vector_store_ids - title: OpenAIResponseInputToolFileSearch - description: >- - File search tool configuration for OpenAI response inputs. - OpenAIResponseInputToolFunction: + anyOf: + - $ref: '#/components/schemas/SearchRankingOptions' + title: SearchRankingOptions + - type: 'null' + title: SearchRankingOptions type: object + required: + - vector_store_ids + title: OpenAIResponseInputToolFileSearch + description: File search tool configuration for OpenAI response inputs. + OpenAIResponseInputToolFunction: properties: type: type: string const: function + title: Type default: function - description: Tool type identifier, always "function" name: type: string - description: Name of the function that can be called + title: Name description: - type: string - description: >- - (Optional) Description of what the function does + anyOf: + - type: string + - type: 'null' parameters: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) JSON schema defining the function's parameters + anyOf: + - additionalProperties: true + type: object + - type: 'null' strict: - type: boolean - description: >- - (Optional) Whether to enforce strict parameter validation - additionalProperties: false - required: - - type - - name - title: OpenAIResponseInputToolFunction - description: >- - Function tool configuration for OpenAI response inputs. - OpenAIResponseInputToolWebSearch: + anyOf: + - type: boolean + - type: 'null' type: object + required: + - name + - parameters + title: OpenAIResponseInputToolFunction + description: Function tool configuration for OpenAI response inputs. 
+ OpenAIResponseInputToolWebSearch: properties: type: - oneOf: - - type: string - const: web_search - - type: string - const: web_search_preview - - type: string - const: web_search_preview_2025_03_11 + title: Type default: web_search - description: Web search tool type variant to use - search_context_size: type: string + enum: + - web_search + - web_search_preview + - web_search_preview_2025_03_11 + - web_search_2025_08_26 + search_context_size: + anyOf: + - type: string + pattern: ^low|medium|high$ + - type: 'null' default: medium - description: >- - (Optional) Size of search context, must be "low", "medium", or "high" - additionalProperties: false - required: - - type - title: OpenAIResponseInputToolWebSearch - description: >- - Web search tool configuration for OpenAI response inputs. - OpenAIResponseObjectWithInput: type: object + title: OpenAIResponseInputToolWebSearch + description: Web search tool configuration for OpenAI response inputs. + OpenAIResponseObjectWithInput: properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + type: array + title: Output parallel_tool_calls: - type: boolean - default: false - description: >- - Whether tool calls can be executed in parallel + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... 
(4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context + anyOf: + - type: string + - type: 'null' + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: - type: array items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: >- - List of input items that led to this response - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output + type: array + title: Input + type: object required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - - input + - created_at + - id + - model + - output + - status + - input title: OpenAIResponseObjectWithInput - description: >- - OpenAI response object extended with input context information. + description: OpenAI response object extended with input context information. 
OpenAIResponseOutput: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: - propertyName: type mapping: - message: '#/components/schemas/OpenAIResponseMessage' - web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - OpenAIResponseText: + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + title: OpenAIResponseMessage | ... (7 variants) + OpenAIResponsePrompt: + properties: + id: + type: string + title: Id + variables: + anyOf: + - additionalProperties: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: object + - type: 'null' + version: + anyOf: + - type: string + - type: 'null' type: object + required: + - id + title: OpenAIResponsePrompt + description: OpenAI compatible Prompt object that is used in OpenAI responses. 
+ OpenAIResponseText: properties: format: - type: object - properties: - type: - oneOf: - - type: string - const: text - - type: string - const: json_schema - - type: string - const: json_object - description: >- - Must be "text", "json_schema", or "json_object" to identify the format - type - name: - type: string - description: >- - The name of the response format. Only used for json_schema. - schema: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The JSON schema the response should conform to. In a Python SDK, this - is often a `pydantic` model. Only used for json_schema. - description: - type: string - description: >- - (Optional) A description of the response format. Only used for json_schema. - strict: - type: boolean - description: >- - (Optional) Whether to strictly enforce the JSON schema. If true, the - response must match the schema exactly. Only used for json_schema. - additionalProperties: false - required: - - type - description: >- - (Optional) Text format configuration specifying output format requirements - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIResponseTextFormat' + title: OpenAIResponseTextFormat + - type: 'null' + title: OpenAIResponseTextFormat + type: object title: OpenAIResponseText - description: >- - Text response configuration for OpenAI responses. + description: Text response configuration for OpenAI responses. OpenAIResponseTool: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' - - $ref: '#/components/schemas/OpenAIResponseToolMCP' discriminator: - propertyName: type mapping: - web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' function: '#/components/schemas/OpenAIResponseInputToolFunction' mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) OpenAIResponseToolMCP: - type: object properties: type: type: string const: mcp + title: Type default: mcp - description: Tool type identifier, always "mcp" server_label: type: string - description: Label to identify this MCP server + title: Server Label allowed_tools: - oneOf: - - type: array - items: - type: string - - type: object - properties: - tool_names: - type: array - items: - type: string - description: >- - (Optional) List of specific tool names that are allowed - additionalProperties: false - title: AllowedToolsFilter - description: >- - Filter configuration for restricting which MCP tools can be used. 
- description: >- - (Optional) Restriction on which tools can be used from this server - additionalProperties: false - required: - - type - - server_label - title: OpenAIResponseToolMCP - description: >- - Model Context Protocol (MCP) tool configuration for OpenAI response object. - OpenAIResponseUsage: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter type: object + required: + - server_label + title: OpenAIResponseToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response object. + OpenAIResponseUsage: properties: input_tokens: type: integer - description: Number of tokens in the input + title: Input Tokens output_tokens: type: integer - description: Number of tokens in the output + title: Output Tokens total_tokens: type: integer - description: Total tokens used (input + output) + title: Total Tokens input_tokens_details: - type: object - properties: - cached_tokens: - type: integer - description: Number of tokens retrieved from cache - additionalProperties: false - description: Detailed breakdown of input token usage + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageInputTokensDetails' + title: OpenAIResponseUsageInputTokensDetails + - type: 'null' + title: OpenAIResponseUsageInputTokensDetails output_tokens_details: - type: object - properties: - reasoning_tokens: - type: integer - description: >- - Number of tokens used for reasoning (o1/o3 models) - additionalProperties: false - description: Detailed breakdown of output token usage - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsageOutputTokensDetails' + title: OpenAIResponseUsageOutputTokensDetails + - type: 'null' + title: OpenAIResponseUsageOutputTokensDetails + type: object required: - - input_tokens - - output_tokens - - total_tokens + - input_tokens + - output_tokens + - total_tokens title: OpenAIResponseUsage description: Usage information for OpenAI response. ResponseGuardrailSpec: - type: object + description: Specification for a guardrail to apply during response generation. properties: type: + title: Type type: string - description: The type/identifier of the guardrail. - additionalProperties: false required: - - type + - type title: ResponseGuardrailSpec - description: >- - Specification for a guardrail to apply during response generation. 
+ type: object OpenAIResponseInputTool: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' - - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' - - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' discriminator: - propertyName: type mapping: - web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' function: '#/components/schemas/OpenAIResponseInputToolFunction' mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + title: OpenAIResponseInputToolWebSearch | ... (4 variants) OpenAIResponseInputToolMCP: - type: object properties: type: type: string const: mcp + title: Type default: mcp - description: Tool type identifier, always "mcp" server_label: type: string - description: Label to identify this MCP server + title: Server Label server_url: type: string - description: URL endpoint of the MCP server + title: Server Url headers: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) HTTP headers to include when connecting to the server + anyOf: + - additionalProperties: true + type: object + - type: 'null' + authorization: + anyOf: + - type: string + - type: 'null' require_approval: - oneOf: - - type: string - const: always - - type: string - const: never - - type: object - properties: - always: - type: array - items: - type: string - description: >- - (Optional) List of tool names that always require approval - never: - type: array - items: - type: string - description: >- - (Optional) List of tool names that never require approval - additionalProperties: false - title: ApprovalFilter - description: >- - Filter configuration for MCP tool approval requirements. + anyOf: + - type: string + const: always + - type: string + const: never + - $ref: '#/components/schemas/ApprovalFilter' + title: ApprovalFilter + title: string | ApprovalFilter default: never - description: >- - Approval requirement for tool calls ("always", "never", or filter) allowed_tools: - oneOf: - - type: array - items: - type: string - - type: object - properties: - tool_names: - type: array - items: - type: string - description: >- - (Optional) List of specific tool names that are allowed - additionalProperties: false - title: AllowedToolsFilter - description: >- - Filter configuration for restricting which MCP tools can be used. 
- description: >- - (Optional) Restriction on which tools can be used from this server - additionalProperties: false - required: - - type - - server_label - - server_url - - require_approval - title: OpenAIResponseInputToolMCP - description: >- - Model Context Protocol (MCP) tool configuration for OpenAI response inputs. - CreateOpenaiResponseRequest: + anyOf: + - items: + type: string + type: array + title: list[string] + - $ref: '#/components/schemas/AllowedToolsFilter' + title: AllowedToolsFilter + - type: 'null' + title: list[string] | AllowedToolsFilter type: object + required: + - server_label + - server_url + title: OpenAIResponseInputToolMCP + description: Model Context Protocol (MCP) tool configuration for OpenAI response inputs. + CreateOpenaiResponseRequest: properties: input: - oneOf: - - type: string - - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: Input message(s) to create the response. + anyOf: + - type: string + - items: + anyOf: + - oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Input' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Input | ... (7 variants) + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseMessage-Input' + title: OpenAIResponseMessage-Input + title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Input + type: array + title: list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] + title: string | list[OpenAIResponseMessageUnion | OpenAIResponseInputFunctionToolCallOutput | ...] model: type: string - description: The underlying LLM used for completions. 
+ title: Model + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt instructions: - type: string + anyOf: + - type: string + - type: 'null' + parallel_tool_calls: + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: - type: string - description: >- - (Optional) if specified, the new response will be a continuation of the - previous response. This can be used to easily fork-off new responses from - existing responses. + anyOf: + - type: string + - type: 'null' conversation: - type: string - description: >- - (Optional) The ID of a conversation to add the response to. Must begin - with 'conv_'. Input and output messages will be automatically added to - the conversation. + anyOf: + - type: string + - type: 'null' store: - type: boolean + anyOf: + - type: boolean + - type: 'null' + default: true stream: - type: boolean + anyOf: + - type: boolean + - type: 'null' + default: false temperature: - type: number + anyOf: + - type: number + - type: 'null' text: - $ref: '#/components/schemas/OpenAIResponseText' + anyOf: + - $ref: '#/components/schemas/OpenAIResponseText' + title: OpenAIResponseText + - type: 'null' + title: OpenAIResponseText tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInputTool' + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseInputToolMCP' + title: OpenAIResponseInputToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseInputToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + type: array + - type: 'null' include: - type: array - items: - type: string - description: >- - (Optional) Additional fields to include in the response. 
+ anyOf: + - items: + type: string + type: array + - type: 'null' max_infer_iters: - type: integer - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + default: 10 + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + type: object required: - - input - - model + - input + - model title: CreateOpenaiResponseRequest OpenAIResponseObject: - type: object properties: created_at: type: integer - description: >- - Unix timestamp when the response was created + title: Created At error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed + anyOf: + - $ref: '#/components/schemas/OpenAIResponseError' + title: OpenAIResponseError + - type: 'null' + title: OpenAIResponseError id: type: string - description: Unique identifier for this response + title: Id model: type: string - description: Model identifier used for generation + title: Model object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" output: - type: array items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage-Output' + title: OpenAIResponseMessage-Output + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + discriminator: + propertyName: type + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage-Output' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseMessage-Output | ... 
(7 variants) + type: array + title: Output parallel_tool_calls: - type: boolean - default: false - description: >- - Whether tool calls can be executed in parallel + anyOf: + - type: boolean + - type: 'null' + default: true previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation + anyOf: + - type: string + - type: 'null' + prompt: + anyOf: + - $ref: '#/components/schemas/OpenAIResponsePrompt' + title: OpenAIResponsePrompt + - type: 'null' + title: OpenAIResponsePrompt status: type: string - description: >- - Current status of the response generation + title: Status temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation + anyOf: + - type: number + - type: 'null' text: $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response + default: + format: + type: text top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation + anyOf: + - type: number + - type: 'null' tools: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseTool' - description: >- - (Optional) An array of tools the model may call while generating a response. + anyOf: + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch' + title: OpenAIResponseInputToolFileSearch + - $ref: '#/components/schemas/OpenAIResponseInputToolFunction' + title: OpenAIResponseInputToolFunction + - $ref: '#/components/schemas/OpenAIResponseToolMCP' + title: OpenAIResponseToolMCP + discriminator: + propertyName: type + mapping: + file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch' + function: '#/components/schemas/OpenAIResponseInputToolFunction' + mcp: '#/components/schemas/OpenAIResponseToolMCP' + web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_2025_08_26: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview: '#/components/schemas/OpenAIResponseInputToolWebSearch' + web_search_preview_2025_03_11: '#/components/schemas/OpenAIResponseInputToolWebSearch' + title: OpenAIResponseInputToolWebSearch | ... (4 variants) + type: array + - type: 'null' truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response + anyOf: + - type: string + - type: 'null' usage: - $ref: '#/components/schemas/OpenAIResponseUsage' - description: >- - (Optional) Token usage information for the response + anyOf: + - $ref: '#/components/schemas/OpenAIResponseUsage' + title: OpenAIResponseUsage + - type: 'null' + title: OpenAIResponseUsage instructions: - type: string - description: >- - (Optional) System message inserted into the model's context - additionalProperties: false - required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - title: OpenAIResponseObject - description: >- - Complete OpenAI response object containing generation results and metadata. 
- OpenAIResponseContentPartOutputText: + anyOf: + - type: string + - type: 'null' + max_tool_calls: + anyOf: + - type: integer + - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object + required: + - created_at + - id + - model + - output + - status + title: OpenAIResponseObject + description: Complete OpenAI response object containing generation results and metadata. + OpenAIResponseContentPartOutputText: + description: Text content within a streamed response part. properties: type: - type: string const: output_text default: output_text - description: >- - Content part type identifier, always "output_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Text emitted for this content part annotations: - type: array items: - $ref: '#/components/schemas/OpenAIResponseAnnotations' - description: >- - Structured annotations associated with the text + discriminator: + mapping: + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + title: OpenAIResponseAnnotationFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + title: OpenAIResponseAnnotationCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + title: OpenAIResponseAnnotationContainerFileCitation + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + title: OpenAIResponseAnnotationFilePath + title: OpenAIResponseAnnotationFileCitation | ... (4 variants) + title: Annotations + type: array logprobs: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: (Optional) Token log probability details - additionalProperties: false + anyOf: + - items: + additionalProperties: true + type: object + type: array + - type: 'null' + nullable: true required: - - type - - text - - annotations + - text title: OpenAIResponseContentPartOutputText - description: >- - Text content within a streamed response part. - "OpenAIResponseContentPartReasoningSummary": type: object + OpenAIResponseContentPartReasoningSummary: + description: Reasoning summary part in a streamed response. properties: type: - type: string const: summary_text default: summary_text - description: >- - Content part type identifier, always "summary_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Summary text - additionalProperties: false required: - - type - - text - title: >- - OpenAIResponseContentPartReasoningSummary - description: >- - Reasoning summary part in a streamed response. - OpenAIResponseContentPartReasoningText: + - text + title: OpenAIResponseContentPartReasoningSummary type: object + OpenAIResponseContentPartReasoningText: + description: Reasoning text emitted as part of a streamed response. 
properties: type: - type: string const: reasoning_text default: reasoning_text - description: >- - Content part type identifier, always "reasoning_text" - text: + title: Type + type: string + text: + title: Text type: string - description: Reasoning text supplied by the model - additionalProperties: false required: - - type - - text + - text title: OpenAIResponseContentPartReasoningText - description: >- - Reasoning text emitted as part of a streamed response. + type: object OpenAIResponseObjectStream: - oneOf: - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' - - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - - $ref: 
'#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' discriminator: - propertyName: type mapping: - response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' - response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' - response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' - response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' - response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' - response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' - response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' - response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' - response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' - response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' - response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' - response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' - response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' - response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' - response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' - response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' - response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' - response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + response.in_progress: 
'#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' response.reasoning_summary_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' response.reasoning_summary_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' response.reasoning_summary_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' response.reasoning_summary_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + response.reasoning_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + response.reasoning_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' response.refusal.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' response.refusal.done: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' - response.output_text.annotation.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' - response.file_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' - response.file_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' - response.file_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' - response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' - response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' - "OpenAIResponseObjectStreamResponseCompleted": - type: object + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: 
'#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + title: OpenAIResponseObjectStreamResponseCreated + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' + title: OpenAIResponseObjectStreamResponseInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + title: OpenAIResponseObjectStreamResponseOutputItemAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' + title: OpenAIResponseObjectStreamResponseOutputItemDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + title: OpenAIResponseObjectStreamResponseOutputTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + title: OpenAIResponseObjectStreamResponseOutputTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + title: OpenAIResponseObjectStreamResponseMcpListToolsFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + title: OpenAIResponseObjectStreamResponseMcpCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + title: OpenAIResponseObjectStreamResponseMcpCallFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' + title: OpenAIResponseObjectStreamResponseMcpCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' + title: OpenAIResponseObjectStreamResponseContentPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + title: OpenAIResponseObjectStreamResponseContentPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningTextDone' + title: OpenAIResponseObjectStreamResponseReasoningTextDone + - $ref: 
'#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryPartDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseReasoningSummaryTextDone' + title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDelta' + title: OpenAIResponseObjectStreamResponseRefusalDelta + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseRefusalDone' + title: OpenAIResponseObjectStreamResponseRefusalDone + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded' + title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallInProgress' + title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallSearching' + title: OpenAIResponseObjectStreamResponseFileSearchCallSearching + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFileSearchCallCompleted' + title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + title: OpenAIResponseObjectStreamResponseIncomplete + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' + title: OpenAIResponseObjectStreamResponseFailed + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + title: OpenAIResponseObjectStreamResponseCompleted + title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + OpenAIResponseObjectStreamResponseCompleted: + description: Streaming event indicating a response has been completed. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Completed response object type: - type: string const: response.completed default: response.completed - description: >- - Event type identifier, always "response.completed" - additionalProperties: false + title: Type + type: string required: - - response - - type - title: >- - OpenAIResponseObjectStreamResponseCompleted - description: >- - Streaming event indicating a response has been completed. - "OpenAIResponseObjectStreamResponseContentPartAdded": + - response + title: OpenAIResponseObjectStreamResponseCompleted type: object + OpenAIResponseObjectStreamResponseContentPartAdded: + description: Streaming event for when a new content part is added to a response item. 
+    OpenAIResponseObjectStreamResponseContentPartAdded:
+      description: Streaming event for when a new content part is added to a response item.
      properties:
        content_index:
+          title: Content Index
          type: integer
-          description: >-
-            Index position of the part within the content array
        response_id:
+          title: Response Id
          type: string
-          description: >-
-            Unique identifier of the response containing this content
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the output item containing this content part
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the output item in the response
        part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
          discriminator:
-            propertyName: type
            mapping:
              output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
              reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The content part that was added
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+              title: OpenAIResponseContentPartOutputText
+            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+              title: OpenAIResponseContentPartRefusal
+            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+              title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.content_part.added
          default: response.content_part.added
-          description: >-
-            Event type identifier, always "response.content_part.added"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - content_index
-        - response_id
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartAdded
-      description: >-
-        Streaming event for when a new content part is added to a response item.
-    "OpenAIResponseObjectStreamResponseContentPartDone":
+        - content_index
+        - response_id
+        - item_id
+        - output_index
+        - part
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartAdded
      type: object
+    OpenAIResponseObjectStreamResponseContentPartDone:
+      description: Streaming event for when a content part is completed.
      properties:
        content_index:
+          title: Content Index
          type: integer
-          description: >-
-            Index position of the part within the content array
        response_id:
+          title: Response Id
          type: string
-          description: >-
-            Unique identifier of the response containing this content
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the output item containing this content part
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the output item in the response
        part:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
-            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
          discriminator:
-            propertyName: type
            mapping:
              output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
-              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
              reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
-          description: The completed content part
+              refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            propertyName: type
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
+              title: OpenAIResponseContentPartOutputText
+            - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+              title: OpenAIResponseContentPartRefusal
+            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
+              title: OpenAIResponseContentPartReasoningText
+          title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.content_part.done
          default: response.content_part.done
-          description: >-
-            Event type identifier, always "response.content_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - content_index
-        - response_id
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseContentPartDone
-      description: >-
-        Streaming event for when a content part is completed.
-    "OpenAIResponseObjectStreamResponseCreated":
+        - content_index
+        - response_id
+        - item_id
+        - output_index
+        - part
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseContentPartDone
      type: object
+    OpenAIResponseObjectStreamResponseCreated:
+      description: Streaming event indicating a new response has been created.
      properties:
        response:
          $ref: '#/components/schemas/OpenAIResponseObject'
-          description: The response object that was created
        type:
-          type: string
          const: response.created
          default: response.created
-          description: >-
-            Event type identifier, always "response.created"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - response
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseCreated
-      description: >-
-        Streaming event indicating a new response has been created.
-    OpenAIResponseObjectStreamResponseFailed:
+        - response
+      title: OpenAIResponseObjectStreamResponseCreated
      type: object
+    OpenAIResponseObjectStreamResponseFailed:
+      description: Streaming event emitted when a response fails.
      properties:
        response:
          $ref: '#/components/schemas/OpenAIResponseObject'
-          description: Response object describing the failure
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.failed
          default: response.failed
-          description: >-
-            Event type identifier, always "response.failed"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - response
-        - sequence_number
-        - type
+        - response
+        - sequence_number
      title: OpenAIResponseObjectStreamResponseFailed
-      description: >-
-        Streaming event emitted when a response fails.
-    "OpenAIResponseObjectStreamResponseFileSearchCallCompleted":
      type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted:
+      description: Streaming event for completed file search calls.
      properties:
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the completed file search call
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.file_search_call.completed
          default: response.file_search_call.completed
-          description: >-
-            Event type identifier, always "response.file_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallCompleted
-      description: >-
-        Streaming event for completed file search calls.
-    "OpenAIResponseObjectStreamResponseFileSearchCallInProgress":
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallCompleted
      type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress:
+      description: Streaming event for file search calls in progress.
      properties:
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the file search call
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.file_search_call.in_progress
          default: response.file_search_call.in_progress
-          description: >-
-            Event type identifier, always "response.file_search_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallInProgress
-      description: >-
-        Streaming event for file search calls in progress.
-    "OpenAIResponseObjectStreamResponseFileSearchCallSearching":
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallInProgress
      type: object
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching:
+      description: Streaming event for file search currently searching.
      properties:
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the file search call
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.file_search_call.searching
          default: response.file_search_call.searching
-          description: >-
-            Event type identifier, always "response.file_search_call.searching"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFileSearchCallSearching
-      description: >-
-        Streaming event for file search currently searching.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta":
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseFileSearchCallSearching
      type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta:
+      description: Streaming event for incremental function call argument updates.
      properties:
        delta:
+          title: Delta
          type: string
-          description: >-
-            Incremental function call arguments being added
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the function call being updated
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.function_call_arguments.delta
          default: response.function_call_arguments.delta
-          description: >-
-            Event type identifier, always "response.function_call_arguments.delta"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
-      description: >-
-        Streaming event for incremental function call argument updates.
-    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone":
+        - delta
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
      type: object
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone:
+      description: Streaming event for when function call arguments are completed.
      properties:
        arguments:
+          title: Arguments
          type: string
-          description: >-
-            Final complete arguments JSON string for the function call
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the completed function call
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.function_call_arguments.done
          default: response.function_call_arguments.done
-          description: >-
-            Event type identifier, always "response.function_call_arguments.done"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - arguments
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
-      description: >-
-        Streaming event for when function call arguments are completed.
- "OpenAIResponseObjectStreamResponseInProgress": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseInProgress: + description: Streaming event indicating the response remains in progress. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: Current response state while in progress sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.in_progress default: response.in_progress - description: >- - Event type identifier, always "response.in_progress" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseInProgress - description: >- - Streaming event indicating the response remains in progress. - "OpenAIResponseObjectStreamResponseIncomplete": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseInProgress type: object + OpenAIResponseObjectStreamResponseIncomplete: + description: Streaming event emitted when a response ends in an incomplete state. properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: >- - Response object describing the incomplete state sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.incomplete default: response.incomplete - description: >- - Event type identifier, always "response.incomplete" - additionalProperties: false + title: Type + type: string required: - - response - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseIncomplete - description: >- - Streaming event emitted when a response ends in an incomplete state. 
- "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + - response + - sequence_number + title: OpenAIResponseObjectStreamResponseIncomplete type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta: properties: delta: + title: Delta type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.delta default: response.mcp_call.arguments.delta - additionalProperties: false + title: Type + type: string required: - - delta - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + - delta + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta type: object + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone: properties: arguments: + title: Arguments type: string item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.mcp_call.arguments.done default: response.mcp_call.arguments.done - additionalProperties: false + title: Type + type: string required: - - arguments - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone - "OpenAIResponseObjectStreamResponseMcpCallCompleted": + - arguments + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallArgumentsDone type: object + OpenAIResponseObjectStreamResponseMcpCallCompleted: + description: Streaming event for completed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.completed default: response.mcp_call.completed - description: >- - Event type identifier, always "response.mcp_call.completed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallCompleted - description: Streaming event for completed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallFailed": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallCompleted type: object + OpenAIResponseObjectStreamResponseMcpCallFailed: + description: Streaming event for failed MCP calls. properties: sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.mcp_call.failed default: response.mcp_call.failed - description: >- - Event type identifier, always "response.mcp_call.failed" - additionalProperties: false + title: Type + type: string required: - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseMcpCallFailed - description: Streaming event for failed MCP calls. - "OpenAIResponseObjectStreamResponseMcpCallInProgress": + - sequence_number + title: OpenAIResponseObjectStreamResponseMcpCallFailed type: object + OpenAIResponseObjectStreamResponseMcpCallInProgress: + description: Streaming event for MCP calls in progress. 
+    OpenAIResponseObjectStreamResponseMcpCallInProgress:
+      description: Streaming event for MCP calls in progress.
      properties:
        item_id:
+          title: Item Id
          type: string
-          description: Unique identifier of the MCP call
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.mcp_call.in_progress
          default: response.mcp_call.in_progress
-          description: >-
-            Event type identifier, always "response.mcp_call.in_progress"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpCallInProgress
-      description: >-
-        Streaming event for MCP calls in progress.
-    "OpenAIResponseObjectStreamResponseMcpListToolsCompleted":
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpCallInProgress
      type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsCompleted:
      properties:
        sequence_number:
+          title: Sequence Number
          type: integer
        type:
-          type: string
          const: response.mcp_list_tools.completed
          default: response.mcp_list_tools.completed
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsCompleted
-    "OpenAIResponseObjectStreamResponseMcpListToolsFailed":
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsCompleted
      type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsFailed:
      properties:
        sequence_number:
+          title: Sequence Number
          type: integer
        type:
-          type: string
          const: response.mcp_list_tools.failed
          default: response.mcp_list_tools.failed
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsFailed
-    "OpenAIResponseObjectStreamResponseMcpListToolsInProgress":
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsFailed
      type: object
+    OpenAIResponseObjectStreamResponseMcpListToolsInProgress:
      properties:
        sequence_number:
+          title: Sequence Number
          type: integer
        type:
-          type: string
          const: response.mcp_list_tools.in_progress
          default: response.mcp_list_tools.in_progress
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseMcpListToolsInProgress
-    "OpenAIResponseObjectStreamResponseOutputItemAdded":
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseMcpListToolsInProgress
      type: object
+    OpenAIResponseObjectStreamResponseOutputItemAdded:
+      description: Streaming event for when a new output item is added to the response.
      properties:
        response_id:
+          title: Response Id
          type: string
-          description: >-
-            Unique identifier of the response containing this output
        item:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseMessage'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
          discriminator:
-            propertyName: type
            mapping:
-              message: '#/components/schemas/OpenAIResponseMessage'
-              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
              file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
              function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
              mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
              mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-          description: >-
-            The output item that was added (message, tool call, etc.)
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage'
+              title: OpenAIResponseMessage
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseOutputMessageWebSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              title: OpenAIResponseOutputMessageFileSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              title: OpenAIResponseOutputMessageFunctionToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              title: OpenAIResponseOutputMessageMCPCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              title: OpenAIResponseOutputMessageMCPListTools
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of this item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.output_item.added
          default: response.output_item.added
-          description: >-
-            Event type identifier, always "response.output_item.added"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - response_id
-        - item
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputItemAdded
-      description: >-
-        Streaming event for when a new output item is added to the response.
-    "OpenAIResponseObjectStreamResponseOutputItemDone":
+        - response_id
+        - item
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemAdded
      type: object
+    OpenAIResponseObjectStreamResponseOutputItemDone:
+      description: Streaming event for when an output item is completed.
      properties:
        response_id:
+          title: Response Id
          type: string
-          description: >-
-            Unique identifier of the response containing this output
        item:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseMessage'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
          discriminator:
-            propertyName: type
            mapping:
-              message: '#/components/schemas/OpenAIResponseMessage'
-              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
              file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
              function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
              mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
              mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
-              mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
-          description: >-
-            The completed output item (message, tool call, etc.)
+              message: '#/components/schemas/OpenAIResponseMessage'
+              web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+            propertyName: type
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseMessage'
+              title: OpenAIResponseMessage
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+              title: OpenAIResponseOutputMessageWebSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+              title: OpenAIResponseOutputMessageFileSearchToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+              title: OpenAIResponseOutputMessageFunctionToolCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+              title: OpenAIResponseOutputMessageMCPCall
+            - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+              title: OpenAIResponseOutputMessageMCPListTools
+            - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+              title: OpenAIResponseMCPApprovalRequest
+          title: OpenAIResponseMessage | ... (7 variants)
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of this item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.output_item.done
          default: response.output_item.done
-          description: >-
-            Event type identifier, always "response.output_item.done"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - response_id
-        - item
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputItemDone
-      description: >-
-        Streaming event for when an output item is completed.
-    "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded":
+        - response_id
+        - item
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputItemDone
      type: object
+    OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded:
+      description: Streaming event for when an annotation is added to output text.
      properties:
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the item to which the annotation is being added
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the output item in the response's output array
        content_index:
+          title: Content Index
          type: integer
-          description: >-
-            Index position of the content part within the output item
        annotation_index:
+          title: Annotation Index
          type: integer
-          description: >-
-            Index of the annotation within the content part
        annotation:
-          oneOf:
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
-            - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
          discriminator:
-            propertyName: type
            mapping:
-              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
-              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
              container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+              file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
              file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
-          description: The annotation object being added
+              url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+            propertyName: type
+          oneOf:
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+              title: OpenAIResponseAnnotationFileCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+              title: OpenAIResponseAnnotationCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+              title: OpenAIResponseAnnotationContainerFileCitation
+            - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+              title: OpenAIResponseAnnotationFilePath
+          title: OpenAIResponseAnnotationFileCitation | ... (4 variants)
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.output_text.annotation.added
          default: response.output_text.annotation.added
-          description: >-
-            Event type identifier, always "response.output_text.annotation.added"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - item_id
-        - output_index
-        - content_index
-        - annotation_index
-        - annotation
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
-      description: >-
-        Streaming event for when an annotation is added to output text.
-    "OpenAIResponseObjectStreamResponseOutputTextDelta":
+        - item_id
+        - output_index
+        - content_index
+        - annotation_index
+        - annotation
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
      type: object
+    OpenAIResponseObjectStreamResponseOutputTextDelta:
+      description: Streaming event for incremental text content updates.
      properties:
        content_index:
+          title: Content Index
          type: integer
-          description: Index position within the text content
        delta:
+          title: Delta
          type: string
-          description: Incremental text content being added
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the output item being updated
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.output_text.delta
          default: response.output_text.delta
-          description: >-
-            Event type identifier, always "response.output_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextDelta
-      description: >-
-        Streaming event for incremental text content updates.
-    "OpenAIResponseObjectStreamResponseOutputTextDone":
+        - content_index
+        - delta
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDelta
      type: object
+    OpenAIResponseObjectStreamResponseOutputTextDone:
+      description: Streaming event for when text output is completed.
      properties:
        content_index:
+          title: Content Index
          type: integer
-          description: Index position within the text content
        text:
+          title: Text
          type: string
-          description: >-
-            Final complete text content of the output item
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the completed output item
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.output_text.done
          default: response.output_text.done
-          description: >-
-            Event type identifier, always "response.output_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseOutputTextDone
-      description: >-
-        Streaming event for when text output is completed.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded":
+        - content_index
+        - text
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseOutputTextDone
      type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded:
+      description: Streaming event for when a new reasoning summary part is added.
      properties:
        item_id:
+          title: Item Id
          type: string
-          description: Unique identifier of the output item
        output_index:
+          title: Output Index
          type: integer
-          description: Index position of the output item
        part:
          $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
-          description: The summary part that was added
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        summary_index:
+          title: Summary Index
          type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
        type:
-          type: string
          const: response.reasoning_summary_part.added
          default: response.reasoning_summary_part.added
-          description: >-
-            Event type identifier, always "response.reasoning_summary_part.added"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
-      description: >-
-        Streaming event for when a new reasoning summary part is added.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone":
+        - item_id
+        - output_index
+        - part
+        - sequence_number
+        - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
      type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartDone:
+      description: Streaming event for when a reasoning summary part is completed.
      properties:
        item_id:
+          title: Item Id
          type: string
-          description: Unique identifier of the output item
        output_index:
+          title: Output Index
          type: integer
-          description: Index position of the output item
        part:
          $ref: '#/components/schemas/OpenAIResponseContentPartReasoningSummary'
-          description: The completed summary part
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        summary_index:
+          title: Summary Index
          type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
        type:
-          type: string
          const: response.reasoning_summary_part.done
          default: response.reasoning_summary_part.done
-          description: >-
-            Event type identifier, always "response.reasoning_summary_part.done"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - item_id
-        - output_index
-        - part
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
-      description: >-
-        Streaming event for when a reasoning summary part is completed.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta":
+        - item_id
+        - output_index
+        - part
+        - sequence_number
+        - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
      type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta:
+      description: Streaming event for incremental reasoning summary text updates.
      properties:
        delta:
+          title: Delta
          type: string
-          description: Incremental summary text being added
        item_id:
+          title: Item Id
          type: string
-          description: Unique identifier of the output item
        output_index:
+          title: Output Index
          type: integer
-          description: Index position of the output item
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        summary_index:
+          title: Summary Index
          type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
        type:
-          type: string
          const: response.reasoning_summary_text.delta
          default: response.reasoning_summary_text.delta
-          description: >-
-            Event type identifier, always "response.reasoning_summary_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
-      description: >-
-        Streaming event for incremental reasoning summary text updates.
-    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone":
+        - delta
+        - item_id
+        - output_index
+        - sequence_number
+        - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
      type: object
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDone:
+      description: Streaming event for when reasoning summary text is completed.
      properties:
        text:
+          title: Text
          type: string
-          description: Final complete summary text
        item_id:
+          title: Item Id
          type: string
-          description: Unique identifier of the output item
        output_index:
+          title: Output Index
          type: integer
-          description: Index position of the output item
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        summary_index:
+          title: Summary Index
          type: integer
-          description: >-
-            Index of the summary part within the reasoning summary
        type:
-          type: string
          const: response.reasoning_summary_text.done
          default: response.reasoning_summary_text.done
-          description: >-
-            Event type identifier, always "response.reasoning_summary_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - summary_index
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
-      description: >-
-        Streaming event for when reasoning summary text is completed.
-    "OpenAIResponseObjectStreamResponseReasoningTextDelta":
+        - text
+        - item_id
+        - output_index
+        - sequence_number
+        - summary_index
+      title: OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
      type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDelta:
+      description: Streaming event for incremental reasoning text updates.
      properties:
        content_index:
+          title: Content Index
          type: integer
-          description: >-
-            Index position of the reasoning content part
        delta:
+          title: Delta
          type: string
-          description: Incremental reasoning text being added
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the output item being updated
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.reasoning_text.delta
          default: response.reasoning_text.delta
-          description: >-
-            Event type identifier, always "response.reasoning_text.delta"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDelta
-      description: >-
-        Streaming event for incremental reasoning text updates.
-    "OpenAIResponseObjectStreamResponseReasoningTextDone":
+        - content_index
+        - delta
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDelta
      type: object
+    OpenAIResponseObjectStreamResponseReasoningTextDone:
+      description: Streaming event for when reasoning text is completed.
      properties:
        content_index:
+          title: Content Index
          type: integer
-          description: >-
-            Index position of the reasoning content part
        text:
+          title: Text
          type: string
-          description: Final complete reasoning text
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the completed output item
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.reasoning_text.done
          default: response.reasoning_text.done
-          description: >-
-            Event type identifier, always "response.reasoning_text.done"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - content_index
-        - text
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseReasoningTextDone
-      description: >-
-        Streaming event for when reasoning text is completed.
-    "OpenAIResponseObjectStreamResponseRefusalDelta":
+        - content_index
+        - text
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseReasoningTextDone
      type: object
+    OpenAIResponseObjectStreamResponseRefusalDelta:
+      description: Streaming event for incremental refusal text updates.
      properties:
        content_index:
+          title: Content Index
          type: integer
-          description: Index position of the content part
        delta:
+          title: Delta
          type: string
-          description: Incremental refusal text being added
        item_id:
+          title: Item Id
          type: string
-          description: Unique identifier of the output item
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.refusal.delta
          default: response.refusal.delta
-          description: >-
-            Event type identifier, always "response.refusal.delta"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - content_index
-        - delta
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDelta
-      description: >-
-        Streaming event for incremental refusal text updates.
-    "OpenAIResponseObjectStreamResponseRefusalDone":
+        - content_index
+        - delta
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDelta
      type: object
+    OpenAIResponseObjectStreamResponseRefusalDone:
+      description: Streaming event for when refusal text is completed.
      properties:
        content_index:
+          title: Content Index
          type: integer
-          description: Index position of the content part
        refusal:
+          title: Refusal
          type: string
-          description: Final complete refusal text
        item_id:
+          title: Item Id
          type: string
-          description: Unique identifier of the output item
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.refusal.done
          default: response.refusal.done
-          description: >-
-            Event type identifier, always "response.refusal.done"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - content_index
-        - refusal
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseRefusalDone
-      description: >-
-        Streaming event for when refusal text is completed.
-    "OpenAIResponseObjectStreamResponseWebSearchCallCompleted":
+        - content_index
+        - refusal
+        - item_id
+        - output_index
+        - sequence_number
+      title: OpenAIResponseObjectStreamResponseRefusalDone
      type: object
+    OpenAIResponseObjectStreamResponseWebSearchCallCompleted:
+      description: Streaming event for completed web search calls.
      properties:
        item_id:
+          title: Item Id
          type: string
-          description: >-
-            Unique identifier of the completed web search call
        output_index:
+          title: Output Index
          type: integer
-          description: >-
-            Index position of the item in the output list
        sequence_number:
+          title: Sequence Number
          type: integer
-          description: >-
-            Sequential number for ordering streaming events
        type:
-          type: string
          const: response.web_search_call.completed
          default: response.web_search_call.completed
-          description: >-
-            Event type identifier, always "response.web_search_call.completed"
-      additionalProperties: false
+          title: Type
+          type: string
      required:
-        - item_id
-        - output_index
-        - sequence_number
-        - type
-      title: >-
-        OpenAIResponseObjectStreamResponseWebSearchCallCompleted
-      description: >-
-        Streaming event for completed web search calls.
- "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallCompleted type: object + OpenAIResponseObjectStreamResponseWebSearchCallInProgress: + description: Streaming event for web search calls in progress. properties: item_id: + title: Item Id type: string - description: Unique identifier of the web search call output_index: + title: Output Index type: integer - description: >- - Index position of the item in the output list sequence_number: + title: Sequence Number type: integer - description: >- - Sequential number for ordering streaming events type: - type: string const: response.web_search_call.in_progress default: response.web_search_call.in_progress - description: >- - Event type identifier, always "response.web_search_call.in_progress" - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallInProgress - description: >- - Streaming event for web search calls in progress. - "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallInProgress type: object + OpenAIResponseObjectStreamResponseWebSearchCallSearching: properties: item_id: + title: Item Id type: string output_index: + title: Output Index type: integer sequence_number: + title: Sequence Number type: integer type: - type: string const: response.web_search_call.searching default: response.web_search_call.searching - additionalProperties: false + title: Type + type: string required: - - item_id - - output_index - - sequence_number - - type - title: >- - OpenAIResponseObjectStreamResponseWebSearchCallSearching - OpenAIDeleteResponseObject: + - item_id + - output_index + - sequence_number + title: OpenAIResponseObjectStreamResponseWebSearchCallSearching type: object + OpenAIDeleteResponseObject: properties: id: type: string - description: >- - Unique identifier of the deleted response + title: Id object: type: string const: response + title: Object default: response - description: >- - Object type identifier, always "response" deleted: type: boolean + title: Deleted default: true - description: Deletion confirmation flag, always True - additionalProperties: false - required: - - id - - object - - deleted - title: OpenAIDeleteResponseObject - description: >- - Response object confirming deletion of an OpenAI response. - ListOpenAIResponseInputItem: type: object + required: + - id + title: OpenAIDeleteResponseObject + description: Response object confirming deletion of an OpenAI response. 
+    ListOpenAIResponseInputItem:
      properties:
        data:
-          type: array
          items:
-            $ref: '#/components/schemas/OpenAIResponseInput'
-          description: List of input items
+            anyOf:
+              - oneOf:
+                  - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                    title: OpenAIResponseMessage-Output
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                    title: OpenAIResponseOutputMessageWebSearchToolCall
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                    title: OpenAIResponseOutputMessageFileSearchToolCall
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                    title: OpenAIResponseOutputMessageFunctionToolCall
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                    title: OpenAIResponseOutputMessageMCPCall
+                  - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                    title: OpenAIResponseOutputMessageMCPListTools
+                  - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                    title: OpenAIResponseMCPApprovalRequest
+                discriminator:
+                  propertyName: type
+                  mapping:
+                    file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
+                    function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+                    mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+                    mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+                    mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+                    message: '#/components/schemas/OpenAIResponseMessage-Output'
+                    web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+                title: OpenAIResponseMessage-Output | ... (7 variants)
+              - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+                title: OpenAIResponseInputFunctionToolCallOutput
+              - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+                title: OpenAIResponseMCPApprovalResponse
+              - $ref: '#/components/schemas/OpenAIResponseMessage-Output'
+                title: OpenAIResponseMessage-Output
+            title: OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseMCPApprovalResponse | OpenAIResponseMessage-Output
+          type: array
+          title: Data
        object:
          type: string
          const: list
+          title: Object
          default: list
-          description: Object type identifier, always "list"
-      additionalProperties: false
-      required:
-        - data
-        - object
-      title: ListOpenAIResponseInputItem
-      description: >-
-        List container for OpenAI response input items.
-    RunShieldRequest:
      type: object
+      required:
+        - data
+      title: ListOpenAIResponseInputItem
+      description: List container for OpenAI response input items.
+    RunShieldRequest:
      properties:
        shield_id:
          type: string
-          description: The identifier of the shield to run.
+          title: Shield Id
        messages:
-          type: array
          items:
-            $ref: '#/components/schemas/OpenAIMessageParam'
-          description: The messages to run the shield on.
-        params:
-          type: object
-          additionalProperties:
            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
+              - $ref: '#/components/schemas/OpenAIUserMessageParam-Input'
+                title: OpenAIUserMessageParam-Input
+              - $ref: '#/components/schemas/OpenAISystemMessageParam'
+                title: OpenAISystemMessageParam
+              - $ref: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+                title: OpenAIAssistantMessageParam-Input
+              - $ref: '#/components/schemas/OpenAIToolMessageParam'
+                title: OpenAIToolMessageParam
+              - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+                title: OpenAIDeveloperMessageParam
+            discriminator:
+              propertyName: role
+              mapping:
+                assistant: '#/components/schemas/OpenAIAssistantMessageParam-Input'
+                developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+                system: '#/components/schemas/OpenAISystemMessageParam'
+                tool: '#/components/schemas/OpenAIToolMessageParam'
+                user: '#/components/schemas/OpenAIUserMessageParam-Input'
+            title: OpenAIUserMessageParam-Input | ... (5 variants)
+          type: array
+          title: Messages
+        params:
+          additionalProperties: true
+          type: object
+          title: Params
+      type: object
      required:
-        - shield_id
-        - messages
-        - params
+        - shield_id
+        - messages
+        - params
      title: RunShieldRequest
    RunShieldResponse:
-      type: object
      properties:
        violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: >-
-            (Optional) Safety violation detected by the shield, if any
-      additionalProperties: false
+          anyOf:
+            - $ref: '#/components/schemas/SafetyViolation'
+              title: SafetyViolation
+            - type: 'null'
+          title: SafetyViolation
+      type: object
      title: RunShieldResponse
      description: Response from running a safety shield.
    SafetyViolation:
-      type: object
      properties:
        violation_level:
          $ref: '#/components/schemas/ViolationLevel'
-          description: Severity level of the violation
        user_message:
-          type: string
-          description: >-
-            (Optional) Message to convey to the user about the violation
+          anyOf:
+            - type: string
+            - type: 'null'
        metadata:
+          additionalProperties: true
          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata including specific violation codes for debugging and
-            telemetry
-      additionalProperties: false
+          title: Metadata
+      type: object
      required:
-        - violation_level
-        - metadata
+        - violation_level
      title: SafetyViolation
-      description: >-
-        Details of a safety violation detected by content moderation.
+      description: Details of a safety violation detected by content moderation.
    ViolationLevel:
      type: string
      enum:
-        - info
-        - warn
-        - error
+        - info
+        - warn
+        - error
      title: ViolationLevel
      description: Severity level of a safety violation.
-    AgentTurnInputType:
-      type: object
-      properties:
-        type:
-          type: string
-          const: agent_turn_input
-          default: agent_turn_input
-          description: >-
-            Discriminator type. Always "agent_turn_input"
-      additionalProperties: false
-      required:
-        - type
-      title: AgentTurnInputType
-      description: Parameter type for agent turn input.
    AggregationFunctionType:
      type: string
      enum:
-        - average
-        - weighted_average
-        - median
-        - categorical_count
-        - accuracy
+        - average
+        - weighted_average
+        - median
+        - categorical_count
+        - accuracy
      title: AggregationFunctionType
-      description: >-
-        Types of aggregation functions for scoring results.
+      description: Types of aggregation functions for scoring results.
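RunShieldRequest takes a registered `shield_id`, OpenAI-style `messages` discriminated on `role`, and a free-form `params` map; RunShieldResponse's `violation` is now explicitly nullable rather than merely optional. A sketch using `safety.run_shield` from the Python client; the shield id and message content are illustrative:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

result = client.safety.run_shield(
    shield_id="llama-guard",  # illustrative registered shield
    messages=[{"role": "user", "content": "How do I bake bread?"}],
    params={},
)

# `violation` is nullable: None means the shield raised no objection.
if result.violation is None:
    print("no violation")
else:
    print(result.violation.violation_level, result.violation.user_message)
```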
Always "array" - additionalProperties: false - required: - - type + type: object title: ArrayType description: Parameter type for array values. BasicScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: basic + title: Type default: basic - description: >- - The type of scoring function parameters, always basic aggregation_functions: - type: array items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - aggregation_functions - title: BasicScoringFnParams - description: >- - Parameters for basic scoring function configuration. - BooleanType: + type: array + title: Aggregation Functions + description: Aggregation functions to apply to the scores of each row type: object + title: BasicScoringFnParams + description: Parameters for basic scoring function configuration. + BooleanType: properties: type: type: string const: boolean + title: Type default: boolean - description: Discriminator type. Always "boolean" - additionalProperties: false - required: - - type + type: object title: BooleanType description: Parameter type for boolean values. ChatCompletionInputType: - type: object properties: type: type: string const: chat_completion_input + title: Type default: chat_completion_input - description: >- - Discriminator type. Always "chat_completion_input" - additionalProperties: false - required: - - type - title: ChatCompletionInputType - description: >- - Parameter type for chat completion input. - CompletionInputType: type: object + title: ChatCompletionInputType + description: Parameter type for chat completion input. + CompletionInputType: properties: type: type: string const: completion_input + title: Type default: completion_input - description: >- - Discriminator type. Always "completion_input" - additionalProperties: false - required: - - type + type: object title: CompletionInputType description: Parameter type for completion input. JsonType: - type: object properties: type: type: string const: json + title: Type default: json - description: Discriminator type. Always "json" - additionalProperties: false - required: - - type + type: object title: JsonType description: Parameter type for JSON values. LLMAsJudgeScoringFnParams: - type: object properties: type: - $ref: '#/components/schemas/ScoringFnParamsType' + type: string const: llm_as_judge + title: Type default: llm_as_judge - description: >- - The type of scoring function parameters, always llm_as_judge judge_model: type: string - description: >- - Identifier of the LLM model to use as a judge for scoring + title: Judge Model prompt_template: - type: string - description: >- - (Optional) Custom prompt template for the judge model + anyOf: + - type: string + - type: 'null' judge_score_regexes: - type: array items: type: string - description: >- - Regexes to extract the answer from generated response - aggregation_functions: type: array + title: Judge Score Regexes + description: Regexes to extract the answer from generated response + aggregation_functions: items: $ref: '#/components/schemas/AggregationFunctionType' - description: >- - Aggregation functions to apply to the scores of each row - additionalProperties: false - required: - - type - - judge_model - - judge_score_regexes - - aggregation_functions - title: LLMAsJudgeScoringFnParams - description: >- - Parameters for LLM-as-judge scoring function configuration. 
-    NumberType:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
       type: object
+      required:
+      - judge_model
+      title: LLMAsJudgeScoringFnParams
+      description: Parameters for LLM-as-judge scoring function configuration.
+    NumberType:
       properties:
         type:
           type: string
           const: number
+          title: Type
           default: number
-          description: Discriminator type. Always "number"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: NumberType
       description: Parameter type for numeric values.
     ObjectType:
-      type: object
       properties:
         type:
           type: string
           const: object
+          title: Type
           default: object
-          description: Discriminator type. Always "object"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: ObjectType
       description: Parameter type for object values.
     RegexParserScoringFnParams:
-      type: object
       properties:
         type:
-          $ref: '#/components/schemas/ScoringFnParamsType'
+          type: string
           const: regex_parser
+          title: Type
           default: regex_parser
-          description: >-
-            The type of scoring function parameters, always regex_parser
         parsing_regexes:
-          type: array
           items:
             type: string
-          description: >-
-            Regex to extract the answer from generated response
-        aggregation_functions:
           type: array
+          title: Parsing Regexes
+          description: Regex to extract the answer from generated response
+        aggregation_functions:
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
-          description: >-
-            Aggregation functions to apply to the scores of each row
-      additionalProperties: false
-      required:
-        - type
-        - parsing_regexes
-        - aggregation_functions
-      title: RegexParserScoringFnParams
-      description: >-
-        Parameters for regex parser scoring function configuration.
-    ScoringFn:
+          type: array
+          title: Aggregation Functions
+          description: Aggregation functions to apply to the scores of each row
       type: object
+      title: RegexParserScoringFnParams
+      description: Parameters for regex parser scoring function configuration.
+    ScoringFn:
       properties:
         identifier:
           type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
         provider_resource_id:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
         provider_id:
           type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
         type:
           type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
           const: scoring_function
+          title: Type
           default: scoring_function
-          description: >-
-            The resource type, always scoring_function
         description:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
         metadata:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          title: Metadata
+          description: Any additional metadata for this definition
         return_type:
           oneOf:
-            - $ref: '#/components/schemas/StringType'
-            - $ref: '#/components/schemas/NumberType'
-            - $ref: '#/components/schemas/BooleanType'
-            - $ref: '#/components/schemas/ArrayType'
-            - $ref: '#/components/schemas/ObjectType'
-            - $ref: '#/components/schemas/JsonType'
-            - $ref: '#/components/schemas/UnionType'
-            - $ref: '#/components/schemas/ChatCompletionInputType'
-            - $ref: '#/components/schemas/CompletionInputType'
-            - $ref: '#/components/schemas/AgentTurnInputType'
+          - $ref: '#/components/schemas/StringType'
+            title: StringType
+          - $ref: '#/components/schemas/NumberType'
+            title: NumberType
+          - $ref: '#/components/schemas/BooleanType'
+            title: BooleanType
+          - $ref: '#/components/schemas/ArrayType'
+            title: ArrayType
+          - $ref: '#/components/schemas/ObjectType'
+            title: ObjectType
+          - $ref: '#/components/schemas/JsonType'
+            title: JsonType
+          - $ref: '#/components/schemas/UnionType'
+            title: UnionType
+          - $ref: '#/components/schemas/ChatCompletionInputType'
+            title: ChatCompletionInputType
+          - $ref: '#/components/schemas/CompletionInputType'
+            title: CompletionInputType
+          title: StringType | ... (9 variants)
+          description: The return type of the deterministic function
           discriminator:
             propertyName: type
             mapping:
-              string: '#/components/schemas/StringType'
-              number: '#/components/schemas/NumberType'
-              boolean: '#/components/schemas/BooleanType'
               array: '#/components/schemas/ArrayType'
-              object: '#/components/schemas/ObjectType'
-              json: '#/components/schemas/JsonType'
-              union: '#/components/schemas/UnionType'
+              boolean: '#/components/schemas/BooleanType'
               chat_completion_input: '#/components/schemas/ChatCompletionInputType'
               completion_input: '#/components/schemas/CompletionInputType'
-              agent_turn_input: '#/components/schemas/AgentTurnInputType'
+              json: '#/components/schemas/JsonType'
+              number: '#/components/schemas/NumberType'
+              object: '#/components/schemas/ObjectType'
+              string: '#/components/schemas/StringType'
+              union: '#/components/schemas/UnionType'
         params:
-          $ref: '#/components/schemas/ScoringFnParams'
-      additionalProperties: false
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              title: LLMAsJudgeScoringFnParams
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+              title: RegexParserScoringFnParams
+            - $ref: '#/components/schemas/BasicScoringFnParams'
+              title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          - type: 'null'
+          title: Params
+          description: The parameters for the scoring function for benchmark eval, these can be overridden for app eval
+      type: object
       required:
-        - identifier
-        - provider_id
-        - type
-        - metadata
-        - return_type
+      - identifier
+      - provider_id
+      - return_type
       title: ScoringFn
-      description: >-
-        A scoring function resource for evaluating model outputs.
+      description: A scoring function resource for evaluating model outputs.
     ScoringFnParams:
-      oneOf:
-        - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
-        - $ref: '#/components/schemas/RegexParserScoringFnParams'
-        - $ref: '#/components/schemas/BasicScoringFnParams'
       discriminator:
-        propertyName: type
         mapping:
+          basic: '#/components/schemas/BasicScoringFnParams'
          llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
          regex_parser: '#/components/schemas/RegexParserScoringFnParams'
-          basic: '#/components/schemas/BasicScoringFnParams'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+        title: LLMAsJudgeScoringFnParams
+      - $ref: '#/components/schemas/RegexParserScoringFnParams'
+        title: RegexParserScoringFnParams
+      - $ref: '#/components/schemas/BasicScoringFnParams'
+        title: BasicScoringFnParams
+      title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
     ScoringFnParamsType:
-      type: string
+      description: Types of scoring function parameter configurations.
       enum:
-        - llm_as_judge
-        - regex_parser
-        - basic
+      - llm_as_judge
+      - regex_parser
+      - basic
       title: ScoringFnParamsType
-      description: >-
-        Types of scoring function parameter configurations.
+      type: string
     StringType:
-      type: object
       properties:
         type:
           type: string
           const: string
+          title: Type
           default: string
-          description: Discriminator type. Always "string"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: StringType
       description: Parameter type for string values.
     UnionType:
-      type: object
       properties:
         type:
           type: string
           const: union
+          title: Type
           default: union
-          description: Discriminator type. Always "union"
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: UnionType
       description: Parameter type for union values.
     ListScoringFunctionsResponse:
-      type: object
       properties:
         data:
-          type: array
           items:
             $ref: '#/components/schemas/ScoringFn'
-      additionalProperties: false
+          type: array
+          title: Data
+      type: object
       required:
-        - data
+      - data
       title: ListScoringFunctionsResponse
-    ParamType:
-      oneOf:
-        - $ref: '#/components/schemas/StringType'
-        - $ref: '#/components/schemas/NumberType'
-        - $ref: '#/components/schemas/BooleanType'
-        - $ref: '#/components/schemas/ArrayType'
-        - $ref: '#/components/schemas/ObjectType'
-        - $ref: '#/components/schemas/JsonType'
-        - $ref: '#/components/schemas/UnionType'
-        - $ref: '#/components/schemas/ChatCompletionInputType'
-        - $ref: '#/components/schemas/CompletionInputType'
-        - $ref: '#/components/schemas/AgentTurnInputType'
-      discriminator:
-        propertyName: type
-        mapping:
-          string: '#/components/schemas/StringType'
-          number: '#/components/schemas/NumberType'
-          boolean: '#/components/schemas/BooleanType'
-          array: '#/components/schemas/ArrayType'
-          object: '#/components/schemas/ObjectType'
-          json: '#/components/schemas/JsonType'
-          union: '#/components/schemas/UnionType'
-          chat_completion_input: '#/components/schemas/ChatCompletionInputType'
-          completion_input: '#/components/schemas/CompletionInputType'
-          agent_turn_input: '#/components/schemas/AgentTurnInputType'
-    RegisterScoringFunctionRequest:
-      type: object
-      properties:
-        scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the scoring function to register.
-        description:
-          type: string
-          description: The description of the scoring function.
-        return_type:
-          $ref: '#/components/schemas/ParamType'
-          description: The return type of the scoring function.
-        provider_scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the provider scoring function to use for the scoring function.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the scoring function.
-        params:
-          $ref: '#/components/schemas/ScoringFnParams'
-          description: >-
-            The parameters for the scoring function for benchmark eval, these can
-            be overridden for app eval.
-      additionalProperties: false
-      required:
-        - scoring_fn_id
-        - description
-        - return_type
-      title: RegisterScoringFunctionRequest
     ScoreRequest:
-      type: object
       properties:
         input_rows:
-          type: array
           items:
+            additionalProperties: true
             type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: The rows to score.
+          type: array
+          title: Input Rows
         scoring_functions:
-          type: object
           additionalProperties:
-            oneOf:
-              - $ref: '#/components/schemas/ScoringFnParams'
-              - type: 'null'
-          description: >-
-            The scoring functions to use for the scoring.
-      additionalProperties: false
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                title: LLMAsJudgeScoringFnParams
+              - $ref: '#/components/schemas/RegexParserScoringFnParams'
+                title: RegexParserScoringFnParams
+              - $ref: '#/components/schemas/BasicScoringFnParams'
+                title: BasicScoringFnParams
+              discriminator:
+                propertyName: type
+                mapping:
+                  basic: '#/components/schemas/BasicScoringFnParams'
+                  llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                  regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+              title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+            - type: 'null'
+            title: AdditionalpropertiesUnion
+          type: object
+          title: Scoring Functions
+      type: object
       required:
-        - input_rows
-        - scoring_functions
+      - input_rows
+      - scoring_functions
       title: ScoreRequest
     ScoreResponse:
-      type: object
       properties:
         results:
-          type: object
           additionalProperties:
             $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            A map of scoring function name to ScoringResult.
-      additionalProperties: false
+          type: object
+          title: Results
+      type: object
       required:
-        - results
+      - results
       title: ScoreResponse
       description: The response from scoring.
     ScoringResult:
-      type: object
       properties:
         score_rows:
-          type: array
           items:
+            additionalProperties: true
             type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            The scoring result for each row. Each row is a map of column name to value.
+          type: array
+          title: Score Rows
         aggregated_results:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Map of metric name to aggregated value
-      additionalProperties: false
+          title: Aggregated Results
+      type: object
       required:
-        - score_rows
-        - aggregated_results
+      - score_rows
+      - aggregated_results
       title: ScoringResult
       description: A scoring result for a single row.
     ScoreBatchRequest:
-      type: object
       properties:
         dataset_id:
           type: string
-          description: The ID of the dataset to score.
+          title: Dataset Id
         scoring_functions:
-          type: object
           additionalProperties:
-            oneOf:
-              - $ref: '#/components/schemas/ScoringFnParams'
-              - type: 'null'
-          description: >-
-            The scoring functions to use for the scoring.
+            anyOf:
+            - oneOf:
+              - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                title: LLMAsJudgeScoringFnParams
+              - $ref: '#/components/schemas/RegexParserScoringFnParams'
+                title: RegexParserScoringFnParams
+              - $ref: '#/components/schemas/BasicScoringFnParams'
+                title: BasicScoringFnParams
+              discriminator:
+                propertyName: type
+                mapping:
+                  basic: '#/components/schemas/BasicScoringFnParams'
+                  llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                  regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+              title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+            - type: 'null'
+            title: AdditionalpropertiesUnion
+          type: object
+          title: Scoring Functions
         save_results_dataset:
           type: boolean
-          description: >-
-            Whether to save the results to a dataset.
-      additionalProperties: false
+          title: Save Results Dataset
+          default: false
+      type: object
       required:
-        - dataset_id
-        - scoring_functions
-        - save_results_dataset
+      - dataset_id
+      - scoring_functions
       title: ScoreBatchRequest
     ScoreBatchResponse:
-      type: object
       properties:
         dataset_id:
-          type: string
-          description: >-
-            (Optional) The identifier of the dataset that was scored
+          anyOf:
+          - type: string
+          - type: 'null'
         results:
-          type: object
           additionalProperties:
             $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            A map of scoring function name to ScoringResult
-      additionalProperties: false
-      required:
-        - results
-      title: ScoreBatchResponse
-      description: >-
-        Response from batch scoring operations on datasets.
-    Shield:
+          type: object
+          title: Results
       type: object
+      required:
+      - results
+      title: ScoreBatchResponse
+      description: Response from batch scoring operations on datasets.
+    Shield:
       properties:
         identifier:
           type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
         provider_resource_id:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
         provider_id:
           type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
         type:
           type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
           const: shield
+          title: Type
           default: shield
-          description: The resource type, always shield
         params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Configuration parameters for the shield
-      additionalProperties: false
-      required:
-        - identifier
-        - provider_id
-        - type
-      title: Shield
-      description: >-
-        A safety shield resource that can be used to check content.
-    ListShieldsResponse:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
       type: object
+      required:
+      - identifier
+      - provider_id
+      title: Shield
+      description: A safety shield resource that can be used to check content.
+    ListShieldsResponse:
       properties:
         data:
-          type: array
           items:
             $ref: '#/components/schemas/Shield'
-      additionalProperties: false
-      required:
-        - data
-      title: ListShieldsResponse
-    RegisterShieldRequest:
-      type: object
-      properties:
-        shield_id:
-          type: string
-          description: >-
-            The identifier of the shield to register.
-        provider_shield_id:
-          type: string
-          description: >-
-            The identifier of the shield in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
-      required:
-        - shield_id
-      title: RegisterShieldRequest
-    CompletionMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: assistant
-          default: assistant
-          description: >-
-            Must be "assistant" to identify this as the model's response
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The content of the model's response
-        stop_reason:
-          type: string
-          enum:
-            - end_of_turn
-            - end_of_message
-            - out_of_tokens
-          description: >-
-            Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
-            The model finished generating the entire response. - `StopReason.end_of_message`:
-            The model finished generating but generated a partial response -- usually,
-            a tool call. The user may call the tool and continue the conversation
-            with the tool's response. - `StopReason.out_of_tokens`: The model ran
-            out of token budget.
-        tool_calls:
           type: array
-          items:
-            $ref: '#/components/schemas/ToolCall'
-          description: >-
-            List of tool calls. Each tool call is a ToolCall object.
-      additionalProperties: false
-      required:
-        - role
-        - content
-        - stop_reason
-      title: CompletionMessage
-      description: >-
-        A message containing the model's (assistant) response in a chat conversation.
-    ImageContentItem:
+          title: Data
       type: object
+      required:
+      - data
+      title: ListShieldsResponse
+    InvokeToolRequest:
+      properties:
+        tool_name:
+          type: string
+          title: Tool Name
+        kwargs:
+          additionalProperties: true
+          type: object
+          title: Kwargs
+        authorization:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - tool_name
+      - kwargs
+      title: InvokeToolRequest
+    ImageContentItem:
+      description: A image content item
       properties:
         type:
-          type: string
           const: image
           default: image
-          description: >-
-            Discriminator type of the content item. Always "image"
+          title: Type
+          type: string
         image:
-          type: object
-          properties:
-            url:
-              $ref: '#/components/schemas/URL'
-              description: >-
-                A URL of the image or data URL in the format of data:image/{type};base64,{data}.
-                Note that URL could have length limits.
-            data:
-              type: string
-              contentEncoding: base64
-              description: base64 encoded image data as string
-          additionalProperties: false
-          description: >-
-            Image as a base64 encoded string or an URL
-      additionalProperties: false
+          $ref: '#/components/schemas/_URLOrData'
       required:
-        - type
-        - image
+      - image
       title: ImageContentItem
-      description: A image content item
+      type: object
     InterleavedContent:
-      oneOf:
-        - type: string
-        - $ref: '#/components/schemas/InterleavedContentItem'
-        - type: array
-          items:
-            $ref: '#/components/schemas/InterleavedContentItem'
-    InterleavedContentItem:
-      oneOf:
+      anyOf:
+      - type: string
+      - discriminator:
+          mapping:
+            image: '#/components/schemas/ImageContentItem'
+            text: '#/components/schemas/TextContentItem'
+          propertyName: type
+        oneOf:
        - $ref: '#/components/schemas/ImageContentItem'
+          title: ImageContentItem
        - $ref: '#/components/schemas/TextContentItem'
+          title: TextContentItem
+        title: ImageContentItem | TextContentItem
+      - items:
+          discriminator:
+            mapping:
+              image: '#/components/schemas/ImageContentItem'
+              text: '#/components/schemas/TextContentItem'
+            propertyName: type
+          oneOf:
+          - $ref: '#/components/schemas/ImageContentItem'
+            title: ImageContentItem
+          - $ref: '#/components/schemas/TextContentItem'
+            title: TextContentItem
+          title: ImageContentItem | TextContentItem
+        type: array
+        title: list[ImageContentItem | TextContentItem]
+      title: string | list[ImageContentItem | TextContentItem]
+    InterleavedContentItem:
       discriminator:
-        propertyName: type
         mapping:
          image: '#/components/schemas/ImageContentItem'
          text: '#/components/schemas/TextContentItem'
-    Message:
+        propertyName: type
       oneOf:
-        - $ref: '#/components/schemas/UserMessage'
-        - $ref: '#/components/schemas/SystemMessage'
-        - $ref: '#/components/schemas/ToolResponseMessage'
-        - $ref: '#/components/schemas/CompletionMessage'
-      discriminator:
-        propertyName: role
-        mapping:
-          user: '#/components/schemas/UserMessage'
-          system: '#/components/schemas/SystemMessage'
-          tool: '#/components/schemas/ToolResponseMessage'
-          assistant: '#/components/schemas/CompletionMessage'
-    SystemMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: system
-          default: system
-          description: >-
-            Must be "system" to identify this as a system message
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The content of the "system prompt". If multiple system messages are provided,
-            they are concatenated. The underlying Llama Stack code may also add other
-            system messages (for example, for formatting tool definitions).
-      additionalProperties: false
-      required:
-        - role
-        - content
-      title: SystemMessage
-      description: >-
-        A system message providing instructions or context to the model.
+      - $ref: '#/components/schemas/ImageContentItem'
+        title: ImageContentItem
+      - $ref: '#/components/schemas/TextContentItem'
+        title: TextContentItem
+      title: ImageContentItem | TextContentItem
     TextContentItem:
-      type: object
       properties:
         type:
           type: string
           const: text
+          title: Type
           default: text
-          description: >-
-            Discriminator type of the content item. Always "text"
         text:
           type: string
-          description: Text content
-      additionalProperties: false
+          title: Text
+      type: object
       required:
-        - type
-        - text
+      - text
       title: TextContentItem
       description: A text content item
-    ToolCall:
-      type: object
+    ToolInvocationResult:
       properties:
-        call_id:
-          type: string
-        tool_name:
-          oneOf:
-            - type: string
-              enum:
-                - brave_search
-                - wolfram_alpha
-                - photogen
-                - code_interpreter
-              title: BuiltinTool
-            - type: string
-        arguments:
-          type: string
-      additionalProperties: false
-      required:
-        - call_id
-        - tool_name
-        - arguments
-      title: ToolCall
-    ToolResponseMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: tool
-          default: tool
-          description: >-
-            Must be "tool" to identify this as a tool response
-        call_id:
-          type: string
-          description: >-
-            Unique identifier for the tool call this response is for
         content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The response content from the tool
-      additionalProperties: false
-      required:
-        - role
-        - call_id
-        - content
-      title: ToolResponseMessage
-      description: >-
-        A message representing the result of a tool invocation.
-    URL:
+          anyOf:
+          - type: string
+          - oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Output'
+              title: ImageContentItem-Output
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Output'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Output | TextContentItem
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem-Output'
+                title: ImageContentItem-Output
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Output'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Output | TextContentItem
+            type: array
+            title: list[ImageContentItem-Output | TextContentItem]
+          - type: 'null'
+          title: string | list[ImageContentItem-Output | TextContentItem]
+        error_message:
+          anyOf:
+          - type: string
+          - type: 'null'
+        error_code:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
       type: object
+      title: ToolInvocationResult
+      description: Result of a tool invocation.
+    URL:
       properties:
         uri:
           type: string
-          description: The URL string pointing to the resource
-      additionalProperties: false
+          title: Uri
+      type: object
       required:
-        - uri
+      - uri
       title: URL
       description: A URL reference to external content.
-    UserMessage:
-      type: object
-      properties:
-        role:
-          type: string
-          const: user
-          default: user
-          description: >-
-            Must be "user" to identify this as a user message
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The content of the message, which can include text and other media
-        context:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) This field is used internally by Llama Stack to pass RAG context.
-            This field may be removed in the API in the future.
-      additionalProperties: false
-      required:
-        - role
-        - content
-      title: UserMessage
-      description: >-
-        A message from the user in a chat conversation.
-    SyntheticDataGenerateRequest:
-      type: object
-      properties:
-        dialogs:
-          type: array
-          items:
-            $ref: '#/components/schemas/Message'
-          description: >-
-            List of conversation messages to use as input for synthetic data generation
-        filtering_function:
-          type: string
-          enum:
-            - none
-            - random
-            - top_k
-            - top_p
-            - top_k_top_p
-            - sigmoid
-          description: >-
-            Type of filtering to apply to generated synthetic data samples
-        model:
-          type: string
-          description: >-
-            (Optional) The identifier of the model to use. The model must be registered
-            with Llama Stack and available via the /models endpoint
-      additionalProperties: false
-      required:
-        - dialogs
-        - filtering_function
-      title: SyntheticDataGenerateRequest
-    SyntheticDataGenerationResponse:
-      type: object
-      properties:
-        synthetic_data:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            List of generated synthetic data samples that passed the filtering criteria
-        statistics:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Statistical information about the generation process and filtering
-            results
-      additionalProperties: false
-      required:
-        - synthetic_data
-      title: SyntheticDataGenerationResponse
-      description: >-
-        Response from the synthetic data generation. Batch of (prompt, response, score)
-        tuples that pass the threshold.
-    InvokeToolRequest:
-      type: object
-      properties:
-        tool_name:
-          type: string
-          description: The name of the tool to invoke.
-        kwargs:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool.
-      additionalProperties: false
-      required:
-        - tool_name
-        - kwargs
-      title: InvokeToolRequest
-    ToolInvocationResult:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) The output content from the tool execution
-        error_message:
-          type: string
-          description: >-
-            (Optional) Error message if the tool execution failed
-        error_code:
-          type: integer
-          description: >-
-            (Optional) Numeric error code if the tool execution failed
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata about the tool execution
-      additionalProperties: false
-      title: ToolInvocationResult
-      description: Result of a tool invocation.
     ToolDef:
-      type: object
       properties:
         toolgroup_id:
-          type: string
-          description: >-
-            (Optional) ID of the tool group this tool belongs to
+          anyOf:
+          - type: string
+          - type: 'null'
         name:
           type: string
-          description: Name of the tool
+          title: Name
         description:
-          type: string
-          description: >-
-            (Optional) Human-readable description of what the tool does
+          anyOf:
+          - type: string
+          - type: 'null'
         input_schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) JSON Schema for tool inputs (MCP inputSchema)
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         output_schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) JSON Schema for tool outputs (MCP outputSchema)
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata about the tool
-      additionalProperties: false
-      required:
-        - name
-      title: ToolDef
-      description: >-
-        Tool definition used in runtime contexts.
-    ListToolDefsResponse:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
       type: object
+      required:
+      - name
+      title: ToolDef
+      description: Tool definition used in runtime contexts.
+    ListToolDefsResponse:
       properties:
         data:
-          type: array
           items:
             $ref: '#/components/schemas/ToolDef'
-          description: List of tool definitions
-      additionalProperties: false
+          type: array
+          title: Data
+      type: object
       required:
-        - data
+      - data
       title: ListToolDefsResponse
-      description: >-
-        Response containing a list of tool definitions.
-    RAGDocument:
-      type: object
-      properties:
-        document_id:
-          type: string
-          description: The unique identifier for the document.
-        content:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/InterleavedContentItem'
-            - type: array
-              items:
-                $ref: '#/components/schemas/InterleavedContentItem'
-            - $ref: '#/components/schemas/URL'
-          description: The content of the document.
-        mime_type:
-          type: string
-          description: The MIME type of the document.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Additional metadata for the document.
-      additionalProperties: false
-      required:
-        - document_id
-        - content
-        - metadata
-      title: RAGDocument
-      description: >-
-        A document to be used for document ingestion in the RAG Tool.
-    InsertRequest:
-      type: object
-      properties:
-        documents:
-          type: array
-          items:
-            $ref: '#/components/schemas/RAGDocument'
-          description: >-
-            List of documents to index in the RAG system
-        vector_db_id:
-          type: string
-          description: >-
-            ID of the vector database to store the document embeddings
-        chunk_size_in_tokens:
-          type: integer
-          description: >-
-            (Optional) Size in tokens for document chunking during indexing
-      additionalProperties: false
-      required:
-        - documents
-        - vector_db_id
-        - chunk_size_in_tokens
-      title: InsertRequest
-    DefaultRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: default
-          default: default
-          description: >-
-            Type of query generator, always 'default'
-        separator:
-          type: string
-          default: ' '
-          description: >-
-            String separator used to join query terms
-      additionalProperties: false
-      required:
-        - type
-        - separator
-      title: DefaultRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the default RAG query generator.
-    LLMRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: llm
-          default: llm
-          description: Type of query generator, always 'llm'
-        model:
-          type: string
-          description: >-
-            Name of the language model to use for query generation
-        template:
-          type: string
-          description: >-
-            Template string for formatting the query generation prompt
-      additionalProperties: false
-      required:
-        - type
-        - model
-        - template
-      title: LLMRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the LLM-based RAG query generator.
-    RAGQueryConfig:
-      type: object
-      properties:
-        query_generator_config:
-          oneOf:
-            - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-            - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          discriminator:
-            propertyName: type
-            mapping:
-              default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-              llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          description: Configuration for the query generator.
-        max_tokens_in_context:
-          type: integer
-          default: 4096
-          description: Maximum number of tokens in the context.
-        max_chunks:
-          type: integer
-          default: 5
-          description: Maximum number of chunks to retrieve.
-        chunk_template:
-          type: string
-          default: >
-            Result {index}
-
-            Content: {chunk.content}
-
-            Metadata: {metadata}
-          description: >-
-            Template for formatting each retrieved chunk in the context. Available
-            placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
-            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
-            {chunk.content}\nMetadata: {metadata}\n"
-        mode:
-          $ref: '#/components/schemas/RAGSearchMode'
-          default: vector
-          description: >-
-            Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
-            "vector".
-        ranker:
-          $ref: '#/components/schemas/Ranker'
-          description: >-
-            Configuration for the ranker to use in hybrid search. Defaults to RRF
-            ranker.
-      additionalProperties: false
-      required:
-        - query_generator_config
-        - max_tokens_in_context
-        - max_chunks
-        - chunk_template
-      title: RAGQueryConfig
-      description: >-
-        Configuration for the RAG query generation.
-    RAGSearchMode:
-      type: string
-      enum:
-        - vector
-        - keyword
-        - hybrid
-      title: RAGSearchMode
-      description: >-
-        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
-        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
-        - HYBRID: Combines both vector and keyword search for better results
-    RRFRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: rrf
-          default: rrf
-          description: The type of ranker, always "rrf"
-        impact_factor:
-          type: number
-          default: 60.0
-          description: >-
-            The impact factor for RRF scoring. Higher values give more weight to higher-ranked
-            results. Must be greater than 0
-      additionalProperties: false
-      required:
-        - type
-        - impact_factor
-      title: RRFRanker
-      description: >-
-        Reciprocal Rank Fusion (RRF) ranker configuration.
-    Ranker:
-      oneOf:
-        - $ref: '#/components/schemas/RRFRanker'
-        - $ref: '#/components/schemas/WeightedRanker'
-      discriminator:
-        propertyName: type
-        mapping:
-          rrf: '#/components/schemas/RRFRanker'
-          weighted: '#/components/schemas/WeightedRanker'
-    WeightedRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: weighted
-          default: weighted
-          description: The type of ranker, always "weighted"
-        alpha:
-          type: number
-          default: 0.5
-          description: >-
-            Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
-            only use vector scores, values in between blend both scores.
-      additionalProperties: false
-      required:
-        - type
-        - alpha
-      title: WeightedRanker
-      description: >-
-        Weighted ranker configuration that combines vector and keyword scores.
-    QueryRequest:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The query content to search for in the indexed documents
-        vector_db_ids:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of vector database IDs to search within
-        query_config:
-          $ref: '#/components/schemas/RAGQueryConfig'
-          description: >-
-            (Optional) Configuration parameters for the query operation
-      additionalProperties: false
-      required:
-        - content
-        - vector_db_ids
-      title: QueryRequest
-    RAGQueryResult:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) The retrieved content from the query
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata about the query result
-      additionalProperties: false
-      required:
-        - metadata
-      title: RAGQueryResult
-      description: >-
-        Result of a RAG query containing retrieved content and metadata.
+      description: Response containing a list of tool definitions.
     ToolGroup:
-      type: object
       properties:
         identifier:
           type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
         provider_resource_id:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
         provider_id:
           type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
         type:
           type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
           const: tool_group
+          title: Type
           default: tool_group
-          description: Type of resource, always 'tool_group'
         mcp_endpoint:
-          $ref: '#/components/schemas/URL'
-          description: >-
-            (Optional) Model Context Protocol endpoint for remote tools
+          anyOf:
+          - $ref: '#/components/schemas/URL'
+            title: URL
+          - type: 'null'
+          title: URL
         args:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional arguments for the tool group
-      additionalProperties: false
-      required:
-        - identifier
-        - provider_id
-        - type
-      title: ToolGroup
-      description: >-
-        A group of related tools managed together.
-    ListToolGroupsResponse:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
       type: object
+      required:
+      - identifier
+      - provider_id
+      title: ToolGroup
+      description: A group of related tools managed together.
+    ListToolGroupsResponse:
       properties:
         data:
-          type: array
           items:
             $ref: '#/components/schemas/ToolGroup'
-          description: List of tool groups
-      additionalProperties: false
+          type: array
+          title: Data
+      type: object
       required:
-        - data
+      - data
       title: ListToolGroupsResponse
-      description: >-
-        Response containing a list of tool groups.
-    RegisterToolGroupRequest:
-      type: object
-      properties:
-        toolgroup_id:
-          type: string
-          description: The ID of the tool group to register.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the tool group.
-        mcp_endpoint:
-          $ref: '#/components/schemas/URL'
-          description: >-
-            The MCP endpoint to use for the tool group.
-        args:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool group.
-      additionalProperties: false
-      required:
-        - toolgroup_id
-        - provider_id
-      title: RegisterToolGroupRequest
+      description: Response containing a list of tool groups.
     Chunk:
-      type: object
+      description: A chunk of content that can be inserted into a vector database.
       properties:
         content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The content of the chunk, which can be interleaved text, images, or other
-            types.
-        metadata:
-          type: object
-          additionalProperties:
+          anyOf:
+          - type: string
+          - discriminator:
+              mapping:
+                image: '#/components/schemas/ImageContentItem'
+                text: '#/components/schemas/TextContentItem'
+              propertyName: type
            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Metadata associated with the chunk that will be used in the model context
-            during inference.
-        embedding:
-          type: array
-          items:
-            type: number
-          description: >-
-            Optional embedding for the chunk. If not provided, it will be computed
-            later.
-        stored_chunk_id:
+            - $ref: '#/components/schemas/ImageContentItem'
+              title: ImageContentItem
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            title: ImageContentItem | TextContentItem
+          - items:
+              discriminator:
+                mapping:
+                  image: '#/components/schemas/ImageContentItem'
+                  text: '#/components/schemas/TextContentItem'
+                propertyName: type
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem'
+                title: ImageContentItem
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              title: ImageContentItem | TextContentItem
+            type: array
+            title: list[ImageContentItem | TextContentItem]
+          title: string | list[ImageContentItem | TextContentItem]
+        chunk_id:
+          title: Chunk Id
           type: string
-          description: >-
-            The chunk ID that is stored in the vector database. Used for backend functionality.
+        metadata:
+          additionalProperties: true
+          title: Metadata
+          type: object
+        embedding:
+          anyOf:
+          - items:
+              type: number
+            type: array
+          - type: 'null'
+          nullable: true
         chunk_metadata:
-          $ref: '#/components/schemas/ChunkMetadata'
-          description: >-
-            Metadata for the chunk that will NOT be used in the context during inference.
-            The `chunk_metadata` is required backend functionality.
-      additionalProperties: false
+          anyOf:
+          - $ref: '#/components/schemas/ChunkMetadata'
+            title: ChunkMetadata
+          - type: 'null'
+          nullable: true
+          title: ChunkMetadata
       required:
-        - content
-        - metadata
+      - content
+      - chunk_id
       title: Chunk
-      description: >-
-        A chunk of content that can be inserted into a vector database.
-    ChunkMetadata:
       type: object
+    ChunkMetadata:
       properties:
         chunk_id:
-          type: string
-          description: >-
-            The ID of the chunk. If not set, it will be generated based on the document
-            ID and content.
+          anyOf:
+          - type: string
+          - type: 'null'
         document_id:
-          type: string
-          description: >-
-            The ID of the document this chunk belongs to.
+          anyOf:
+          - type: string
+          - type: 'null'
         source:
-          type: string
-          description: >-
-            The source of the content, such as a URL, file path, or other identifier.
+          anyOf:
+          - type: string
+          - type: 'null'
         created_timestamp:
-          type: integer
-          description: >-
-            An optional timestamp indicating when the chunk was created.
+          anyOf:
+          - type: integer
+          - type: 'null'
         updated_timestamp:
-          type: integer
-          description: >-
-            An optional timestamp indicating when the chunk was last updated.
+          anyOf:
+          - type: integer
+          - type: 'null'
         chunk_window:
-          type: string
-          description: >-
-            The window of the chunk, which can be used to group related chunks together.
+          anyOf:
+          - type: string
+          - type: 'null'
         chunk_tokenizer:
-          type: string
-          description: >-
-            The tokenizer used to create the chunk. Default is Tiktoken.
+          anyOf:
+          - type: string
+          - type: 'null'
         chunk_embedding_model:
-          type: string
-          description: >-
-            The embedding model used to create the chunk's embedding.
+          anyOf:
+          - type: string
+          - type: 'null'
         chunk_embedding_dimension:
-          type: integer
-          description: >-
-            The dimension of the embedding vector for the chunk.
+          anyOf:
+          - type: integer
+          - type: 'null'
         content_token_count:
-          type: integer
-          description: >-
-            The number of tokens in the content of the chunk.
+          anyOf:
+          - type: integer
+          - type: 'null'
         metadata_token_count:
-          type: integer
-          description: >-
-            The number of tokens in the metadata of the chunk.
-      additionalProperties: false
-      title: ChunkMetadata
-      description: >-
-        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional
-        information about the chunk that will not be used in the context during
-        inference, but is required for backend functionality. The `ChunkMetadata` is
-        set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not
-        expected to change after. Use `Chunk.metadata` for metadata that will
-        be used in the context during inference.
-    InsertChunksRequest:
+          anyOf:
+          - type: integer
+          - type: 'null'
       type: object
+      title: ChunkMetadata
+      description: |-
+        `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
+        will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
+        is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after.
+        Use `Chunk.metadata` for metadata that will be used in the context during inference.
+    InsertChunksRequest:
       properties:
-        vector_db_id:
+        vector_store_id:
           type: string
-          description: >-
-            The identifier of the vector database to insert the chunks into.
+          title: Vector Store Id
         chunks:
-          type: array
           items:
-            $ref: '#/components/schemas/Chunk'
-          description: >-
-            The chunks to insert. Each `Chunk` should contain content which can be
-            interleaved text, images, or other types. `metadata`: `dict[str, Any]`
-            and `embedding`: `List[float]` are optional. If `metadata` is provided,
-            you configure how Llama Stack formats the chunk during generation. If
-            `embedding` is not provided, it will be computed later.
+            $ref: '#/components/schemas/Chunk-Input'
+          type: array
+          title: Chunks
         ttl_seconds:
-          type: integer
-          description: The time to live of the chunks.
-      additionalProperties: false
+          anyOf:
+          - type: integer
+          - type: 'null'
+      type: object
       required:
-        - vector_db_id
-        - chunks
+      - vector_store_id
+      - chunks
       title: InsertChunksRequest
     QueryChunksRequest:
-      type: object
       properties:
-        vector_db_id:
+        vector_store_id:
           type: string
-          description: >-
-            The identifier of the vector database to query.
+          title: Vector Store Id
         query:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The query to search for.
+          anyOf:
+          - type: string
+          - oneOf:
+            - $ref: '#/components/schemas/ImageContentItem-Input'
+              title: ImageContentItem-Input
+            - $ref: '#/components/schemas/TextContentItem'
+              title: TextContentItem
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/ImageContentItem-Input'
+                text: '#/components/schemas/TextContentItem'
+            title: ImageContentItem-Input | TextContentItem
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/ImageContentItem-Input'
+                title: ImageContentItem-Input
+              - $ref: '#/components/schemas/TextContentItem'
+                title: TextContentItem
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/ImageContentItem-Input'
+                  text: '#/components/schemas/TextContentItem'
+              title: ImageContentItem-Input | TextContentItem
+            type: array
+            title: list[ImageContentItem-Input | TextContentItem]
+          title: string | list[ImageContentItem-Input | TextContentItem]
         params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the query.
-      additionalProperties: false
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
       required:
-        - vector_db_id
-        - query
+      - vector_store_id
+      - query
       title: QueryChunksRequest
     QueryChunksResponse:
-      type: object
       properties:
         chunks:
-          type: array
           items:
-            $ref: '#/components/schemas/Chunk'
-          description: >-
-            List of content chunks returned from the query
-        scores:
+            $ref: '#/components/schemas/Chunk-Output'
           type: array
+          title: Chunks
+        scores:
           items:
             type: number
-          description: >-
-            Relevance scores corresponding to each returned chunk
-      additionalProperties: false
-      required:
-        - chunks
-        - scores
-      title: QueryChunksResponse
-      description: >-
-        Response from querying chunks in a vector database.
-    VectorStoreFileCounts:
+          type: array
+          title: Scores
       type: object
+      required:
+      - chunks
+      - scores
+      title: QueryChunksResponse
+      description: Response from querying chunks in a vector database.
+    VectorStoreFileCounts:
       properties:
         completed:
           type: integer
-          description: >-
-            Number of files that have been successfully processed
+          title: Completed
         cancelled:
           type: integer
-          description: >-
-            Number of files that had their processing cancelled
+          title: Cancelled
         failed:
           type: integer
-          description: Number of files that failed to process
+          title: Failed
         in_progress:
           type: integer
-          description: >-
-            Number of files currently being processed
+          title: In Progress
         total:
           type: integer
-          description: >-
-            Total number of files in the vector store
-      additionalProperties: false
-      required:
-        - completed
-        - cancelled
-        - failed
-        - in_progress
-        - total
-      title: VectorStoreFileCounts
-      description: >-
-        File processing status counts for a vector store.
-    VectorStoreListResponse:
+          title: Total
       type: object
+      required:
+      - completed
+      - cancelled
+      - failed
+      - in_progress
+      - total
+      title: VectorStoreFileCounts
+      description: File processing status counts for a vector store.
+    VectorStoreListResponse:
       properties:
         object:
           type: string
+          title: Object
           default: list
-          description: Object type identifier, always "list"
         data:
-          type: array
           items:
             $ref: '#/components/schemas/VectorStoreObject'
-          description: List of vector store objects
+          type: array
+          title: Data
         first_id:
-          type: string
-          description: >-
-            (Optional) ID of the first vector store in the list for pagination
+          anyOf:
+          - type: string
+          - type: 'null'
         last_id:
-          type: string
-          description: >-
-            (Optional) ID of the last vector store in the list for pagination
+          anyOf:
+          - type: string
+          - type: 'null'
         has_more:
           type: boolean
+          title: Has More
           default: false
-          description: >-
-            Whether there are more vector stores available beyond this page
-      additionalProperties: false
+      type: object
       required:
-        - object
-        - data
-        - has_more
+      - data
       title: VectorStoreListResponse
       description: Response from listing vector stores.
     VectorStoreObject:
-      type: object
       properties:
         id:
           type: string
-          description: Unique identifier for the vector store
+          title: Id
         object:
           type: string
+          title: Object
           default: vector_store
-          description: >-
-            Object type identifier, always "vector_store"
         created_at:
           type: integer
-          description: >-
-            Timestamp when the vector store was created
+          title: Created At
         name:
-          type: string
-          description: (Optional) Name of the vector store
+          anyOf:
+          - type: string
+          - type: 'null'
         usage_bytes:
           type: integer
+          title: Usage Bytes
           default: 0
-          description: >-
-            Storage space used by the vector store in bytes
         file_counts:
           $ref: '#/components/schemas/VectorStoreFileCounts'
-          description: >-
-            File processing status counts for the vector store
         status:
           type: string
+          title: Status
           default: completed
-          description: Current status of the vector store
         expires_after:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Expiration policy for the vector store
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         expires_at:
-          type: integer
-          description: >-
-            (Optional) Timestamp when the vector store will expire
+          anyOf:
+          - type: integer
+          - type: 'null'
         last_active_at:
-          type: integer
-          description: >-
-            (Optional) Timestamp of last activity on the vector store
+          anyOf:
+          - type: integer
+          - type: 'null'
         metadata:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Set of key-value pairs that can be attached to the vector store
-      additionalProperties: false
+          title: Metadata
+      type: object
       required:
-        - id
-        - object
-        - created_at
-        - usage_bytes
-        - file_counts
-        - status
-        - metadata
+      - id
+      - created_at
+      - file_counts
       title: VectorStoreObject
       description: OpenAI Vector Store object.
-    "OpenAICreateVectorStoreRequestWithExtraBody":
-      type: object
-      properties:
-        name:
-          type: string
-          description: (Optional) A name for the vector store
-        file_ids:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of file IDs to include in the vector store
-        expires_after:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Expiration policy for the vector store
-        chunking_strategy:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Strategy for splitting files into chunks
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Set of key-value pairs that can be attached to the vector store
-      additionalProperties: false
-      title: >-
-        OpenAICreateVectorStoreRequestWithExtraBody
-      description: >-
-        Request to create a vector store with extra_body support.
-    OpenaiUpdateVectorStoreRequest:
-      type: object
-      properties:
-        name:
-          type: string
-          description: The name of the vector store.
-        expires_after:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The expiration policy for a vector store.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Set of 16 key-value pairs that can be attached to an object.
-      additionalProperties: false
-      title: OpenaiUpdateVectorStoreRequest
-    VectorStoreDeleteResponse:
-      type: object
-      properties:
-        id:
-          type: string
-          description: >-
-            Unique identifier of the deleted vector store
-        object:
-          type: string
-          default: vector_store.deleted
-          description: >-
-            Object type identifier for the deletion response
-        deleted:
-          type: boolean
-          default: true
-          description: >-
-            Whether the deletion operation was successful
-      additionalProperties: false
-      required:
-        - id
-        - object
-        - deleted
-      title: VectorStoreDeleteResponse
-      description: Response from deleting a vector store.
     VectorStoreChunkingStrategy:
-      oneOf:
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
       discriminator:
-        propertyName: type
         mapping:
          auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
          static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+        propertyName: type
+      oneOf:
+      - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+        title: VectorStoreChunkingStrategyAuto
+      - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+        title: VectorStoreChunkingStrategyStatic
+      title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
     VectorStoreChunkingStrategyAuto:
-      type: object
       properties:
         type:
           type: string
           const: auto
+          title: Type
           default: auto
-          description: >-
-            Strategy type, always "auto" for automatic chunking
-      additionalProperties: false
-      required:
-        - type
-      title: VectorStoreChunkingStrategyAuto
-      description: >-
-        Automatic chunking strategy for vector store files.
-    VectorStoreChunkingStrategyStatic:
       type: object
+      title: VectorStoreChunkingStrategyAuto
+      description: Automatic chunking strategy for vector store files.
+    VectorStoreChunkingStrategyStatic:
       properties:
         type:
           type: string
           const: static
+          title: Type
           default: static
-          description: >-
-            Strategy type, always "static" for static chunking
         static:
           $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
-          description: >-
-            Configuration parameters for the static chunking strategy
-      additionalProperties: false
-      required:
-        - type
-        - static
-      title: VectorStoreChunkingStrategyStatic
-      description: >-
-        Static chunking strategy with configurable parameters.
-    VectorStoreChunkingStrategyStaticConfig:
       type: object
+      required:
+      - static
+      title: VectorStoreChunkingStrategyStatic
+      description: Static chunking strategy with configurable parameters.
+    VectorStoreChunkingStrategyStaticConfig:
       properties:
         chunk_overlap_tokens:
           type: integer
+          title: Chunk Overlap Tokens
           default: 400
-          description: >-
-            Number of tokens to overlap between adjacent chunks
         max_chunk_size_tokens:
           type: integer
+          maximum: 4096.0
+          minimum: 100.0
+          title: Max Chunk Size Tokens
           default: 800
-          description: >-
-            Maximum number of tokens per chunk, must be between 100 and 4096
-      additionalProperties: false
-      required:
-        - chunk_overlap_tokens
-        - max_chunk_size_tokens
+      type: object
       title: VectorStoreChunkingStrategyStaticConfig
-      description: >-
-        Configuration for static chunking strategy.
-    "OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
-      type: object
+      description: Configuration for static chunking strategy.
+    OpenAICreateVectorStoreRequestWithExtraBody:
       properties:
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
         file_ids:
-          type: array
-          items:
-            type: string
-          description: >-
-            A list of File IDs that the vector store should use
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Key-value attributes to store with the files
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+        expires_after:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         chunking_strategy:
-          $ref: '#/components/schemas/VectorStoreChunkingStrategy'
-          description: >-
-            (Optional) The chunking strategy used to chunk the file(s). Defaults to
-            auto
-      additionalProperties: false
-      required:
-        - file_ids
-      title: >-
-        OpenAICreateVectorStoreFileBatchRequestWithExtraBody
-      description: >-
-        Request to create a vector store file batch with extra_body support.
-    VectorStoreFileBatchObject:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+              title: VectorStoreChunkingStrategyAuto
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+              title: VectorStoreChunkingStrategyStatic
+            discriminator:
+              propertyName: type
+              mapping:
+                auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+                static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+            title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
+          - type: 'null'
+          title: Chunking Strategy
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      additionalProperties: true
       type: object
+      title: OpenAICreateVectorStoreRequestWithExtraBody
+      description: Request to create a vector store with extra_body support.
+    OpenaiUpdateVectorStoreRequest:
+      properties:
+        name:
+          anyOf:
+          - type: string
+          - type: 'null'
+        expires_after:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+      type: object
+      title: OpenaiUpdateVectorStoreRequest
+    VectorStoreDeleteResponse:
       properties:
         id:
           type: string
-          description: Unique identifier for the file batch
+          title: Id
         object:
           type: string
+          title: Object
+          default: vector_store.deleted
+        deleted:
+          type: boolean
+          title: Deleted
+          default: true
+      type: object
+      required:
+      - id
+      title: VectorStoreDeleteResponse
+      description: Response from deleting a vector store.
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody:
+      properties:
+        file_ids:
+          items:
+            type: string
+          type: array
+          title: File Ids
+        attributes:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        chunking_strategy:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+              title: VectorStoreChunkingStrategyAuto
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+              title: VectorStoreChunkingStrategyStatic
+            discriminator:
+              propertyName: type
+              mapping:
+                auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+                static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+            title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
+          - type: 'null'
+          title: Chunking Strategy
+      additionalProperties: true
+      type: object
+      required:
+      - file_ids
+      title: OpenAICreateVectorStoreFileBatchRequestWithExtraBody
+      description: Request to create a vector store file batch with extra_body support.
+    VectorStoreFileBatchObject:
+      properties:
+        id:
+          type: string
+          title: Id
+        object:
+          type: string
+          title: Object
           default: vector_store.file_batch
-          description: >-
-            Object type identifier, always "vector_store.file_batch"
         created_at:
           type: integer
-          description: >-
-            Timestamp when the file batch was created
+          title: Created At
         vector_store_id:
           type: string
-          description: >-
-            ID of the vector store containing the file batch
+          title: Vector Store Id
         status:
-          $ref: '#/components/schemas/VectorStoreFileStatus'
-          description: >-
-            Current processing status of the file batch
+          title: Status
+          type: string
+          enum:
+          - completed
+          - in_progress
+          - cancelled
+          - failed
+          default: completed
         file_counts:
           $ref: '#/components/schemas/VectorStoreFileCounts'
-          description: >-
-            File processing status counts for the batch
-      additionalProperties: false
+      type: object
       required:
-        - id
-        - object
-        - created_at
-        - vector_store_id
-        - status
-        - file_counts
+      - id
+      - created_at
+      - vector_store_id
+      - status
+      - file_counts
       title: VectorStoreFileBatchObject
       description: OpenAI Vector Store File Batch object.
     VectorStoreFileStatus:
-      oneOf:
-        - type: string
-          const: completed
-        - type: string
-          const: in_progress
-        - type: string
-          const: cancelled
-        - type: string
-          const: failed
+      type: string
+      enum:
+      - completed
+      - in_progress
+      - cancelled
+      - failed
+      default: completed
     VectorStoreFileLastError:
-      type: object
       properties:
         code:
-          oneOf:
-            - type: string
-              const: server_error
-            - type: string
-              const: rate_limit_exceeded
-          description: >-
-            Error code indicating the type of failure
+          title: Code
+          type: string
+          enum:
+          - server_error
+          - rate_limit_exceeded
+          default: server_error
         message:
           type: string
-          description: >-
-            Human-readable error message describing the failure
-      additionalProperties: false
-      required:
-        - code
-        - message
-      title: VectorStoreFileLastError
-      description: >-
-        Error information for failed vector store file processing.
-    VectorStoreFileObject:
+          title: Message
       type: object
+      required:
+      - code
+      - message
+      title: VectorStoreFileLastError
+      description: Error information for failed vector store file processing.
+    VectorStoreFileObject:
       properties:
         id:
           type: string
-          description: Unique identifier for the file
+          title: Id
         object:
           type: string
+          title: Object
           default: vector_store.file
-          description: >-
-            Object type identifier, always "vector_store.file"
         attributes:
-          type: object
           additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Key-value attributes associated with the file
+            anyOf:
+            - type: string
+              maxLength: 512
+            - type: number
+            - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
+          type: object
+          maxProperties: 16
+          title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
-            - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-            - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+          - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+            title: VectorStoreChunkingStrategyAuto
+          - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+            title: VectorStoreChunkingStrategyStatic
+          title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
           discriminator:
             propertyName: type
             mapping:
               auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
               static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
-          description: >-
-            Strategy used for splitting the file into chunks
         created_at:
           type: integer
-          description: >-
-            Timestamp when the file was added to the vector store
+          title: Created At
         last_error:
-          $ref: '#/components/schemas/VectorStoreFileLastError'
-          description: >-
-            (Optional) Error information if file processing failed
+          anyOf:
+          - $ref: '#/components/schemas/VectorStoreFileLastError'
+            title: VectorStoreFileLastError
+          - type: 'null'
+          title: VectorStoreFileLastError
         status:
-          $ref: '#/components/schemas/VectorStoreFileStatus'
-          description: Current processing status of the file
+          title: Status
+          type: string
+          enum:
+          - completed
+          - in_progress
+          - cancelled
+          - failed
+          default: completed
         usage_bytes:
           type: integer
+          title: Usage Bytes
           default: 0
-          description: Storage space used by this file in bytes
         vector_store_id:
           type: string
-          description: >-
-            ID of the vector store containing this file
-      additionalProperties: false
+          title: Vector Store Id
+      type: object
       required:
-        - id
-        - object
-        - attributes
-        - chunking_strategy
-        - created_at
-        - status
-        - usage_bytes
-        - vector_store_id
+      - id
+      - chunking_strategy
+      - created_at
+      - status
+      - vector_store_id
       title: VectorStoreFileObject
       description: OpenAI Vector Store File object.
     VectorStoreFilesListInBatchResponse:
-      type: object
       properties:
         object:
           type: string
+          title: Object
           default: list
-          description: Object type identifier, always "list"
         data:
-          type: array
           items:
             $ref: '#/components/schemas/VectorStoreFileObject'
-          description: >-
-            List of vector store file objects in the batch
+          type: array
+          title: Data
         first_id:
-          type: string
-          description: >-
-            (Optional) ID of the first file in the list for pagination
+          anyOf:
+          - type: string
+          - type: 'null'
         last_id:
-          type: string
-          description: >-
-            (Optional) ID of the last file in the list for pagination
+          anyOf:
+          - type: string
+          - type: 'null'
         has_more:
           type: boolean
+          title: Has More
           default: false
-          description: >-
-            Whether there are more files available beyond this page
-      additionalProperties: false
+      type: object
       required:
-        - object
-        - data
-        - has_more
+      - data
       title: VectorStoreFilesListInBatchResponse
-      description: >-
-        Response from listing files in a vector store file batch.
+      description: Response from listing files in a vector store file batch.
     VectorStoreListFilesResponse:
-      type: object
       properties:
         object:
           type: string
+          title: Object
           default: list
-          description: Object type identifier, always "list"
         data:
-          type: array
           items:
             $ref: '#/components/schemas/VectorStoreFileObject'
-          description: List of vector store file objects
+          type: array
+          title: Data
         first_id:
-          type: string
-          description: >-
-            (Optional) ID of the first file in the list for pagination
+          anyOf:
+          - type: string
+          - type: 'null'
         last_id:
-          type: string
-          description: >-
-            (Optional) ID of the last file in the list for pagination
+          anyOf:
+          - type: string
+          - type: 'null'
         has_more:
           type: boolean
+          title: Has More
           default: false
-          description: >-
-            Whether there are more files available beyond this page
-      additionalProperties: false
-      required:
-        - object
-        - data
-        - has_more
-      title: VectorStoreListFilesResponse
-      description: >-
-        Response from listing files in a vector store.
-    OpenaiAttachFileToVectorStoreRequest:
       type: object
+      required:
+      - data
+      title: VectorStoreListFilesResponse
+      description: Response from listing files in a vector store.
+    OpenaiAttachFileToVectorStoreRequest:
       properties:
         file_id:
           type: string
-          description: >-
-            The ID of the file to attach to the vector store.
+          title: File Id
         attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The key-value attributes stored with the file, which can be used for filtering.
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
         chunking_strategy:
-          $ref: '#/components/schemas/VectorStoreChunkingStrategy'
-          description: >-
-            The chunking strategy to use for the file.
-      additionalProperties: false
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+              title: VectorStoreChunkingStrategyAuto
+            - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+              title: VectorStoreChunkingStrategyStatic
+            discriminator:
+              propertyName: type
+              mapping:
+                auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+                static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+            title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
+          - type: 'null'
+          title: Chunking Strategy
+      type: object
       required:
-        - file_id
+      - file_id
       title: OpenaiAttachFileToVectorStoreRequest
     OpenaiUpdateVectorStoreFileRequest:
-      type: object
       properties:
         attributes:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The updated key-value attributes to store with the file.
-      additionalProperties: false
+          title: Attributes
+      type: object
       required:
-        - attributes
+      - attributes
       title: OpenaiUpdateVectorStoreFileRequest
     VectorStoreFileDeleteResponse:
-      type: object
       properties:
         id:
           type: string
-          description: Unique identifier of the deleted file
+          title: Id
         object:
           type: string
+          title: Object
           default: vector_store.file.deleted
-          description: >-
-            Object type identifier for the deletion response
         deleted:
           type: boolean
+          title: Deleted
           default: true
-          description: >-
-            Whether the deletion operation was successful
-      additionalProperties: false
-      required:
-        - id
-        - object
-        - deleted
-      title: VectorStoreFileDeleteResponse
-      description: >-
-        Response from deleting a vector store file.
-    VectorStoreContent:
       type: object
+      required:
+      - id
+      title: VectorStoreFileDeleteResponse
+      description: Response from deleting a vector store file.
+    VectorStoreContent:
       properties:
         type:
           type: string
           const: text
-          description: >-
-            Content type, currently only "text" is supported
+          title: Type
         text:
           type: string
-          description: The actual text content
-      additionalProperties: false
-      required:
-        - type
-        - text
-      title: VectorStoreContent
-      description: >-
-        Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
-      type: object
-      properties:
-        file_id:
-          type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Key-value attributes associated with the file
-        content:
-          type: array
-          items:
-            $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
-      additionalProperties: false
-      required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
-      description: >-
-        Response from retrieving the contents of a vector store file.
-    OpenaiSearchVectorStoreRequest:
-      type: object
-      properties:
-        query:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                type: string
-          description: >-
-            The query string or array for performing the search.
-        filters:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Filters based on file attributes to narrow the search results.
-        max_num_results:
-          type: integer
-          description: >-
-            Maximum number of results to return (1 to 50 inclusive, default 10).
-        ranking_options:
-          type: object
-          properties:
-            ranker:
-              type: string
-              description: >-
-                (Optional) Name of the ranking algorithm to use
-            score_threshold:
+          title: Text
+        embedding:
+          anyOf:
+          - items:
               type: number
-              default: 0.0
-              description: >-
-                (Optional) Minimum relevance score threshold for results
-          additionalProperties: false
-          description: >-
-            Ranking options for fine-tuning the search results.
-        rewrite_query:
-          type: boolean
-          description: >-
-            Whether to rewrite the natural language query for vector search (default
-            false)
-        search_mode:
-          type: string
-          description: >-
-            The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
-      additionalProperties: false
-      required:
-        - query
-      title: OpenaiSearchVectorStoreRequest
-    VectorStoreSearchResponse:
+            type: array
+          - type: 'null'
+        chunk_metadata:
+          anyOf:
+          - $ref: '#/components/schemas/ChunkMetadata'
+            title: ChunkMetadata
+          - type: 'null'
+          title: ChunkMetadata
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
       type: object
-      properties:
-        file_id:
-          type: string
-          description: >-
-            Unique identifier of the file containing the result
-        filename:
-          type: string
-          description: Name of the file containing the result
-        score:
-          type: number
-          description: Relevance score for this search result
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: string
-              - type: number
-              - type: boolean
-          description: >-
-            (Optional) Key-value attributes associated with the file
-        content:
-          type: array
-          items:
-            $ref: '#/components/schemas/VectorStoreContent'
-          description: >-
-            List of content items matching the search query
-      additionalProperties: false
       required:
-        - file_id
-        - filename
-        - score
-        - content
-      title: VectorStoreSearchResponse
-      description: Response from searching a vector store.
-    VectorStoreSearchResponsePage:
-      type: object
+      - type
+      - text
+      title: VectorStoreContent
+      description: Content item from a vector store file or search result.
+    VectorStoreFileContentResponse:
       properties:
         object:
           type: string
-          default: vector_store.search_results.page
-          description: >-
-            Object type identifier for the search results page
-        search_query:
-          type: string
-          description: >-
-            The original search query that was executed
+          const: vector_store.file_content.page
+          title: Object
+          default: vector_store.file_content.page
         data:
-          type: array
           items:
-            $ref: '#/components/schemas/VectorStoreSearchResponse'
-          description: List of search result objects
+            $ref: '#/components/schemas/VectorStoreContent'
+          type: array
+          title: Data
         has_more:
           type: boolean
+          title: Has More
           default: false
-          description: >-
-            Whether there are more results available beyond this page
         next_page:
-          type: string
-          description: >-
-            (Optional) Token for retrieving the next page of results
-      additionalProperties: false
-      required:
-        - object
-        - search_query
-        - data
-        - has_more
-      title: VectorStoreSearchResponsePage
-      description: >-
-        Paginated response from searching a vector store.
-    VersionInfo:
+          anyOf:
+          - type: string
+          - type: 'null'
       type: object
+      required:
+      - data
+      title: VectorStoreFileContentResponse
+      description: Represents the parsed content of a vector store file.
+    OpenaiSearchVectorStoreRequest:
+      properties:
+        query:
+          anyOf:
+          - type: string
+          - items:
+              type: string
+            type: array
+            title: list[string]
+          title: string | list[string]
+        filters:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        max_num_results:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          default: 10
+        ranking_options:
+          anyOf:
+          - $ref: '#/components/schemas/SearchRankingOptions'
+            title: SearchRankingOptions
+          - type: 'null'
+          title: SearchRankingOptions
+        rewrite_query:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: false
+        search_mode:
+          anyOf:
+          - type: string
+          - type: 'null'
+          default: vector
+      type: object
+      required:
+      - query
+      title: OpenaiSearchVectorStoreRequest
+    VectorStoreSearchResponse:
+      properties:
+        file_id:
+          type: string
+          title: File Id
+        filename:
+          type: string
+          title: Filename
+        score:
+          type: number
+          title: Score
+        attributes:
+          anyOf:
+          - additionalProperties:
+              anyOf:
+              - type: string
+              - type: number
+              - type: boolean
+              title: string | number | boolean
+            type: object
+          - type: 'null'
+        content:
+          items:
+            $ref: '#/components/schemas/VectorStoreContent'
+          type: array
+          title: Content
+      type: object
+      required:
+      - file_id
+      - filename
+      - score
+      - content
+      title: VectorStoreSearchResponse
+      description: Response from searching a vector store.
+    VectorStoreSearchResponsePage:
+      properties:
+        object:
+          type: string
+          title: Object
+          default: vector_store.search_results.page
+        search_query:
+          items:
+            type: string
+          type: array
+          title: Search Query
+        data:
+          items:
+            $ref: '#/components/schemas/VectorStoreSearchResponse'
+          type: array
+          title: Data
+        has_more:
+          type: boolean
+          title: Has More
+          default: false
+        next_page:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - search_query
+      - data
+      title: VectorStoreSearchResponsePage
+      description: Paginated response from searching a vector store.
+    VersionInfo:
       properties:
         version:
           type: string
-          description: Version number of the service
-      additionalProperties: false
+          title: Version
+      type: object
       required:
-        - version
+      - version
       title: VersionInfo
       description: Version information for the service.
     AppendRowsRequest:
-      type: object
       properties:
         rows:
-          type: array
           items:
+            additionalProperties: true
             type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: The rows to append to the dataset.
-      additionalProperties: false
+          type: array
+          title: Rows
+      type: object
       required:
-        - rows
+      - rows
       title: AppendRowsRequest
     PaginatedResponse:
-      type: object
       properties:
         data:
-          type: array
           items:
+            additionalProperties: true
             type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: The list of items for the current page
+          type: array
+          title: Data
         has_more:
           type: boolean
-          description: >-
-            Whether there are more items available after this set
+          title: Has More
         url:
-          type: string
-          description: The URL for accessing this list
-      additionalProperties: false
-      required:
-        - data
-        - has_more
-      title: PaginatedResponse
-      description: >-
-        A generic paginated response that follows a simple format.
-    Dataset:
+          anyOf:
+          - type: string
+          - type: 'null'
       type: object
+      required:
+      - data
+      - has_more
+      title: PaginatedResponse
+      description: A generic paginated response that follows a simple format.
+    Dataset:
       properties:
         identifier:
           type: string
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
         provider_resource_id:
-          type: string
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
         provider_id:
           type: string
+          title: Provider Id
+          description: ID of the provider that owns this resource
         type:
           type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
           const: dataset
+          title: Type
           default: dataset
-          description: >-
-            Type of resource, always 'dataset' for datasets
         purpose:
-          type: string
-          enum:
-            - post-training/messages
-            - eval/question-answer
-            - eval/messages-answer
-          description: >-
-            Purpose of the dataset indicating its intended use
+          $ref: '#/components/schemas/DatasetPurpose'
         source:
           oneOf:
-            - $ref: '#/components/schemas/URIDataSource'
-            - $ref: '#/components/schemas/RowsDataSource'
+          - $ref: '#/components/schemas/URIDataSource'
+            title: URIDataSource
+          - $ref: '#/components/schemas/RowsDataSource'
+            title: RowsDataSource
+          title: URIDataSource | RowsDataSource
           discriminator:
             propertyName: type
             mapping:
-              uri: '#/components/schemas/URIDataSource'
               rows: '#/components/schemas/RowsDataSource'
-          description: >-
-            Data source configuration for the dataset
+              uri: '#/components/schemas/URIDataSource'
         metadata:
+          additionalProperties: true
           type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Additional metadata for the dataset
-      additionalProperties: false
-      required:
-        - identifier
-        - provider_id
-        - type
-        - purpose
-        - source
-        - metadata
-      title: Dataset
-      description: >-
-        Dataset resource for storing and accessing training or evaluation data.
-    RowsDataSource:
+          title: Metadata
+          description: Any additional metadata for this dataset
       type: object
+      required:
+      - identifier
+      - provider_id
+      - purpose
+      - source
+      title: Dataset
+      description: Dataset resource for storing and accessing training or evaluation data.
+    RowsDataSource:
       properties:
         type:
           type: string
           const: rows
+          title: Type
           default: rows
         rows:
-          type: array
           items:
+            additionalProperties: true
             type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
-            "content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
-            world!"}]} ]
-      additionalProperties: false
+          type: array
+          title: Rows
+      type: object
       required:
-        - type
-        - rows
+      - rows
       title: RowsDataSource
       description: A dataset stored in rows.
     URIDataSource:
-      type: object
       properties:
         type:
           type: string
           const: uri
+          title: Type
           default: uri
         uri:
           type: string
-          description: >-
-            The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
-            - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
-      additionalProperties: false
-      required:
-        - type
-        - uri
-      title: URIDataSource
-      description: >-
-        A dataset that can be obtained from a URI.
-    ListDatasetsResponse:
+          title: Uri
       type: object
+      required:
+      - uri
+      title: URIDataSource
+      description: A dataset that can be obtained from a URI.
+    ListDatasetsResponse:
       properties:
         data:
-          type: array
           items:
             $ref: '#/components/schemas/Dataset'
-          description: List of datasets
-      additionalProperties: false
+          type: array
+          title: Data
+      type: object
       required:
-        - data
+      - data
       title: ListDatasetsResponse
       description: Response from listing datasets.
-    DataSource:
-      oneOf:
-        - $ref: '#/components/schemas/URIDataSource'
-        - $ref: '#/components/schemas/RowsDataSource'
-      discriminator:
-        propertyName: type
-        mapping:
-          uri: '#/components/schemas/URIDataSource'
-          rows: '#/components/schemas/RowsDataSource'
-    RegisterDatasetRequest:
-      type: object
+    Benchmark:
       properties:
-        purpose:
+        identifier:
           type: string
-          enum:
-            - post-training/messages
-            - eval/question-answer
-            - eval/messages-answer
-          description: >-
-            The purpose of the dataset. One of: - "post-training/messages": The dataset
-            contains a messages column with list of messages for post-training. {
-            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
-            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
-            contains a question column and an answer column for evaluation. { "question":
-            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
-            The dataset contains a messages column with list of messages and an answer
-            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
-            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
-            Doe. How can I help you today?"}, {"role": "user", "content": "What's
-            my name?"}, ], "answer": "John Doe" }
-        source:
-          $ref: '#/components/schemas/DataSource'
-          description: >-
-            The data source of the dataset. Ensure that the data source schema is
-            compatible with the purpose of the dataset. Examples: - { "type": "uri",
-            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
-            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
-            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
-            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            } ] }
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The metadata for the dataset. - E.g. {"description": "My dataset"}.
-        dataset_id:
+          title: Identifier
+          description: Unique identifier for this resource in llama stack
+        provider_resource_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Unique identifier for this resource in the provider
+        provider_id:
           type: string
-          description: >-
-            The ID of the dataset. If not provided, an ID will be generated.
-      additionalProperties: false
-      required:
-        - purpose
-        - source
-      title: RegisterDatasetRequest
-    AgentConfig:
-      type: object
-      properties:
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-        input_shields:
-          type: array
-          items:
-            type: string
-        output_shields:
-          type: array
-          items:
-            type: string
-        toolgroups:
-          type: array
-          items:
-            $ref: '#/components/schemas/AgentTool'
-        client_tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolDef'
-        tool_choice:
-          type: string
-          enum:
-            - auto
-            - required
-            - none
-          title: ToolChoice
-          description: >-
-            Whether tool use is required or automatic. This is a hint to the model
-            which may not be followed. It depends on the Instruction Following capabilities
-            of the model.
-          deprecated: true
-        tool_prompt_format:
-          type: string
-          enum:
-            - json
-            - function_tag
-            - python_list
-          title: ToolPromptFormat
-          description: >-
-            Prompt format for calling custom / zero shot tools.
-          deprecated: true
-        tool_config:
-          $ref: '#/components/schemas/ToolConfig'
-        max_infer_iters:
-          type: integer
-          default: 10
-        model:
-          type: string
-          description: >-
-            The model identifier to use for the agent
-        instructions:
-          type: string
-          description: The system instructions for the agent
-        name:
-          type: string
-          description: >-
-            Optional name for the agent, used in telemetry and identification
-        enable_session_persistence:
-          type: boolean
-          default: false
-          description: >-
-            Optional flag indicating whether session data has to be persisted
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-          description: Optional response format configuration
-      additionalProperties: false
-      required:
-        - model
-        - instructions
-      title: AgentConfig
-      description: Configuration for an agent.
-    AgentTool:
-      oneOf:
-        - type: string
-        - type: object
-          properties:
-            name:
-              type: string
-            args:
-              type: object
-              additionalProperties:
-                oneOf:
-                  - type: 'null'
-                  - type: boolean
-                  - type: number
-                  - type: string
-                  - type: array
-                  - type: object
-          additionalProperties: false
-          required:
-            - name
-            - args
-          title: AgentToolGroupWithArgs
-    GrammarResponseFormat:
-      type: object
-      properties:
+          title: Provider Id
+          description: ID of the provider that owns this resource
         type:
           type: string
-          enum:
-            - json_schema
-            - grammar
-          description: >-
-            Must be "grammar" to identify this format type
-          const: grammar
-          default: grammar
-        bnf:
+          const: benchmark
+          title: Type
+          default: benchmark
+        dataset_id:
+          type: string
+          title: Dataset Id
+        scoring_functions:
+          items:
+            type: string
+          type: array
+          title: Scoring Functions
+        metadata:
+          additionalProperties: true
           type: object
+          title: Metadata
+          description: Metadata for this evaluation task
+      type: object
+      required:
+      - identifier
+      - provider_id
+      - dataset_id
+      - scoring_functions
+      title: Benchmark
+      description: A benchmark resource for evaluating model performance.
+    ListBenchmarksResponse:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/Benchmark'
+          type: array
+          title: Data
+      type: object
+      required:
+      - data
+      title: ListBenchmarksResponse
+    BenchmarkConfig:
+      properties:
+        eval_candidate:
+          $ref: '#/components/schemas/ModelCandidate'
+        scoring_params:
           additionalProperties:
             oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The BNF grammar specification the response should conform to
-      additionalProperties: false
-      required:
-        - type
-        - bnf
-      title: GrammarResponseFormat
-      description: >-
-        Configuration for grammar-guided response generation.
-    GreedySamplingStrategy:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              title: LLMAsJudgeScoringFnParams
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+              title: RegexParserScoringFnParams
+            - $ref: '#/components/schemas/BasicScoringFnParams'
+              title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
           type: object
+          title: Scoring Params
+          description: Map between scoring function id and parameters for each scoring function you want to run
+        num_examples:
+          anyOf:
+          - type: integer
+          - type: 'null'
+          description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated
       type: object
+      required:
+      - eval_candidate
+      title: BenchmarkConfig
+      description: A benchmark configuration for evaluation.
+    GreedySamplingStrategy:
       properties:
         type:
           type: string
           const: greedy
+          title: Type
           default: greedy
-          description: >-
-            Must be "greedy" to identify this sampling strategy
-      additionalProperties: false
-      required:
-        - type
+      type: object
       title: GreedySamplingStrategy
-      description: >-
-        Greedy sampling strategy that selects the highest probability token at each
-        step.
-    JsonSchemaResponseFormat:
-      type: object
-      properties:
-        type:
-          type: string
-          enum:
-            - json_schema
-            - grammar
-          description: >-
-            Must be "json_schema" to identify this format type
-          const: json_schema
-          default: json_schema
-        json_schema:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The JSON schema the response should conform to. In a Python SDK, this
-            is often a `pydantic` model.
-      additionalProperties: false
-      required:
-        - type
-        - json_schema
-      title: JsonSchemaResponseFormat
-      description: >-
-        Configuration for JSON schema-guided response generation.
-    ResponseFormat:
-      oneOf:
-        - $ref: '#/components/schemas/JsonSchemaResponseFormat'
-        - $ref: '#/components/schemas/GrammarResponseFormat'
-      discriminator:
-        propertyName: type
-        mapping:
-          json_schema: '#/components/schemas/JsonSchemaResponseFormat'
-          grammar: '#/components/schemas/GrammarResponseFormat'
-    SamplingParams:
-      type: object
-      properties:
-        strategy:
-          oneOf:
-            - $ref: '#/components/schemas/GreedySamplingStrategy'
-            - $ref: '#/components/schemas/TopPSamplingStrategy'
-            - $ref: '#/components/schemas/TopKSamplingStrategy'
-          discriminator:
-            propertyName: type
-            mapping:
-              greedy: '#/components/schemas/GreedySamplingStrategy'
-              top_p: '#/components/schemas/TopPSamplingStrategy'
-              top_k: '#/components/schemas/TopKSamplingStrategy'
-          description: The sampling strategy.
-        max_tokens:
-          type: integer
-          default: 0
-          description: >-
-            The maximum number of tokens that can be generated in the completion.
-            The token count of your prompt plus max_tokens cannot exceed the model's
-            context length.
-        repetition_penalty:
-          type: number
-          default: 1.0
-          description: >-
-            Number between -2.0 and 2.0. Positive values penalize new tokens based
-            on whether they appear in the text so far, increasing the model's likelihood
-            to talk about new topics.
-        stop:
-          type: array
-          items:
-            type: string
-          description: >-
-            Up to 4 sequences where the API will stop generating further tokens. The
-            returned text will not contain the stop sequence.
-      additionalProperties: false
-      required:
-        - strategy
-      title: SamplingParams
-      description: Sampling parameters.
-    ToolConfig:
-      type: object
-      properties:
-        tool_choice:
-          oneOf:
-            - type: string
-              enum:
-                - auto
-                - required
-                - none
-              title: ToolChoice
-              description: >-
-                Whether tool use is required or automatic. This is a hint to the model
-                which may not be followed. It depends on the Instruction Following
-                capabilities of the model.
-            - type: string
-          default: auto
-          description: >-
-            (Optional) Whether tool use is automatic, required, or none. Can also
-            specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
-        tool_prompt_format:
-          type: string
-          enum:
-            - json
-            - function_tag
-            - python_list
-          description: >-
-            (Optional) Instructs the model how to format tool calls. By default, Llama
-            Stack will attempt to use a format that is best adapted to the model.
-            - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
-            - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
-            tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python
-            syntax -- a list of function calls.
-        system_message_behavior:
-          type: string
-          enum:
-            - append
-            - replace
-          description: >-
-            (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`:
-            Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`:
-            Replaces the default system prompt with the provided system message. The
-            system message can include the string '{{function_definitions}}' to indicate
-            where the function definitions should be inserted.
-          default: append
-      additionalProperties: false
-      title: ToolConfig
-      description: Configuration for tool use.
-    TopKSamplingStrategy:
-      type: object
-      properties:
-        type:
-          type: string
-          const: top_k
-          default: top_k
-          description: >-
-            Must be "top_k" to identify this sampling strategy
-        top_k:
-          type: integer
-          description: >-
-            Number of top tokens to consider for sampling. Must be at least 1
-      additionalProperties: false
-      required:
-        - type
-        - top_k
-      title: TopKSamplingStrategy
-      description: >-
-        Top-k sampling strategy that restricts sampling to the k most likely tokens.
-    TopPSamplingStrategy:
-      type: object
-      properties:
-        type:
-          type: string
-          const: top_p
-          default: top_p
-          description: >-
-            Must be "top_p" to identify this sampling strategy
-        temperature:
-          type: number
-          description: >-
-            Controls randomness in sampling. Higher values increase randomness
-        top_p:
-          type: number
-          default: 0.95
-          description: >-
-            Cumulative probability threshold for nucleus sampling. Defaults to 0.95
-      additionalProperties: false
-      required:
-        - type
-      title: TopPSamplingStrategy
-      description: >-
-        Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
-        with cumulative probability >= p.
-    CreateAgentRequest:
-      type: object
-      properties:
-        agent_config:
-          $ref: '#/components/schemas/AgentConfig'
-          description: The configuration for the agent.
-      additionalProperties: false
-      required:
-        - agent_config
-      title: CreateAgentRequest
-    AgentCreateResponse:
-      type: object
-      properties:
-        agent_id:
-          type: string
-          description: Unique identifier for the created agent
-      additionalProperties: false
-      required:
-        - agent_id
-      title: AgentCreateResponse
-      description: >-
-        Response returned when creating a new agent.
-    Agent:
-      type: object
-      properties:
-        agent_id:
-          type: string
-          description: Unique identifier for the agent
-        agent_config:
-          $ref: '#/components/schemas/AgentConfig'
-          description: Configuration settings for the agent
-        created_at:
-          type: string
-          format: date-time
-          description: Timestamp when the agent was created
-      additionalProperties: false
-      required:
-        - agent_id
-        - agent_config
-        - created_at
-      title: Agent
-      description: >-
-        An agent instance with configuration and metadata.
-    CreateAgentSessionRequest:
-      type: object
-      properties:
-        session_name:
-          type: string
-          description: The name of the session to create.
-      additionalProperties: false
-      required:
-        - session_name
-      title: CreateAgentSessionRequest
-    AgentSessionCreateResponse:
-      type: object
-      properties:
-        session_id:
-          type: string
-          description: >-
-            Unique identifier for the created session
-      additionalProperties: false
-      required:
-        - session_id
-      title: AgentSessionCreateResponse
-      description: >-
-        Response returned when creating a new agent session.
-    InferenceStep:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: The ID of the turn.
-        step_id:
-          type: string
-          description: The ID of the step.
-        started_at:
-          type: string
-          format: date-time
-          description: The time the step started.
-        completed_at:
-          type: string
-          format: date-time
-          description: The time the step completed.
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          title: StepType
-          description: Type of the step in an agent turn.
-          const: inference
-          default: inference
-        model_response:
-          $ref: '#/components/schemas/CompletionMessage'
-          description: The response from the LLM.
-      additionalProperties: false
-      required:
-        - turn_id
-        - step_id
-        - step_type
-        - model_response
-      title: InferenceStep
-      description: An inference step in an agent turn.
-    MemoryRetrievalStep:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: The ID of the turn.
-        step_id:
-          type: string
-          description: The ID of the step.
-        started_at:
-          type: string
-          format: date-time
-          description: The time the step started.
-        completed_at:
-          type: string
-          format: date-time
-          description: The time the step completed.
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          title: StepType
-          description: Type of the step in an agent turn.
-          const: memory_retrieval
-          default: memory_retrieval
-        vector_db_ids:
-          type: string
-          description: >-
-            The IDs of the vector databases to retrieve context from.
-        inserted_context:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The context retrieved from the vector databases.
-      additionalProperties: false
-      required:
-        - turn_id
-        - step_id
-        - step_type
-        - vector_db_ids
-        - inserted_context
-      title: MemoryRetrievalStep
-      description: >-
-        A memory retrieval step in an agent turn.
-    Session:
-      type: object
-      properties:
-        session_id:
-          type: string
-          description: >-
-            Unique identifier for the conversation session
-        session_name:
-          type: string
-          description: Human-readable name for the session
-        turns:
-          type: array
-          items:
-            $ref: '#/components/schemas/Turn'
-          description: >-
-            List of all turns that have occurred in this session
-        started_at:
-          type: string
-          format: date-time
-          description: Timestamp when the session was created
-      additionalProperties: false
-      required:
-        - session_id
-        - session_name
-        - turns
-        - started_at
-      title: Session
-      description: >-
-        A single session of an interaction with an Agentic System.
-    ShieldCallStep:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: The ID of the turn.
-        step_id:
-          type: string
-          description: The ID of the step.
-        started_at:
-          type: string
-          format: date-time
-          description: The time the step started.
-        completed_at:
-          type: string
-          format: date-time
-          description: The time the step completed.
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          title: StepType
-          description: Type of the step in an agent turn.
-          const: shield_call
-          default: shield_call
-        violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: The violation from the shield call.
-      additionalProperties: false
-      required:
-        - turn_id
-        - step_id
-        - step_type
-      title: ShieldCallStep
-      description: A shield call step in an agent turn.
-    ToolExecutionStep:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: The ID of the turn.
-        step_id:
-          type: string
-          description: The ID of the step.
-        started_at:
-          type: string
-          format: date-time
-          description: The time the step started.
-        completed_at:
-          type: string
-          format: date-time
-          description: The time the step completed.
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          title: StepType
-          description: Type of the step in an agent turn.
-          const: tool_execution
-          default: tool_execution
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolCall'
-          description: The tool calls to execute.
-        tool_responses:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolResponse'
-          description: The tool responses from the tool calls.
-      additionalProperties: false
-      required:
-        - turn_id
-        - step_id
-        - step_type
-        - tool_calls
-        - tool_responses
-      title: ToolExecutionStep
-      description: A tool execution step in an agent turn.
-    ToolResponse:
-      type: object
-      properties:
-        call_id:
-          type: string
-          description: >-
-            Unique identifier for the tool call this response is for
-        tool_name:
-          oneOf:
-            - type: string
-              enum:
-                - brave_search
-                - wolfram_alpha
-                - photogen
-                - code_interpreter
-              title: BuiltinTool
-            - type: string
-          description: Name of the tool that was invoked
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The response content from the tool
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata about the tool response
-      additionalProperties: false
-      required:
-        - call_id
-        - tool_name
-        - content
-      title: ToolResponse
-      description: Response from a tool invocation.
-    Turn:
-      type: object
-      properties:
-        turn_id:
-          type: string
-          description: >-
-            Unique identifier for the turn within a session
-        session_id:
-          type: string
-          description: >-
-            Unique identifier for the conversation session
-        input_messages:
-          type: array
-          items:
-            oneOf:
-              - $ref: '#/components/schemas/UserMessage'
-              - $ref: '#/components/schemas/ToolResponseMessage'
-          description: >-
-            List of messages that initiated this turn
-        steps:
-          type: array
-          items:
-            oneOf:
-              - $ref: '#/components/schemas/InferenceStep'
-              - $ref: '#/components/schemas/ToolExecutionStep'
-              - $ref: '#/components/schemas/ShieldCallStep'
-              - $ref: '#/components/schemas/MemoryRetrievalStep'
-            discriminator:
-              propertyName: step_type
-              mapping:
-                inference: '#/components/schemas/InferenceStep'
-                tool_execution: '#/components/schemas/ToolExecutionStep'
-                shield_call: '#/components/schemas/ShieldCallStep'
-                memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
-          description: >-
-            Ordered list of processing steps executed during this turn
-        output_message:
-          $ref: '#/components/schemas/CompletionMessage'
-          description: >-
-            The model's generated response containing content and metadata
-        output_attachments:
-          type: array
-          items:
-            type: object
-            properties:
-              content:
-                oneOf:
-                  - type: string
-                  - $ref: '#/components/schemas/InterleavedContentItem'
-                  - type: array
-                    items:
-                      $ref: '#/components/schemas/InterleavedContentItem'
-                  - $ref: '#/components/schemas/URL'
-                description: The content of the attachment.
-              mime_type:
-                type: string
-                description: The MIME type of the attachment.
-            additionalProperties: false
-            required:
-              - content
-              - mime_type
-            title: Attachment
-            description: An attachment to an agent turn.
-          description: >-
-            (Optional) Files or media attached to the agent's response
-        started_at:
-          type: string
-          format: date-time
-          description: Timestamp when the turn began
-        completed_at:
-          type: string
-          format: date-time
-          description: >-
-            (Optional) Timestamp when the turn finished, if completed
-      additionalProperties: false
-      required:
-        - turn_id
-        - session_id
-        - input_messages
-        - steps
-        - output_message
-        - started_at
-      title: Turn
-      description: >-
-        A single turn in an interaction with an Agentic System.
-    CreateAgentTurnRequest:
-      type: object
-      properties:
-        messages:
-          type: array
-          items:
-            oneOf:
-              - $ref: '#/components/schemas/UserMessage'
-              - $ref: '#/components/schemas/ToolResponseMessage'
-          description: List of messages to start the turn with.
-        stream:
-          type: boolean
-          description: >-
-            (Optional) If True, generate an SSE event stream of the response. Defaults
-            to False.
-        documents:
-          type: array
-          items:
-            type: object
-            properties:
-              content:
-                oneOf:
-                  - type: string
-                  - $ref: '#/components/schemas/InterleavedContentItem'
-                  - type: array
-                    items:
-                      $ref: '#/components/schemas/InterleavedContentItem'
-                  - $ref: '#/components/schemas/URL'
-                description: The content of the document.
-              mime_type:
-                type: string
-                description: The MIME type of the document.
-            additionalProperties: false
-            required:
-              - content
-              - mime_type
-            title: Document
-            description: A document to be used by an agent.
-          description: >-
-            (Optional) List of documents to create the turn with.
-        toolgroups:
-          type: array
-          items:
-            $ref: '#/components/schemas/AgentTool'
-          description: >-
-            (Optional) List of toolgroups to create the turn with, will be used in
-            addition to the agent's config toolgroups for the request.
-        tool_config:
-          $ref: '#/components/schemas/ToolConfig'
-          description: >-
-            (Optional) The tool configuration to create the turn with, will be used
-            to override the agent's tool_config.
-      additionalProperties: false
-      required:
-        - messages
-      title: CreateAgentTurnRequest
-    AgentTurnResponseEvent:
-      type: object
-      properties:
-        payload:
-          oneOf:
-            - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
-            - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
-            - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
-            - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
-            - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
-            - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
-          discriminator:
-            propertyName: event_type
-            mapping:
-              step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
-              step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
-              step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
-              turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
-              turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
-              turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
-          description: >-
-            Event-specific payload containing event data
-      additionalProperties: false
-      required:
-        - payload
-      title: AgentTurnResponseEvent
-      description: >-
-        An event in an agent turn response stream.
-    AgentTurnResponseStepCompletePayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: step_complete
-          default: step_complete
-          description: Type of event being reported
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          description: Type of step being executed
-        step_id:
-          type: string
-          description: >-
-            Unique identifier for the step within a turn
-        step_details:
-          oneOf:
-            - $ref: '#/components/schemas/InferenceStep'
-            - $ref: '#/components/schemas/ToolExecutionStep'
-            - $ref: '#/components/schemas/ShieldCallStep'
-            - $ref: '#/components/schemas/MemoryRetrievalStep'
-          discriminator:
-            propertyName: step_type
-            mapping:
-              inference: '#/components/schemas/InferenceStep'
-              tool_execution: '#/components/schemas/ToolExecutionStep'
-              shield_call: '#/components/schemas/ShieldCallStep'
-              memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
-          description: Complete details of the executed step
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-        - step_details
-      title: AgentTurnResponseStepCompletePayload
-      description: >-
-        Payload for step completion events in agent turn responses.
-    AgentTurnResponseStepProgressPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: step_progress
-          default: step_progress
-          description: Type of event being reported
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          description: Type of step being executed
-        step_id:
-          type: string
-          description: >-
-            Unique identifier for the step within a turn
-        delta:
-          oneOf:
-            - $ref: '#/components/schemas/TextDelta'
-            - $ref: '#/components/schemas/ImageDelta'
-            - $ref: '#/components/schemas/ToolCallDelta'
-          discriminator:
-            propertyName: type
-            mapping:
-              text: '#/components/schemas/TextDelta'
-              image: '#/components/schemas/ImageDelta'
-              tool_call: '#/components/schemas/ToolCallDelta'
-          description: >-
-            Incremental content changes during step execution
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-        - delta
-      title: AgentTurnResponseStepProgressPayload
-      description: >-
-        Payload for step progress events in agent turn responses.
-    AgentTurnResponseStepStartPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: step_start
-          default: step_start
-          description: Type of event being reported
-        step_type:
-          type: string
-          enum:
-            - inference
-            - tool_execution
-            - shield_call
-            - memory_retrieval
-          description: Type of step being executed
-        step_id:
-          type: string
-          description: >-
-            Unique identifier for the step within a turn
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Additional metadata for the step
-      additionalProperties: false
-      required:
-        - event_type
-        - step_type
-        - step_id
-      title: AgentTurnResponseStepStartPayload
-      description: >-
-        Payload for step start events in agent turn responses.
-    AgentTurnResponseStreamChunk:
-      type: object
-      properties:
-        event:
-          $ref: '#/components/schemas/AgentTurnResponseEvent'
-          description: >-
-            Individual event in the agent turn response stream
-      additionalProperties: false
-      required:
-        - event
-      title: AgentTurnResponseStreamChunk
-      description: Streamed agent turn completion response.
-    "AgentTurnResponseTurnAwaitingInputPayload":
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: turn_awaiting_input
-          default: turn_awaiting_input
-          description: Type of event being reported
-        turn:
-          $ref: '#/components/schemas/Turn'
-          description: >-
-            Turn data when waiting for external tool responses
-      additionalProperties: false
-      required:
-        - event_type
-        - turn
-      title: >-
-        AgentTurnResponseTurnAwaitingInputPayload
-      description: >-
-        Payload for turn awaiting input events in agent turn responses.
-    AgentTurnResponseTurnCompletePayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: turn_complete
-          default: turn_complete
-          description: Type of event being reported
-        turn:
-          $ref: '#/components/schemas/Turn'
-          description: >-
-            Complete turn data including all steps and results
-      additionalProperties: false
-      required:
-        - event_type
-        - turn
-      title: AgentTurnResponseTurnCompletePayload
-      description: >-
-        Payload for turn completion events in agent turn responses.
-    AgentTurnResponseTurnStartPayload:
-      type: object
-      properties:
-        event_type:
-          type: string
-          enum:
-            - step_start
-            - step_complete
-            - step_progress
-            - turn_start
-            - turn_complete
-            - turn_awaiting_input
-          const: turn_start
-          default: turn_start
-          description: Type of event being reported
-        turn_id:
-          type: string
-          description: >-
-            Unique identifier for the turn within a session
-      additionalProperties: false
-      required:
-        - event_type
-        - turn_id
-      title: AgentTurnResponseTurnStartPayload
-      description: >-
-        Payload for turn start events in agent turn responses.
-    ImageDelta:
-      type: object
-      properties:
-        type:
-          type: string
-          const: image
-          default: image
-          description: >-
-            Discriminator type of the delta. Always "image"
-        image:
-          type: string
-          contentEncoding: base64
-          description: The incremental image data as bytes
-      additionalProperties: false
-      required:
-        - type
-        - image
-      title: ImageDelta
-      description: >-
-        An image content delta for streaming responses.
-    TextDelta:
-      type: object
-      properties:
-        type:
-          type: string
-          const: text
-          default: text
-          description: >-
-            Discriminator type of the delta. Always "text"
-        text:
-          type: string
-          description: The incremental text content
-      additionalProperties: false
-      required:
-        - type
-        - text
-      title: TextDelta
-      description: >-
-        A text content delta for streaming responses.
-    ToolCallDelta:
-      type: object
-      properties:
-        type:
-          type: string
-          const: tool_call
-          default: tool_call
-          description: >-
-            Discriminator type of the delta. Always "tool_call"
-        tool_call:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/ToolCall'
-          description: >-
-            Either an in-progress tool call string or the final parsed tool call
-        parse_status:
-          type: string
-          enum:
-            - started
-            - in_progress
-            - failed
-            - succeeded
-          description: Current parsing status of the tool call
-      additionalProperties: false
-      required:
-        - type
-        - tool_call
-        - parse_status
-      title: ToolCallDelta
-      description: >-
-        A tool call content delta for streaming responses.
-    ResumeAgentTurnRequest:
-      type: object
-      properties:
-        tool_responses:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolResponse'
-          description: >-
-            The tool call responses to resume the turn with.
-        stream:
-          type: boolean
-          description: Whether to stream the response.
-      additionalProperties: false
-      required:
-        - tool_responses
-      title: ResumeAgentTurnRequest
-    AgentStepResponse:
-      type: object
-      properties:
-        step:
-          oneOf:
-            - $ref: '#/components/schemas/InferenceStep'
-            - $ref: '#/components/schemas/ToolExecutionStep'
-            - $ref: '#/components/schemas/ShieldCallStep'
-            - $ref: '#/components/schemas/MemoryRetrievalStep'
-          discriminator:
-            propertyName: step_type
-            mapping:
-              inference: '#/components/schemas/InferenceStep'
-              tool_execution: '#/components/schemas/ToolExecutionStep'
-              shield_call: '#/components/schemas/ShieldCallStep'
-              memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
-          description: >-
-            The complete step data and execution details
-      additionalProperties: false
-      required:
-        - step
-      title: AgentStepResponse
-      description: >-
-        Response containing details of a specific agent step.
-    Benchmark:
-      type: object
-      properties:
-        identifier:
-          type: string
-        provider_resource_id:
-          type: string
-        provider_id:
-          type: string
-        type:
-          type: string
-          enum:
-            - model
-            - shield
-            - vector_store
-            - dataset
-            - scoring_function
-            - benchmark
-            - tool
-            - tool_group
-            - prompt
-          const: benchmark
-          default: benchmark
-          description: The resource type, always benchmark
-        dataset_id:
-          type: string
-          description: >-
-            Identifier of the dataset to use for the benchmark evaluation
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of scoring function identifiers to apply during evaluation
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Metadata for this evaluation task
-      additionalProperties: false
-      required:
-        - identifier
-        - provider_id
-        - type
-        - dataset_id
-        - scoring_functions
-        - metadata
-      title: Benchmark
-      description: >-
-        A benchmark resource for evaluating model performance.
-    ListBenchmarksResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Benchmark'
-      additionalProperties: false
-      required:
-        - data
-      title: ListBenchmarksResponse
-    RegisterBenchmarkRequest:
-      type: object
-      properties:
-        benchmark_id:
-          type: string
-          description: The ID of the benchmark to register.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to use for the benchmark.
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            The scoring functions to use for the benchmark.
-        provider_benchmark_id:
-          type: string
-          description: >-
-            The ID of the provider benchmark to use for the benchmark.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the benchmark.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The metadata to use for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_id
-        - dataset_id
-        - scoring_functions
-      title: RegisterBenchmarkRequest
-    AgentCandidate:
-      type: object
-      properties:
-        type:
-          type: string
-          const: agent
-          default: agent
-        config:
-          $ref: '#/components/schemas/AgentConfig'
-          description: >-
-            The configuration for the agent candidate.
-      additionalProperties: false
-      required:
-        - type
-        - config
-      title: AgentCandidate
-      description: An agent candidate for evaluation.
- BenchmarkConfig: - type: object - properties: - eval_candidate: - oneOf: - - $ref: '#/components/schemas/ModelCandidate' - - $ref: '#/components/schemas/AgentCandidate' - discriminator: - propertyName: type - mapping: - model: '#/components/schemas/ModelCandidate' - agent: '#/components/schemas/AgentCandidate' - description: The candidate to evaluate. - scoring_params: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - Map between scoring function id and parameters for each scoring function - you want to run - num_examples: - type: integer - description: >- - (Optional) The number of examples to evaluate. If not provided, all examples - in the dataset will be evaluated - additionalProperties: false - required: - - eval_candidate - - scoring_params - title: BenchmarkConfig - description: >- - A benchmark configuration for evaluation. + description: Greedy sampling strategy that selects the highest probability token at each step. ModelCandidate: - type: object properties: type: type: string const: model + title: Type default: model model: type: string - description: The model ID to evaluate. + title: Model sampling_params: $ref: '#/components/schemas/SamplingParams' - description: The sampling parameters for the model. system_message: - $ref: '#/components/schemas/SystemMessage' - description: >- - (Optional) The system message providing instructions or context to the - model. - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/SystemMessage' + title: SystemMessage + - type: 'null' + title: SystemMessage + type: object required: - - type - - model - - sampling_params + - model + - sampling_params title: ModelCandidate description: A model candidate for evaluation. - EvaluateRowsRequest: + SamplingParams: + properties: + strategy: + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + discriminator: + propertyName: type + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + max_tokens: + anyOf: + - type: integer + - type: 'null' + repetition_penalty: + anyOf: + - type: number + - type: 'null' + default: 1.0 + stop: + anyOf: + - items: + type: string + type: array + - type: 'null' type: object + title: SamplingParams + description: Sampling parameters. 
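For orientation, here is a minimal instance that would validate against the regenerated ModelCandidate and SamplingParams schemas above (the strategy variants are defined just below); the model id and numeric values are illustrative, not taken from the spec:

# Illustrative ModelCandidate payload; "llama-3.1-8b" is a placeholder model id
type: model
model: llama-3.1-8b
sampling_params:
  strategy:
    type: top_p          # discriminator selects TopPSamplingStrategy
    temperature: 0.7     # required by TopPSamplingStrategy
    top_p: 0.95          # matches the schema default
  max_tokens: 512
  repetition_penalty: 1.0
  stop:
    - "###"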
+ SystemMessage: + properties: + role: + type: string + const: system + title: Role + default: system + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + type: object + required: + - content + title: SystemMessage + description: A system message providing instructions or context to the model. + TopKSamplingStrategy: + properties: + type: + type: string + const: top_k + title: Type + default: top_k + top_k: + type: integer + minimum: 1.0 + title: Top K + type: object + required: + - top_k + title: TopKSamplingStrategy + description: Top-k sampling strategy that restricts sampling to the k most likely tokens. + TopPSamplingStrategy: + properties: + type: + type: string + const: top_p + title: Type + default: top_p + temperature: + anyOf: + - type: number + minimum: 0.0 + - type: 'null' + top_p: + anyOf: + - type: number + - type: 'null' + default: 0.95 + type: object + required: + - temperature + title: TopPSamplingStrategy + description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + EvaluateRowsRequest: properties: input_rows: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: type: array + title: Input Rows + scoring_functions: items: type: string - description: >- - The scoring functions to use for the evaluation. + type: array + title: Scoring Functions benchmark_config: $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false + type: object required: - - input_rows - - scoring_functions - - benchmark_config + - input_rows + - scoring_functions + - benchmark_config title: EvaluateRowsRequest EvaluateResponse: - type: object properties: generations: - type: array items: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. + type: array + title: Generations scores: - type: object additionalProperties: $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false + type: object + title: Scores + type: object required: - - generations - - scores + - generations + - scores title: EvaluateResponse description: The response from an evaluation. 
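To illustrate the content union in SystemMessage, both forms below should be accepted; the list form assumes TextContentItem carries a text field, which is not shown in this hunk:

# String form
role: system
content: You are a concise assistant.

# List-of-content-items form (TextContentItem shape assumed)
role: system
content:
  - type: text
    text: You are a concise assistant.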
RunEvalRequest: - type: object properties: benchmark_config: $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false + type: object required: - - benchmark_config + - benchmark_config title: RunEvalRequest Job: - type: object properties: job_id: type: string - description: Unique identifier for the job + title: Job Id status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. - RerankRequest: + $ref: '#/components/schemas/JobStatus' type: object + required: + - job_id + - status + title: Job + description: A job execution instance with status tracking. + RerankRequest: properties: model: type: string - description: >- - The identifier of the reranking model to use. + title: Model query: - oneOf: + anyOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam + items: + items: + anyOf: - type: string - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - The search query to rank items against. Can be a string, text content - part, or image content part. The input must not exceed the model's max - input token length. - items: + title: OpenAIChatCompletionContentPartImageParam + title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam type: array - items: - oneOf: - - type: string - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' - description: >- - List of items to rerank. Each item can be a string, text content part, - or image content part. Each input must not exceed the model's max input - token length. + title: Items max_num_results: - type: integer - description: >- - (Optional) Maximum number of results to return. Default: returns all. - additionalProperties: false + anyOf: + - type: integer + - type: 'null' + type: object required: - - model - - query - - items + - model + - query + - items title: RerankRequest RerankData: - type: object properties: index: type: integer - description: >- - The original index of the document in the input list + title: Index relevance_score: type: number - description: >- - The relevance score from the model output. Values are inverted when applicable - so that higher scores indicate greater relevance. - additionalProperties: false - required: - - index - - relevance_score - title: RerankData - description: >- - A single rerank result from a reranking response. - RerankResponse: + title: Relevance Score type: object + required: + - index + - relevance_score + title: RerankData + description: A single rerank result from a reranking response. 
+ RerankResponse: properties: data: - type: array items: $ref: '#/components/schemas/RerankData' - description: >- - List of rerank result objects, sorted by relevance score (descending) - additionalProperties: false + type: array + title: Data + type: object required: - - data + - data title: RerankResponse description: Response from a reranking request. Checkpoint: - type: object properties: identifier: type: string - description: Unique identifier for the checkpoint + title: Identifier created_at: type: string format: date-time - description: >- - Timestamp when the checkpoint was created + title: Created At epoch: type: integer - description: >- - Training epoch when the checkpoint was saved + title: Epoch post_training_job_id: type: string - description: >- - Identifier of the training job that created this checkpoint + title: Post Training Job Id path: type: string - description: >- - File system path where the checkpoint is stored + title: Path training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false + anyOf: + - $ref: '#/components/schemas/PostTrainingMetric' + title: PostTrainingMetric + - type: 'null' + title: PostTrainingMetric + type: object required: - - identifier - - created_at - - epoch - - post_training_job_id - - path + - identifier + - created_at + - epoch + - post_training_job_id + - path title: Checkpoint description: Checkpoint created during training runs. PostTrainingJobArtifactsResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - checkpoints + - job_uuid title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. PostTrainingMetric: - type: object properties: epoch: type: integer - description: Training epoch number + title: Epoch train_loss: type: number - description: Loss value on the training dataset + title: Train Loss validation_loss: type: number - description: Loss value on the validation dataset + title: Validation Loss perplexity: type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - CancelTrainingJobRequest: + title: Perplexity type: object + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: Training metrics captured during post-training jobs. + CancelTrainingJobRequest: properties: job_uuid: type: string - description: The UUID of the job to cancel. 
- additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: CancelTrainingJobRequest PostTrainingJobStatusResponse: - type: object properties: job_uuid: type: string - description: Unique identifier for the training job + title: Job Uuid status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current status of the training job + $ref: '#/components/schemas/JobStatus' scheduled_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job was scheduled + anyOf: + - type: string + format: date-time + - type: 'null' started_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job execution began + anyOf: + - type: string + format: date-time + - type: 'null' completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job finished, if completed + anyOf: + - type: string + format: date-time + - type: 'null' resources_allocated: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Information about computational resources allocated to the - job + anyOf: + - additionalProperties: true + type: object + - type: 'null' checkpoints: - type: array items: $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false + type: array + title: Checkpoints + type: object required: - - job_uuid - - status - - checkpoints + - job_uuid + - status title: PostTrainingJobStatusResponse description: Status of a finetuning job. ListPostTrainingJobsResponse: - type: object properties: data: - type: array items: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - additionalProperties: false + $ref: '#/components/schemas/PostTrainingJob' + type: array + title: Data + type: object required: - - data + - data title: ListPostTrainingJobsResponse DPOAlignmentConfig: - type: object properties: beta: type: number - description: Temperature parameter for the DPO loss + title: Beta loss_type: $ref: '#/components/schemas/DPOLossType' default: sigmoid - description: The type of loss function to use for DPO - additionalProperties: false + type: object required: - - beta - - loss_type + - beta title: DPOAlignmentConfig - description: >- - Configuration for Direct Preference Optimization (DPO) alignment. + description: Configuration for Direct Preference Optimization (DPO) alignment. 
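For example, a DPOAlignmentConfig instance needs only the beta value, since loss_type falls back to its default:

beta: 0.1            # DPO temperature parameter; value is illustrative
loss_type: sigmoid   # optional, defaults to sigmoid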
DPOLossType: type: string enum: - - sigmoid - - hinge - - ipo - - kto_pair + - sigmoid + - hinge + - ipo + - kto_pair title: DPOLossType DataConfig: - type: object properties: dataset_id: type: string - description: >- - Unique identifier for the training dataset + title: Dataset Id batch_size: type: integer - description: Number of samples per training batch + title: Batch Size shuffle: type: boolean - description: >- - Whether to shuffle the dataset during training + title: Shuffle data_format: $ref: '#/components/schemas/DatasetFormat' - description: >- - Format of the dataset (instruct or dialog) validation_dataset_id: - type: string - description: >- - (Optional) Unique identifier for the validation dataset + anyOf: + - type: string + - type: 'null' packed: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to pack multiple samples into a single sequence for - efficiency train_on_input: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to compute loss on input tokens as well as output tokens - additionalProperties: false + type: object required: - - dataset_id - - batch_size - - shuffle - - data_format + - dataset_id + - batch_size + - shuffle + - data_format title: DataConfig - description: >- - Configuration for training data and data loading. + description: Configuration for training data and data loading. DatasetFormat: type: string enum: - - instruct - - dialog + - instruct + - dialog title: DatasetFormat description: Format of the training dataset. EfficiencyConfig: - type: object properties: enable_activation_checkpointing: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use activation checkpointing to reduce memory usage enable_activation_offloading: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload activations to CPU to save GPU memory memory_efficient_fsdp_wrap: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use memory-efficient FSDP wrapping fsdp_cpu_offload: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to offload FSDP parameters to CPU - additionalProperties: false - title: EfficiencyConfig - description: >- - Configuration for memory and compute efficiency optimizations. - OptimizerConfig: type: object + title: EfficiencyConfig + description: Configuration for memory and compute efficiency optimizations. + OptimizerConfig: properties: optimizer_type: $ref: '#/components/schemas/OptimizerType' - description: >- - Type of optimizer to use (adam, adamw, or sgd) lr: type: number - description: Learning rate for the optimizer + title: Lr weight_decay: type: number - description: >- - Weight decay coefficient for regularization + title: Weight Decay num_warmup_steps: type: integer - description: Number of steps for learning rate warmup - additionalProperties: false + title: Num Warmup Steps + type: object required: - - optimizer_type - - lr - - weight_decay - - num_warmup_steps + - optimizer_type + - lr + - weight_decay + - num_warmup_steps title: OptimizerConfig - description: >- - Configuration parameters for the optimization algorithm. + description: Configuration parameters for the optimization algorithm. 
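Putting the two training sub-configs together, a sketch of DataConfig and OptimizerConfig instances that satisfy the required fields above (identifiers and hyperparameter values are illustrative):

data_config:
  dataset_id: my-instruct-dataset   # placeholder dataset identifier
  batch_size: 8
  shuffle: true
  data_format: instruct             # one of: instruct, dialog
  packed: false                     # optional, defaults to false
  train_on_input: false             # optional, defaults to false
optimizer_config:
  optimizer_type: adamw             # one of: adam, adamw, sgd
  lr: 2.0e-5
  weight_decay: 0.01
  num_warmup_steps: 100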
OptimizerType: type: string enum: - - adam - - adamw - - sgd + - adam + - adamw + - sgd title: OptimizerType - description: >- - Available optimizer algorithms for training. + description: Available optimizer algorithms for training. TrainingConfig: - type: object properties: n_epochs: type: integer - description: Number of training epochs to run + title: N Epochs max_steps_per_epoch: type: integer + title: Max Steps Per Epoch default: 1 - description: Maximum number of steps to run per epoch gradient_accumulation_steps: type: integer + title: Gradient Accumulation Steps default: 1 - description: >- - Number of steps to accumulate gradients before updating max_validation_steps: - type: integer + anyOf: + - type: integer + - type: 'null' default: 1 - description: >- - (Optional) Maximum number of validation steps per epoch data_config: - $ref: '#/components/schemas/DataConfig' - description: >- - (Optional) Configuration for data loading and formatting + anyOf: + - $ref: '#/components/schemas/DataConfig' + title: DataConfig + - type: 'null' + title: DataConfig optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - description: >- - (Optional) Configuration for the optimization algorithm + anyOf: + - $ref: '#/components/schemas/OptimizerConfig' + title: OptimizerConfig + - type: 'null' + title: OptimizerConfig efficiency_config: - $ref: '#/components/schemas/EfficiencyConfig' - description: >- - (Optional) Configuration for memory and compute optimizations + anyOf: + - $ref: '#/components/schemas/EfficiencyConfig' + title: EfficiencyConfig + - type: 'null' + title: EfficiencyConfig dtype: - type: string + anyOf: + - type: string + - type: 'null' default: bf16 - description: >- - (Optional) Data type for model parameters (bf16, fp16, fp32) - additionalProperties: false - required: - - n_epochs - - max_steps_per_epoch - - gradient_accumulation_steps - title: TrainingConfig - description: >- - Comprehensive configuration for the training process. - PreferenceOptimizeRequest: type: object + required: + - n_epochs + title: TrainingConfig + description: Comprehensive configuration for the training process. + PreferenceOptimizeRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid finetuned_model: type: string - description: The model to fine-tune. + title: Finetuned Model algorithm_config: $ref: '#/components/schemas/DPOAlignmentConfig' - description: The algorithm configuration. training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. 
- additionalProperties: false + title: Logger Config + type: object required: - - job_uuid - - finetuned_model - - algorithm_config - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config title: PreferenceOptimizeRequest PostTrainingJob: - type: object properties: job_uuid: type: string - additionalProperties: false + title: Job Uuid + type: object required: - - job_uuid + - job_uuid title: PostTrainingJob AlgorithmConfig: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QATFinetuningConfig' discriminator: - propertyName: type mapping: LoRA: '#/components/schemas/LoraFinetuningConfig' QAT: '#/components/schemas/QATFinetuningConfig' + propertyName: type + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + title: LoraFinetuningConfig | QATFinetuningConfig LoraFinetuningConfig: - type: object properties: type: type: string const: LoRA + title: Type default: LoRA - description: Algorithm type identifier, always "LoRA" lora_attn_modules: - type: array items: type: string - description: >- - List of attention module names to apply LoRA to + type: array + title: Lora Attn Modules apply_lora_to_mlp: type: boolean - description: Whether to apply LoRA to MLP layers + title: Apply Lora To Mlp apply_lora_to_output: type: boolean - description: >- - Whether to apply LoRA to output projection layers + title: Apply Lora To Output rank: type: integer - description: >- - Rank of the LoRA adaptation (lower rank = fewer parameters) + title: Rank alpha: type: integer - description: >- - LoRA scaling parameter that controls adaptation strength + title: Alpha use_dora: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) quantize_base: - type: boolean + anyOf: + - type: boolean + - type: 'null' default: false - description: >- - (Optional) Whether to quantize the base model weights - additionalProperties: false - required: - - type - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - title: LoraFinetuningConfig - description: >- - Configuration for Low-Rank Adaptation (LoRA) fine-tuning. - QATFinetuningConfig: type: object + required: + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: properties: type: type: string const: QAT + title: Type default: QAT - description: Algorithm type identifier, always "QAT" quantizer_name: type: string - description: >- - Name of the quantization algorithm to use + title: Quantizer Name group_size: type: integer - description: Size of groups for grouped quantization - additionalProperties: false - required: - - type - - quantizer_name - - group_size - title: QATFinetuningConfig - description: >- - Configuration for Quantization-Aware Training (QAT) fine-tuning. - SupervisedFineTuneRequest: + title: Group Size type: object + required: + - quantizer_name + - group_size + title: QATFinetuningConfig + description: Configuration for Quantization-Aware Training (QAT) fine-tuning. 
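As a concrete example, a LoraFinetuningConfig covering all required fields (the attention-module names follow common Llama conventions and are an assumption, not taken from this spec):

type: LoRA
lora_attn_modules:
  - q_proj
  - v_proj
apply_lora_to_mlp: false
apply_lora_to_output: false
rank: 8               # lower rank = fewer trainable parameters
alpha: 16             # scaling parameter for adaptation strength
use_dora: false       # optional, defaults to false
quantize_base: false  # optional, defaults to false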
+ SupervisedFineTuneRequest: properties: job_uuid: type: string - description: The UUID of the job to create. + title: Job Uuid training_config: $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. hyperparam_search_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. + title: Hyperparam Search Config logger_config: + additionalProperties: true type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. + title: Logger Config model: - type: string - description: The model to fine-tune. + anyOf: + - type: string + - type: 'null' + description: Model descriptor for training if not in provider config checkpoint_dir: - type: string - description: The directory to save checkpoint(s) to. + anyOf: + - type: string + - type: 'null' algorithm_config: - $ref: '#/components/schemas/AlgorithmConfig' - description: The algorithm configuration. - additionalProperties: false + anyOf: + - oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + title: LoraFinetuningConfig + - $ref: '#/components/schemas/QATFinetuningConfig' + title: QATFinetuningConfig + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + title: LoraFinetuningConfig | QATFinetuningConfig + - type: 'null' + title: Algorithm Config + type: object required: - - job_uuid - - training_config - - hyperparam_search_config - - logger_config + - job_uuid + - training_config + - hyperparam_search_config + - logger_config title: SupervisedFineTuneRequest + RegisterModelRequest: + properties: + model_id: + type: string + title: Model Id + provider_model_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + model_type: + anyOf: + - $ref: '#/components/schemas/ModelType' + title: ModelType + - type: 'null' + title: ModelType + type: object + required: + - model_id + title: RegisterModelRequest + ParamType: + discriminator: + mapping: + array: '#/components/schemas/ArrayType' + boolean: '#/components/schemas/BooleanType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + json: '#/components/schemas/JsonType' + number: '#/components/schemas/NumberType' + object: '#/components/schemas/ObjectType' + string: '#/components/schemas/StringType' + union: '#/components/schemas/UnionType' + propertyName: type + oneOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ...
(9 variants) + RegisterScoringFunctionRequest: + properties: + scoring_fn_id: + type: string + title: Scoring Fn Id + description: + type: string + title: Description + return_type: + anyOf: + - $ref: '#/components/schemas/StringType' + title: StringType + - $ref: '#/components/schemas/NumberType' + title: NumberType + - $ref: '#/components/schemas/BooleanType' + title: BooleanType + - $ref: '#/components/schemas/ArrayType' + title: ArrayType + - $ref: '#/components/schemas/ObjectType' + title: ObjectType + - $ref: '#/components/schemas/JsonType' + title: JsonType + - $ref: '#/components/schemas/UnionType' + title: UnionType + - $ref: '#/components/schemas/ChatCompletionInputType' + title: ChatCompletionInputType + - $ref: '#/components/schemas/CompletionInputType' + title: CompletionInputType + title: StringType | ... (9 variants) + provider_scoring_fn_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + params: + anyOf: + - oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + title: LLMAsJudgeScoringFnParams + - $ref: '#/components/schemas/RegexParserScoringFnParams' + title: RegexParserScoringFnParams + - $ref: '#/components/schemas/BasicScoringFnParams' + title: BasicScoringFnParams + discriminator: + propertyName: type + mapping: + basic: '#/components/schemas/BasicScoringFnParams' + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams + - type: 'null' + title: Params + type: object + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + properties: + shield_id: + type: string + title: Shield Id + provider_shield_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + params: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + properties: + toolgroup_id: + type: string + title: Toolgroup Id + provider_id: + type: string + title: Provider Id + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest + DataSource: + discriminator: + mapping: + rows: '#/components/schemas/RowsDataSource' + uri: '#/components/schemas/URIDataSource' + propertyName: type + oneOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + RegisterDatasetRequest: + properties: + purpose: + $ref: '#/components/schemas/DatasetPurpose' + source: + anyOf: + - $ref: '#/components/schemas/URIDataSource' + title: URIDataSource + - $ref: '#/components/schemas/RowsDataSource' + title: RowsDataSource + title: URIDataSource | RowsDataSource + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + dataset_id: + anyOf: + - type: string + - type: 'null' + type: object + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterBenchmarkRequest: + properties: + benchmark_id: + type: string + title: Benchmark Id + dataset_id: + type: string + title: Dataset Id + 
scoring_functions: + items: + type: string + type: array + title: Scoring Functions + provider_benchmark_id: + anyOf: + - type: string + - type: 'null' + provider_id: + anyOf: + - type: string + - type: 'null' + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + type: object + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + AllowedToolsFilter: + properties: + tool_names: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: AllowedToolsFilter + description: Filter configuration for restricting which MCP tools can be used. + ApprovalFilter: + properties: + always: + anyOf: + - items: + type: string + type: array + - type: 'null' + never: + anyOf: + - items: + type: string + type: array + - type: 'null' + type: object + title: ApprovalFilter + description: Filter configuration for MCP tool approval requirements. + BatchError: + properties: + code: + anyOf: + - type: string + - type: 'null' + line: + anyOf: + - type: integer + - type: 'null' + message: + anyOf: + - type: string + - type: 'null' + param: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: BatchError + BatchRequestCounts: + properties: + completed: + type: integer + title: Completed + failed: + type: integer + title: Failed + total: + type: integer + title: Total + additionalProperties: true + type: object + required: + - completed + - failed + - total + title: BatchRequestCounts + BatchUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + input_tokens_details: + $ref: '#/components/schemas/InputTokensDetails' + output_tokens: + type: integer + title: Output Tokens + output_tokens_details: + $ref: '#/components/schemas/OutputTokensDetails' + total_tokens: + type: integer + title: Total Tokens + additionalProperties: true + type: object + required: + - input_tokens + - input_tokens_details + - output_tokens + - output_tokens_details + - total_tokens + title: BatchUsage + Body_openai_upload_file_v1_files_post: + properties: + file: + type: string + format: binary + title: File + purpose: + $ref: '#/components/schemas/OpenAIFilePurpose' + expires_after: + anyOf: + - $ref: '#/components/schemas/ExpiresAfter' + title: ExpiresAfter + - type: 'null' + title: ExpiresAfter + type: object + required: + - file + - purpose + title: Body_openai_upload_file_v1_files_post + Chunk-Input: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Input' + title: ImageContentItem-Input + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Input' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Input | TextContentItem + type: array + title: list[ImageContentItem-Input | TextContentItem] + title: string | list[ImageContentItem-Input | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - 
items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. + Chunk-Output: + properties: + content: + anyOf: + - type: string + - oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + - items: + oneOf: + - $ref: '#/components/schemas/ImageContentItem-Output' + title: ImageContentItem-Output + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/ImageContentItem-Output' + text: '#/components/schemas/TextContentItem' + title: ImageContentItem-Output | TextContentItem + type: array + title: list[ImageContentItem-Output | TextContentItem] + title: string | list[ImageContentItem-Output | TextContentItem] + chunk_id: + type: string + title: Chunk Id + metadata: + additionalProperties: true + type: object + title: Metadata + embedding: + anyOf: + - items: + type: number + type: array + - type: 'null' + chunk_metadata: + anyOf: + - $ref: '#/components/schemas/ChunkMetadata' + title: ChunkMetadata + - type: 'null' + title: ChunkMetadata + type: object + required: + - content + - chunk_id + title: Chunk + description: A chunk of content that can be inserted into a vector database. + ConversationItemInclude: + type: string + enum: + - web_search_call.action.sources + - code_interpreter_call.outputs + - computer_call_output.output.image_url + - file_search_call.results + - message.input_image.image_url + - message.output_text.logprobs + - reasoning.encrypted_content + title: ConversationItemInclude + description: Specify additional output data to include in the model response. + DatasetPurpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + title: DatasetPurpose + description: Purpose of the dataset. Each purpose has a required input data schema. 
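To make the Chunk shape above concrete, here is a minimal chunk using the plain-string content variant (the id and metadata are invented):

content: The quick brown fox jumps over the lazy dog.
chunk_id: chunk-0001        # placeholder identifier
metadata:
  source: example.txt       # arbitrary key/value metadata
# embedding and chunk_metadata are optional and may be null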
+ Errors: + properties: + data: + anyOf: + - items: + $ref: '#/components/schemas/BatchError' + type: array + - type: 'null' + object: + anyOf: + - type: string + - type: 'null' + additionalProperties: true + type: object + title: Errors + HealthStatus: + type: string + enum: + - OK + - Error + - Not Implemented + title: HealthStatus + ImageContentItem-Input: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: An image content item + ImageContentItem-Output: + properties: + type: + type: string + const: image + title: Type + default: image + image: + $ref: '#/components/schemas/_URLOrData' + type: object + required: + - image + title: ImageContentItem + description: An image content item + InputTokensDetails: + properties: + cached_tokens: + type: integer + title: Cached Tokens + additionalProperties: true + type: object + required: + - cached_tokens + title: InputTokensDetails + JobStatus: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + title: JobStatus + description: Status of a job execution. + MCPListToolsTool: + properties: + input_schema: + additionalProperties: true + type: object + title: Input Schema + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + type: object + required: + - input_schema + - name + title: MCPListToolsTool + description: Tool definition returned by MCP list tools operation. + OpenAIAssistantMessageParam-Input: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIAssistantMessageParam-Output: + properties: + role: + type: string + const: assistant + title: Role + default: assistant + content: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + type: array + title: list[OpenAIChatCompletionContentPartTextParam] + - type: 'null' + title: string | list[OpenAIChatCompletionContentPartTextParam] + name: + anyOf: + - type: string + - type: 'null' + tool_calls: + anyOf: + - items: + $ref: '#/components/schemas/OpenAIChatCompletionToolCall' + type: array + - type: 'null' + type: object + title: OpenAIAssistantMessageParam + description: A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + OpenAIChatCompletionUsageCompletionTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsageCompletionTokensDetails + description: Token details for output tokens in OpenAI chat completion usage.
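An example MCPListToolsTool entry from the schema above, with a minimal JSON Schema as its input_schema (the tool name and parameters are hypothetical):

name: get_weather            # hypothetical tool name
description: Fetch the current weather for a city
input_schema:
  type: object
  properties:
    city:
      type: string
  required:
    - city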
+ OpenAIChatCompletionUsagePromptTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIChatCompletionUsagePromptTokensDetails + description: Token details for prompt tokens in OpenAI chat completion usage. + OpenAIResponseMessage-Input: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. 
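Because the Responses API reuses one "message" type for several roles, a minimal OpenAIResponseMessage instance needs only content and role; here with the plain-string content variant (the text is invented):

type: message
role: user
content: What is the capital of France?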
+ OpenAIResponseMessage-Output: + properties: + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + title: OpenAIResponseInputMessageContentImage + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile' + title: OpenAIResponseInputMessageContentFile + discriminator: + propertyName: type + mapping: + input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + title: OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile + type: array + title: list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + title: OpenAIResponseOutputMessageContentOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + discriminator: + propertyName: type + mapping: + output_text: '#/components/schemas/OpenAIResponseOutputMessageContentOutputText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal + type: array + title: list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + title: string | list[OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile] | list[OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal] + role: + title: Role + type: string + enum: + - system + - developer + - user + - assistant + default: system + type: + type: string + const: message + title: Type + default: message + id: + anyOf: + - type: string + - type: 'null' + status: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + - role + title: OpenAIResponseMessage + description: |- + Corresponds to the various Message types in the Responses API. + They are all under one type because the Responses API gives them all + the same "type" value, and there is no way to tell them apart in certain + scenarios. + OpenAIResponseOutputMessageFileSearchToolCallResults: + properties: + attributes: + additionalProperties: true + type: object + title: Attributes + file_id: + type: string + title: File Id + filename: + type: string + title: Filename + score: + type: number + title: Score + text: + type: string + title: Text + type: object + required: + - attributes + - file_id + - filename + - score + - text + title: OpenAIResponseOutputMessageFileSearchToolCallResults + description: Search results returned by the file search operation. + OpenAIResponseTextFormat: + properties: + type: + title: Type + type: string + enum: + - text + - json_schema + - json_object + default: text + name: + anyOf: + - type: string + - type: 'null' + schema: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: + anyOf: + - type: string + - type: 'null' + strict: + anyOf: + - type: boolean + - type: 'null' + type: object + title: OpenAIResponseTextFormat + description: Configuration for Responses API text format. 
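A sketch of an OpenAIResponseTextFormat requesting strict JSON-schema output (the schema name and body are made-up examples):

type: json_schema
name: capital_answer         # hypothetical schema name
schema:
  type: object
  properties:
    capital:
      type: string
  required:
    - capital
strict: true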
+ OpenAIResponseUsageInputTokensDetails: + properties: + cached_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageInputTokensDetails + description: Token details for input tokens in OpenAI response usage. + OpenAIResponseUsageOutputTokensDetails: + properties: + reasoning_tokens: + anyOf: + - type: integer + - type: 'null' + type: object + title: OpenAIResponseUsageOutputTokensDetails + description: Token details for output tokens in OpenAI response usage. + OpenAIUserMessageParam-Input: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. + OpenAIUserMessageParam-Output: + properties: + role: + type: string + const: user + title: Role + default: user + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + title: OpenAIChatCompletionContentPartImageParam + - $ref: '#/components/schemas/OpenAIFile' + title: OpenAIFile + discriminator: + propertyName: type + mapping: + file: '#/components/schemas/OpenAIFile' + image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + title: OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile + type: array + title: list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + title: string | list[OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile] + name: + anyOf: + - type: string + - type: 'null' + type: object + required: + - content + title: OpenAIUserMessageParam + description: A message from the user in an OpenAI-compatible chat completion request. 
+ OutputTokensDetails: + properties: + reasoning_tokens: + type: integer + title: Reasoning Tokens + additionalProperties: true + type: object + required: + - reasoning_tokens + title: OutputTokensDetails + SearchRankingOptions: + properties: + ranker: + anyOf: + - type: string + - type: 'null' + score_threshold: + anyOf: + - type: number + - type: 'null' + default: 0.0 + type: object + title: SearchRankingOptions + description: Options for ranking and filtering search results. + _URLOrData: + properties: + url: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + title: URL + data: + anyOf: + - type: string + - type: 'null' + contentEncoding: base64 + type: object + title: _URLOrData + description: A URL or a base64 encoded string + SamplingStrategy: + discriminator: + mapping: + greedy: '#/components/schemas/GreedySamplingStrategy' + top_k: '#/components/schemas/TopKSamplingStrategy' + top_p: '#/components/schemas/TopPSamplingStrategy' + propertyName: type + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + title: GreedySamplingStrategy + - $ref: '#/components/schemas/TopPSamplingStrategy' + title: TopPSamplingStrategy + - $ref: '#/components/schemas/TopKSamplingStrategy' + title: TopKSamplingStrategy + title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy + GrammarResponseFormat: + description: Configuration for grammar-guided response generation. + properties: + type: + const: grammar + default: grammar + title: Type + type: string + bnf: + additionalProperties: true + title: Bnf + type: object + required: + - bnf + title: GrammarResponseFormat + type: object + JsonSchemaResponseFormat: + description: Configuration for JSON schema-guided response generation. + properties: + type: + const: json_schema + default: json_schema + title: Type + type: string + json_schema: + additionalProperties: true + title: Json Schema + type: object + required: + - json_schema + title: JsonSchemaResponseFormat + type: object + ResponseFormat: + discriminator: + mapping: + grammar: '#/components/schemas/GrammarResponseFormat' + json_schema: '#/components/schemas/JsonSchemaResponseFormat' + propertyName: type + oneOf: + - $ref: '#/components/schemas/JsonSchemaResponseFormat' + title: JsonSchemaResponseFormat + - $ref: '#/components/schemas/GrammarResponseFormat' + title: GrammarResponseFormat + title: JsonSchemaResponseFormat | GrammarResponseFormat + OpenAIResponseContentPart: + discriminator: + mapping: + output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' + refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' + title: OpenAIResponseContentPartOutputText + - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + title: OpenAIResponseContentPartRefusal + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' + title: OpenAIResponseContentPartReasoningText + title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + MetricInResponse: + description: A metric value included in API responses. 
+ properties: + metric: + title: Metric + type: string + value: + anyOf: + - type: integer + - type: number + title: integer | number + unit: + anyOf: + - type: string + - type: 'null' + nullable: true + required: + - metric + - value + title: MetricInResponse + type: object + TextDelta: + description: A text content delta for streaming responses. + properties: + type: + const: text + default: text + title: Type + type: string + text: + title: Text + type: string + required: + - text + title: TextDelta + type: object + ImageDelta: + description: An image content delta for streaming responses. + properties: + type: + const: image + default: image + title: Type + type: string + image: + format: binary + title: Image + type: string + required: + - image + title: ImageDelta + type: object + Fp8QuantizationConfig: + description: Configuration for 8-bit floating point quantization. + properties: + type: + const: fp8_mixed + default: fp8_mixed + title: Type + type: string + title: Fp8QuantizationConfig + type: object + Bf16QuantizationConfig: + description: Configuration for BFloat16 precision (typically no quantization). + properties: + type: + const: bf16 + default: bf16 + title: Type + type: string + title: Bf16QuantizationConfig + type: object + Int4QuantizationConfig: + description: Configuration for 4-bit integer quantization. + properties: + type: + const: int4_mixed + default: int4_mixed + title: Type + type: string + scheme: + anyOf: + - type: string + - type: 'null' + default: int4_weight_int8_dynamic_activation + title: Int4QuantizationConfig + type: object + UserMessage: + description: A message from the user in a chat conversation. + properties: + role: + const: user + default: user + title: Role + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + context: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + - type: 'null' + title: string | list[ImageContentItem | TextContentItem] + nullable: true + required: + - content + title: 
UserMessage + type: object + ToolResponseMessage: + description: A message representing the result of a tool invocation. + properties: + role: + const: tool + default: tool + title: Role + type: string + call_id: + title: Call Id + type: string + content: + anyOf: + - type: string + - discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + - items: + discriminator: + mapping: + image: '#/components/schemas/ImageContentItem' + text: '#/components/schemas/TextContentItem' + propertyName: type + oneOf: + - $ref: '#/components/schemas/ImageContentItem' + title: ImageContentItem + - $ref: '#/components/schemas/TextContentItem' + title: TextContentItem + title: ImageContentItem | TextContentItem + type: array + title: list[ImageContentItem | TextContentItem] + title: string | list[ImageContentItem | TextContentItem] + required: + - call_id + - content + title: ToolResponseMessage + type: object + TokenLogProbs: + description: Log probabilities for generated tokens. + properties: + logprobs_by_token: + additionalProperties: + type: number + title: Logprobs By Token + type: object + required: + - logprobs_by_token + title: TokenLogProbs + type: object + EmbeddingsResponse: + description: Response containing generated embeddings. + properties: + embeddings: + items: + items: + type: number + type: array + title: Embeddings + type: array + required: + - embeddings + title: EmbeddingsResponse + type: object + OpenAICompletionLogprobs: + description: |- + The log probabilities for the tokens in the message from an OpenAI-compatible completion response. + + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + properties: + text_offset: + anyOf: + - items: + type: integer + type: array + - type: 'null' + nullable: true + token_logprobs: + anyOf: + - items: + type: number + type: array + - type: 'null' + nullable: true + tokens: + anyOf: + - items: + type: string + type: array + - type: 'null' + nullable: true + top_logprobs: + anyOf: + - items: + additionalProperties: + type: number + type: object + type: array + - type: 'null' + nullable: true + title: OpenAICompletionLogprobs + type: object + VectorStoreCreateRequest: + description: Request to create a vector store. + properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + file_ids: + items: + type: string + title: File Ids + type: array + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + chunking_strategy: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + additionalProperties: true + title: Metadata + type: object + title: VectorStoreCreateRequest + type: object + VectorStoreModifyRequest: + description: Request to modify a vector store. 
+ properties: + name: + anyOf: + - type: string + - type: 'null' + nullable: true + expires_after: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + title: VectorStoreModifyRequest + type: object + VectorStoreSearchRequest: + description: Request to search a vector store. + properties: + query: + anyOf: + - type: string + - items: + type: string + type: array + title: list[string] + title: string | list[string] + filters: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + max_num_results: + default: 10 + title: Max Num Results + type: integer + ranking_options: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + rewrite_query: + default: false + title: Rewrite Query + type: boolean + required: + - query + title: VectorStoreSearchRequest + type: object + DialogType: + description: Parameter type for dialog data with semantic output labels. + properties: + type: + const: dialog + default: dialog + title: Type + type: string + title: DialogType + type: object + ConversationMessage: + description: OpenAI-compatible message item for conversations. + properties: + id: + description: unique identifier for this message + title: Id + type: string + content: + description: message content + items: + additionalProperties: true + type: object + title: Content + type: array + role: + description: message role + title: Role + type: string + status: + description: message status + title: Status + type: string + type: + const: message + default: message + title: Type + type: string + object: + const: message + default: message + title: Object + type: string + required: + - id + - content + - role + - status + title: ConversationMessage + type: object + ConversationItemCreateRequest: + description: Request body for creating conversation items. + properties: + items: + description: Items to include in the conversation context. You may add up to 20 items at a time. 
+ items: + discriminator: + mapping: + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + message: '#/components/schemas/OpenAIResponseMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + propertyName: type + oneOf: + - $ref: '#/components/schemas/OpenAIResponseMessage' + title: OpenAIResponseMessage + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + title: OpenAIResponseOutputMessageWebSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + title: OpenAIResponseOutputMessageFileSearchToolCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + title: OpenAIResponseOutputMessageFunctionToolCall + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + title: OpenAIResponseInputFunctionToolCallOutput + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + title: OpenAIResponseMCPApprovalRequest + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + title: OpenAIResponseMCPApprovalResponse + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + title: OpenAIResponseOutputMessageMCPCall + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + title: OpenAIResponseOutputMessageMCPListTools + title: OpenAIResponseMessage | ... (9 variants) + maxItems: 20 + title: Items + type: array + required: + - items + title: ConversationItemCreateRequest + type: object + ToolGroupInput: + description: Input data for registering a tool group. + properties: + toolgroup_id: + title: Toolgroup Id + type: string + provider_id: + title: Provider Id + type: string + args: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + nullable: true + mcp_endpoint: + anyOf: + - $ref: '#/components/schemas/URL' + title: URL + - type: 'null' + nullable: true + title: URL + required: + - toolgroup_id + - provider_id + title: ToolGroupInput + type: object + Api: + description: Enumeration of all available APIs in the Llama Stack system. 
+ enum: + - providers + - inference + - safety + - agents + - batches + - vector_io + - datasetio + - scoring + - eval + - post_training + - tool_runtime + - models + - shields + - vector_stores + - datasets + - scoring_functions + - benchmarks + - tool_groups + - files + - prompts + - conversations + - inspect + title: Api + type: string + ProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + required: + - api + - provider_type + - config_class + title: ProviderSpec + type: object + InlineProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + container_image: + anyOf: + - type: string + - type: 'null' + description: |2 + + The container image to use for this implementation. If one is provided, pip_packages will be ignored. + If a provider depends on other providers, the dependencies MUST NOT specify a container image. + nullable: true + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + title: InlineProviderSpec + type: object + RemoteProviderSpec: + properties: + api: + $ref: '#/components/schemas/Api' + provider_type: + title: Provider Type + type: string + config_class: + description: Fully-qualified classname of the config for this provider + title: Config Class + type: string + api_dependencies: + description: Higher-level API surfaces may depend on other providers to provide their functionality + items: + $ref: '#/components/schemas/Api' + title: Api Dependencies + type: array + optional_api_dependencies: + items: + $ref: '#/components/schemas/Api' + title: Optional Api Dependencies + type: array + deprecation_warning: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated, specify the warning message here + nullable: true + deprecation_error: + anyOf: + - type: string + - type: 'null' + description: If this provider is deprecated and does NOT work, specify the error message here + nullable: true + module: + anyOf: + - type: string + - type: 'null' + description: |2- + + Fully-qualified name of the module to import. The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + + nullable: true + pip_packages: + description: The pip dependencies needed for this implementation + items: + type: string + title: Pip Packages + type: array + provider_data_validator: + anyOf: + - type: string + - type: 'null' + nullable: true + is_external: + default: false + description: Notes whether this provider is an external provider. + title: Is External + type: boolean + deps__: + items: + type: string + title: Deps + type: array + adapter_type: + description: Unique identifier for this adapter + title: Adapter Type + type: string + description: + anyOf: + - type: string + - type: 'null' + description: |2 + + A description of the provider. This is used to display in the documentation. + nullable: true + required: + - api + - provider_type + - config_class + - adapter_type + title: RemoteProviderSpec + type: object + PostTrainingJobLogStream: + description: Stream of logs from a finetuning job. 
+ properties: + job_uuid: + title: Job Uuid + type: string + log_lines: + items: + type: string + title: Log Lines + type: array + required: + - job_uuid + - log_lines + title: PostTrainingJobLogStream + type: object + RLHFAlgorithm: + description: Available reinforcement learning from human feedback algorithms. + enum: + - dpo + title: RLHFAlgorithm + type: string + PostTrainingRLHFRequest: + description: Request to finetune a model using reinforcement learning from human feedback. + properties: + job_uuid: + title: Job Uuid + type: string + finetuned_model: + $ref: '#/components/schemas/URL' + dataset_id: + title: Dataset Id + type: string + validation_dataset_id: + title: Validation Dataset Id + type: string + algorithm: + $ref: '#/components/schemas/RLHFAlgorithm' + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + hyperparam_search_config: + additionalProperties: true + title: Hyperparam Search Config + type: object + logger_config: + additionalProperties: true + title: Logger Config + type: object + required: + - job_uuid + - finetuned_model + - dataset_id + - validation_dataset_id + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + title: PostTrainingRLHFRequest + type: object responses: BadRequest400: description: The request was invalid or malformed @@ -13520,8 +13040,7 @@ components: title: Bad Request detail: The request was invalid or malformed TooManyRequests429: - description: >- - The client has sent too many requests in a given amount of time + description: The client has sent too many requests in a given amount of time content: application/json: schema: @@ -13529,11 +13048,9 @@ components: example: status: 429 title: Too Many Requests - detail: >- - You have exceeded the rate limit. Please try again later. + detail: You have exceeded the rate limit. Please try again later. InternalServerError500: - description: >- - The server encountered an unexpected error + description: The server encountered an unexpected error content: application/json: schema: @@ -13541,113 +13058,101 @@ components: example: status: 500 title: Internal Server Error - detail: >- - An unexpected error occurred. Our team has been notified. + detail: An unexpected error occurred DefaultError: - description: An unexpected error occurred + description: An error occurred content: application/json: schema: $ref: '#/components/schemas/Error' - example: - status: 0 - title: Error - detail: An unexpected error occurred -security: - - Default: [] tags: - - name: Agents - description: >- - APIs for creating and interacting with agentic systems. - x-displayName: Agents - - name: Benchmarks - description: '' - - name: Conversations - description: >- - Protocol for conversation management operations. - x-displayName: Conversations - - name: DatasetIO - description: '' - - name: Datasets - description: '' - - name: Eval - description: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. - x-displayName: Evaluations - - name: Files - description: >- - This API is used to upload documents that can be used with other Llama Stack - APIs. - x-displayName: Files - - name: Inference - description: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. +- description: APIs for creating and interacting with agentic systems. 
+ name: Agents + x-displayName: Agents +- description: |- + The API is designed to allow use of openai client libraries for seamless integration. + This API provides the following extensions: + - idempotent batch creation - This API provides the raw interface to the underlying models. Two kinds of models - are supported: + Note: This API is currently under active development and may undergo changes. + name: Batches + x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale. +- description: '' + name: Benchmarks +- description: Protocol for conversation management operations. + name: Conversations + x-displayName: Conversations +- description: '' + name: DatasetIO +- description: '' + name: Datasets +- description: Llama Stack Evaluation API for running evaluations on model and agent candidates. + name: Eval + x-displayName: Evaluations +- description: This API is used to upload documents that can be used with other Llama Stack APIs. + name: Files + x-displayName: Files +- description: |- + Llama Stack Inference API for generating completions, chat completions, and embeddings. - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - - Embedding models: these models generate embeddings to be used for semantic - search. - x-displayName: Inference - - name: Inspect - description: >- - APIs for inspecting the Llama Stack service, including health status, available - API routes with methods and implementing providers. - x-displayName: Inspect - - name: Models - description: '' - - name: PostTraining (Coming Soon) - description: '' - - name: Prompts - description: >- - Protocol for prompt management operations. - x-displayName: Prompts - - name: Providers - description: >- - Providers API for inspecting, listing, and modifying providers and their configurations. - x-displayName: Providers - - name: Safety - description: OpenAI-compatible Moderations API. - x-displayName: Safety - - name: Scoring - description: '' - - name: ScoringFunctions - description: '' - - name: Shields - description: '' - - name: SyntheticDataGeneration (Coming Soon) - description: '' - - name: ToolGroups - description: '' - - name: ToolRuntime - description: '' - - name: VectorIO - description: '' + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + name: Inference + x-displayName: Inference +- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + name: Inspect + x-displayName: Inspect +- description: '' + name: Models +- description: '' + name: PostTraining (Coming Soon) +- description: Protocol for prompt management operations. + name: Prompts + x-displayName: Prompts +- description: Providers API for inspecting, listing, and modifying providers and their configurations. + name: Providers + x-displayName: Providers +- description: OpenAI-compatible Moderations API. 
+ name: Safety + x-displayName: Safety +- description: '' + name: Scoring +- description: '' + name: ScoringFunctions +- description: '' + name: Shields +- description: '' + name: ToolGroups +- description: '' + name: ToolRuntime +- description: '' + name: VectorIO x-tagGroups: - - name: Operations - tags: - - Agents - - Benchmarks - - Conversations - - DatasetIO - - Datasets - - Eval - - Files - - Inference - - Inspect - - Models - - PostTraining (Coming Soon) - - Prompts - - Providers - - Safety - - Scoring - - ScoringFunctions - - Shields - - SyntheticDataGeneration (Coming Soon) - - ToolGroups - - ToolRuntime - - VectorIO +- name: Operations + tags: + - Agents + - Batches + - Benchmarks + - Conversations + - DatasetIO + - Datasets + - Eval + - Files + - Inference + - Inspect + - Models + - PostTraining (Coming Soon) + - Prompts + - Providers + - Safety + - Scoring + - ScoringFunctions + - Shields + - ToolGroups + - ToolRuntime + - VectorIO +security: +- Default: [] diff --git a/llama_stack/__init__.py b/llama_stack/__init__.py deleted file mode 100644 index 1c2ce7123..000000000 --- a/llama_stack/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.core.library_client import ( # noqa: F401 - AsyncLlamaStackAsLibraryClient, - LlamaStackAsLibraryClient, -) diff --git a/llama_stack/apis/agents/__init__.py b/llama_stack/apis/agents/__init__.py deleted file mode 100644 index 6416b283b..000000000 --- a/llama_stack/apis/agents/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .agents import * diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py deleted file mode 100644 index 6ad45cf99..000000000 --- a/llama_stack/apis/agents/agents.py +++ /dev/null @@ -1,894 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
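# Illustrative sketch only (not part of the patch): the YAML schemas above
# (SamplingStrategy, ResponseFormat) and the models in this file (Step,
# AgentTurnResponseEventPayload) share one tagged-union pattern, where the
# spec's "discriminator.propertyName" corresponds to Pydantic's
# Field(discriminator=...). A minimal, runnable Pydantic v2 sketch follows;
# the class names mirror the spec but are reconstructed here, not the
# generated models themselves.
from typing import Annotated, Any, Literal

from pydantic import BaseModel, Field, TypeAdapter

class JsonSchemaResponseFormat(BaseModel):
    type: Literal["json_schema"] = "json_schema"
    json_schema: dict[str, Any]

class GrammarResponseFormat(BaseModel):
    type: Literal["grammar"] = "grammar"
    bnf: dict[str, Any]

# The union validates by dispatching on the "type" key, exactly as the
# OpenAPI "mapping" block describes.
ResponseFormat = Annotated[
    JsonSchemaResponseFormat | GrammarResponseFormat,
    Field(discriminator="type"),
]

fmt = TypeAdapter(ResponseFormat).validate_python(
    {"type": "json_schema", "json_schema": {"type": "object"}}
)
assert isinstance(fmt, JsonSchemaResponseFormat)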
- -from collections.abc import AsyncIterator -from datetime import datetime -from enum import StrEnum -from typing import Annotated, Any, Literal, Protocol, runtime_checkable - -from pydantic import BaseModel, ConfigDict, Field - -from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent -from llama_stack.apis.common.responses import Order, PaginatedResponse -from llama_stack.apis.inference import ( - CompletionMessage, - ResponseFormat, - SamplingParams, - ToolCall, - ToolChoice, - ToolConfig, - ToolPromptFormat, - ToolResponse, - ToolResponseMessage, - UserMessage, -) -from llama_stack.apis.safety import SafetyViolation -from llama_stack.apis.tools import ToolDef -from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod - -from .openai_responses import ( - ListOpenAIResponseInputItem, - ListOpenAIResponseObject, - OpenAIDeleteResponseObject, - OpenAIResponseInput, - OpenAIResponseInputTool, - OpenAIResponseObject, - OpenAIResponseObjectStream, - OpenAIResponseText, -) - - -@json_schema_type -class ResponseGuardrailSpec(BaseModel): - """Specification for a guardrail to apply during response generation. - - :param type: The type/identifier of the guardrail. - """ - - type: str - # TODO: more fields to be added for guardrail configuration - - -ResponseGuardrail = str | ResponseGuardrailSpec - - -class Attachment(BaseModel): - """An attachment to an agent turn. - - :param content: The content of the attachment. - :param mime_type: The MIME type of the attachment. - """ - - content: InterleavedContent | URL - mime_type: str - - -class Document(BaseModel): - """A document to be used by an agent. - - :param content: The content of the document. - :param mime_type: The MIME type of the document. - """ - - content: InterleavedContent | URL - mime_type: str - - -class StepCommon(BaseModel): - """A common step in an agent turn. - - :param turn_id: The ID of the turn. - :param step_id: The ID of the step. - :param started_at: The time the step started. - :param completed_at: The time the step completed. - """ - - turn_id: str - step_id: str - started_at: datetime | None = None - completed_at: datetime | None = None - - -class StepType(StrEnum): - """Type of the step in an agent turn. - - :cvar inference: The step is an inference step that calls an LLM. - :cvar tool_execution: The step is a tool execution step that executes a tool call. - :cvar shield_call: The step is a shield call step that checks for safety violations. - :cvar memory_retrieval: The step is a memory retrieval step that retrieves context for vector dbs. - """ - - inference = "inference" - tool_execution = "tool_execution" - shield_call = "shield_call" - memory_retrieval = "memory_retrieval" - - -@json_schema_type -class InferenceStep(StepCommon): - """An inference step in an agent turn. - - :param model_response: The response from the LLM. - """ - - model_config = ConfigDict(protected_namespaces=()) - - step_type: Literal[StepType.inference] = StepType.inference - model_response: CompletionMessage - - -@json_schema_type -class ToolExecutionStep(StepCommon): - """A tool execution step in an agent turn. - - :param tool_calls: The tool calls to execute. - :param tool_responses: The tool responses from the tool calls. 
- """ - - step_type: Literal[StepType.tool_execution] = StepType.tool_execution - tool_calls: list[ToolCall] - tool_responses: list[ToolResponse] - - -@json_schema_type -class ShieldCallStep(StepCommon): - """A shield call step in an agent turn. - - :param violation: The violation from the shield call. - """ - - step_type: Literal[StepType.shield_call] = StepType.shield_call - violation: SafetyViolation | None - - -@json_schema_type -class MemoryRetrievalStep(StepCommon): - """A memory retrieval step in an agent turn. - - :param vector_db_ids: The IDs of the vector databases to retrieve context from. - :param inserted_context: The context retrieved from the vector databases. - """ - - step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval - # TODO: should this be List[str]? - vector_db_ids: str - inserted_context: InterleavedContent - - -Step = Annotated[ - InferenceStep | ToolExecutionStep | ShieldCallStep | MemoryRetrievalStep, - Field(discriminator="step_type"), -] - - -@json_schema_type -class Turn(BaseModel): - """A single turn in an interaction with an Agentic System. - - :param turn_id: Unique identifier for the turn within a session - :param session_id: Unique identifier for the conversation session - :param input_messages: List of messages that initiated this turn - :param steps: Ordered list of processing steps executed during this turn - :param output_message: The model's generated response containing content and metadata - :param output_attachments: (Optional) Files or media attached to the agent's response - :param started_at: Timestamp when the turn began - :param completed_at: (Optional) Timestamp when the turn finished, if completed - """ - - turn_id: str - session_id: str - input_messages: list[UserMessage | ToolResponseMessage] - steps: list[Step] - output_message: CompletionMessage - output_attachments: list[Attachment] | None = Field(default_factory=lambda: []) - - started_at: datetime - completed_at: datetime | None = None - - -@json_schema_type -class Session(BaseModel): - """A single session of an interaction with an Agentic System. - - :param session_id: Unique identifier for the conversation session - :param session_name: Human-readable name for the session - :param turns: List of all turns that have occurred in this session - :param started_at: Timestamp when the session was created - """ - - session_id: str - session_name: str - turns: list[Turn] - started_at: datetime - - -class AgentToolGroupWithArgs(BaseModel): - name: str - args: dict[str, Any] - - -AgentToolGroup = str | AgentToolGroupWithArgs -register_schema(AgentToolGroup, name="AgentTool") - - -class AgentConfigCommon(BaseModel): - sampling_params: SamplingParams | None = Field(default_factory=SamplingParams) - - input_shields: list[str] | None = Field(default_factory=lambda: []) - output_shields: list[str] | None = Field(default_factory=lambda: []) - toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: []) - client_tools: list[ToolDef] | None = Field(default_factory=lambda: []) - tool_choice: ToolChoice | None = Field(default=None, deprecated="use tool_config instead") - tool_prompt_format: ToolPromptFormat | None = Field(default=None, deprecated="use tool_config instead") - tool_config: ToolConfig | None = Field(default=None) - - max_infer_iters: int | None = 10 - - def model_post_init(self, __context): - if self.tool_config: - if self.tool_choice and self.tool_config.tool_choice != self.tool_choice: - raise ValueError("tool_choice is deprecated. 
Use tool_choice in tool_config instead.") - if self.tool_prompt_format and self.tool_config.tool_prompt_format != self.tool_prompt_format: - raise ValueError("tool_prompt_format is deprecated. Use tool_prompt_format in tool_config instead.") - else: - params = {} - if self.tool_choice: - params["tool_choice"] = self.tool_choice - if self.tool_prompt_format: - params["tool_prompt_format"] = self.tool_prompt_format - self.tool_config = ToolConfig(**params) - - -@json_schema_type -class AgentConfig(AgentConfigCommon): - """Configuration for an agent. - - :param model: The model identifier to use for the agent - :param instructions: The system instructions for the agent - :param name: Optional name for the agent, used in telemetry and identification - :param enable_session_persistence: Optional flag indicating whether session data has to be persisted - :param response_format: Optional response format configuration - """ - - model: str - instructions: str - name: str | None = None - enable_session_persistence: bool | None = False - response_format: ResponseFormat | None = None - - -@json_schema_type -class Agent(BaseModel): - """An agent instance with configuration and metadata. - - :param agent_id: Unique identifier for the agent - :param agent_config: Configuration settings for the agent - :param created_at: Timestamp when the agent was created - """ - - agent_id: str - agent_config: AgentConfig - created_at: datetime - - -class AgentConfigOverridablePerTurn(AgentConfigCommon): - instructions: str | None = None - - -class AgentTurnResponseEventType(StrEnum): - step_start = "step_start" - step_complete = "step_complete" - step_progress = "step_progress" - - turn_start = "turn_start" - turn_complete = "turn_complete" - turn_awaiting_input = "turn_awaiting_input" - - -@json_schema_type -class AgentTurnResponseStepStartPayload(BaseModel): - """Payload for step start events in agent turn responses. - - :param event_type: Type of event being reported - :param step_type: Type of step being executed - :param step_id: Unique identifier for the step within a turn - :param metadata: (Optional) Additional metadata for the step - """ - - event_type: Literal[AgentTurnResponseEventType.step_start] = AgentTurnResponseEventType.step_start - step_type: StepType - step_id: str - metadata: dict[str, Any] | None = Field(default_factory=lambda: {}) - - -@json_schema_type -class AgentTurnResponseStepCompletePayload(BaseModel): - """Payload for step completion events in agent turn responses. - - :param event_type: Type of event being reported - :param step_type: Type of step being executed - :param step_id: Unique identifier for the step within a turn - :param step_details: Complete details of the executed step - """ - - event_type: Literal[AgentTurnResponseEventType.step_complete] = AgentTurnResponseEventType.step_complete - step_type: StepType - step_id: str - step_details: Step - - -@json_schema_type -class AgentTurnResponseStepProgressPayload(BaseModel): - """Payload for step progress events in agent turn responses. 
- - :param event_type: Type of event being reported - :param step_type: Type of step being executed - :param step_id: Unique identifier for the step within a turn - :param delta: Incremental content changes during step execution - """ - - model_config = ConfigDict(protected_namespaces=()) - - event_type: Literal[AgentTurnResponseEventType.step_progress] = AgentTurnResponseEventType.step_progress - step_type: StepType - step_id: str - - delta: ContentDelta - - -@json_schema_type -class AgentTurnResponseTurnStartPayload(BaseModel): - """Payload for turn start events in agent turn responses. - - :param event_type: Type of event being reported - :param turn_id: Unique identifier for the turn within a session - """ - - event_type: Literal[AgentTurnResponseEventType.turn_start] = AgentTurnResponseEventType.turn_start - turn_id: str - - -@json_schema_type -class AgentTurnResponseTurnCompletePayload(BaseModel): - """Payload for turn completion events in agent turn responses. - - :param event_type: Type of event being reported - :param turn: Complete turn data including all steps and results - """ - - event_type: Literal[AgentTurnResponseEventType.turn_complete] = AgentTurnResponseEventType.turn_complete - turn: Turn - - -@json_schema_type -class AgentTurnResponseTurnAwaitingInputPayload(BaseModel): - """Payload for turn awaiting input events in agent turn responses. - - :param event_type: Type of event being reported - :param turn: Turn data when waiting for external tool responses - """ - - event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input] = AgentTurnResponseEventType.turn_awaiting_input - turn: Turn - - -AgentTurnResponseEventPayload = Annotated[ - AgentTurnResponseStepStartPayload - | AgentTurnResponseStepProgressPayload - | AgentTurnResponseStepCompletePayload - | AgentTurnResponseTurnStartPayload - | AgentTurnResponseTurnCompletePayload - | AgentTurnResponseTurnAwaitingInputPayload, - Field(discriminator="event_type"), -] -register_schema(AgentTurnResponseEventPayload, name="AgentTurnResponseEventPayload") - - -@json_schema_type -class AgentTurnResponseEvent(BaseModel): - """An event in an agent turn response stream. - - :param payload: Event-specific payload containing event data - """ - - payload: AgentTurnResponseEventPayload - - -@json_schema_type -class AgentCreateResponse(BaseModel): - """Response returned when creating a new agent. - - :param agent_id: Unique identifier for the created agent - """ - - agent_id: str - - -@json_schema_type -class AgentSessionCreateResponse(BaseModel): - """Response returned when creating a new agent session. - - :param session_id: Unique identifier for the created session - """ - - session_id: str - - -@json_schema_type -class AgentTurnCreateRequest(AgentConfigOverridablePerTurn): - """Request to create a new turn for an agent. 
- - :param agent_id: Unique identifier for the agent - :param session_id: Unique identifier for the conversation session - :param messages: List of messages to start the turn with - :param documents: (Optional) List of documents to provide to the agent - :param toolgroups: (Optional) List of tool groups to make available for this turn - :param stream: (Optional) Whether to stream the response - :param tool_config: (Optional) Tool configuration to override agent defaults - """ - - agent_id: str - session_id: str - - # TODO: figure out how we can simplify this and make why - # ToolResponseMessage needs to be here (it is function call - # execution from outside the system) - messages: list[UserMessage | ToolResponseMessage] - - documents: list[Document] | None = None - toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: []) - - stream: bool | None = False - tool_config: ToolConfig | None = None - - -@json_schema_type -class AgentTurnResumeRequest(BaseModel): - """Request to resume an agent turn with tool responses. - - :param agent_id: Unique identifier for the agent - :param session_id: Unique identifier for the conversation session - :param turn_id: Unique identifier for the turn within a session - :param tool_responses: List of tool responses to submit to continue the turn - :param stream: (Optional) Whether to stream the response - """ - - agent_id: str - session_id: str - turn_id: str - tool_responses: list[ToolResponse] - stream: bool | None = False - - -@json_schema_type -class AgentTurnResponseStreamChunk(BaseModel): - """Streamed agent turn completion response. - - :param event: Individual event in the agent turn response stream - """ - - event: AgentTurnResponseEvent - - -@json_schema_type -class AgentStepResponse(BaseModel): - """Response containing details of a specific agent step. - - :param step: The complete step data and execution details - """ - - step: Step - - -@runtime_checkable -class Agents(Protocol): - """Agents - - APIs for creating and interacting with agentic systems.""" - - @webmethod( - route="/agents", - method="POST", - descriptive_name="create_agent", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/agents", - method="POST", - descriptive_name="create_agent", - level=LLAMA_STACK_API_V1ALPHA, - ) - async def create_agent( - self, - agent_config: AgentConfig, - ) -> AgentCreateResponse: - """Create an agent with the given configuration. - - :param agent_config: The configuration for the agent. - :returns: An AgentCreateResponse with the agent ID. - """ - ... - - @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn", - method="POST", - descriptive_name="create_agent_turn", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn", - method="POST", - descriptive_name="create_agent_turn", - level=LLAMA_STACK_API_V1ALPHA, - ) - async def create_agent_turn( - self, - agent_id: str, - session_id: str, - messages: list[UserMessage | ToolResponseMessage], - stream: bool | None = False, - documents: list[Document] | None = None, - toolgroups: list[AgentToolGroup] | None = None, - tool_config: ToolConfig | None = None, - ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]: - """Create a new turn for an agent. - - :param agent_id: The ID of the agent to create the turn for. - :param session_id: The ID of the session to create the turn for. - :param messages: List of messages to start the turn with. 
- :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. - :param documents: (Optional) List of documents to create the turn with. - :param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request. - :param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config. - :returns: If stream=False, returns a Turn object. - If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk. - """ - ... - - @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", - method="POST", - descriptive_name="resume_agent_turn", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", - method="POST", - descriptive_name="resume_agent_turn", - level=LLAMA_STACK_API_V1ALPHA, - ) - async def resume_agent_turn( - self, - agent_id: str, - session_id: str, - turn_id: str, - tool_responses: list[ToolResponse], - stream: bool | None = False, - ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]: - """Resume an agent turn with executed tool call responses. - - When a Turn has the status `awaiting_input` due to pending input from client side tool calls, this endpoint can be used to submit the outputs from the tool calls once they are ready. - - :param agent_id: The ID of the agent to resume. - :param session_id: The ID of the session to resume. - :param turn_id: The ID of the turn to resume. - :param tool_responses: The tool call responses to resume the turn with. - :param stream: Whether to stream the response. - :returns: A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk objects. - """ - ... - - @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}", - method="GET", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}", - method="GET", - level=LLAMA_STACK_API_V1ALPHA, - ) - async def get_agents_turn( - self, - agent_id: str, - session_id: str, - turn_id: str, - ) -> Turn: - """Retrieve an agent turn by its ID. - - :param agent_id: The ID of the agent to get the turn for. - :param session_id: The ID of the session to get the turn for. - :param turn_id: The ID of the turn to get. - :returns: A Turn. - """ - ... - - @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", - method="GET", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", - method="GET", - level=LLAMA_STACK_API_V1ALPHA, - ) - async def get_agents_step( - self, - agent_id: str, - session_id: str, - turn_id: str, - step_id: str, - ) -> AgentStepResponse: - """Retrieve an agent step by its ID. - - :param agent_id: The ID of the agent to get the step for. - :param session_id: The ID of the session to get the step for. - :param turn_id: The ID of the turn to get the step for. - :param step_id: The ID of the step to get. - :returns: An AgentStepResponse. - """ - ... 
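# Illustrative sketch only: the minimal client-side flow the endpoints above
# imply -- create an agent, open a session, run one non-streaming turn. "impl"
# stands for any implementation of the Agents protocol; the model id and
# instructions are placeholder values, not taken from the patch.
async def demo_turn(impl: "Agents") -> "Turn":
    created = await impl.create_agent(
        agent_config=AgentConfig(model="example-model", instructions="Be terse.")
    )
    session = await impl.create_agent_session(created.agent_id, session_name="demo")
    turn = await impl.create_agent_turn(
        agent_id=created.agent_id,
        session_id=session.session_id,
        messages=[UserMessage(content="Hello!")],
        stream=False,  # stream=True instead yields AgentTurnResponseStreamChunk events
    )
    return turn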
- - @webmethod( - route="/agents/{agent_id}/session", - method="POST", - descriptive_name="create_agent_session", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/agents/{agent_id}/session", - method="POST", - descriptive_name="create_agent_session", - level=LLAMA_STACK_API_V1ALPHA, - ) - async def create_agent_session( - self, - agent_id: str, - session_name: str, - ) -> AgentSessionCreateResponse: - """Create a new session for an agent. - - :param agent_id: The ID of the agent to create the session for. - :param session_name: The name of the session to create. - :returns: An AgentSessionCreateResponse. - """ - ... - - @webmethod( - route="/agents/{agent_id}/session/{session_id}", - method="GET", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/agents/{agent_id}/session/{session_id}", - method="GET", - level=LLAMA_STACK_API_V1ALPHA, - ) - async def get_agents_session( - self, - session_id: str, - agent_id: str, - turn_ids: list[str] | None = None, - ) -> Session: - """Retrieve an agent session by its ID. - - :param session_id: The ID of the session to get. - :param agent_id: The ID of the agent to get the session for. - :param turn_ids: (Optional) List of turn IDs to filter the session by. - :returns: A Session. - """ - ... - - @webmethod( - route="/agents/{agent_id}/session/{session_id}", - method="DELETE", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod( - route="/agents/{agent_id}/session/{session_id}", - method="DELETE", - level=LLAMA_STACK_API_V1ALPHA, - ) - async def delete_agents_session( - self, - session_id: str, - agent_id: str, - ) -> None: - """Delete an agent session by its ID and its associated turns. - - :param session_id: The ID of the session to delete. - :param agent_id: The ID of the agent to delete the session for. - """ - ... - - @webmethod( - route="/agents/{agent_id}", - method="DELETE", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) - async def delete_agent( - self, - agent_id: str, - ) -> None: - """Delete an agent by its ID and its associated sessions and turns. - - :param agent_id: The ID of the agent to delete. - """ - ... - - @webmethod(route="/agents", method="GET", deprecated=True, level=LLAMA_STACK_API_V1) - @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse: - """List all agents. - - :param start_index: The index to start the pagination from. - :param limit: The number of agents to return. - :returns: A PaginatedResponse. - """ - ... - - @webmethod( - route="/agents/{agent_id}", - method="GET", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def get_agent(self, agent_id: str) -> Agent: - """Describe an agent by its ID. - - :param agent_id: ID of the agent. - :returns: An Agent of the agent. - """ - ... - - @webmethod( - route="/agents/{agent_id}/sessions", - method="GET", - deprecated=True, - level=LLAMA_STACK_API_V1, - ) - @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def list_agent_sessions( - self, - agent_id: str, - start_index: int | None = None, - limit: int | None = None, - ) -> PaginatedResponse: - """List all session(s) of a given agent. - - :param agent_id: The ID of the agent to list sessions for. 
- :param start_index: The index to start the pagination from. - :param limit: The number of sessions to return. - :returns: A PaginatedResponse. - """ - ... - - # We situate the OpenAI Responses API in the Agents API just like we did things - # for Inference. The Responses API, in its intent, serves the same purpose as - # the Agents API above -- it is essentially a lightweight "agentic loop" with - # integrated tool calling. - # - # Both of these APIs are inherently stateful. - - @webmethod( - route="/openai/v1/responses/{response_id}", - method="GET", - level=LLAMA_STACK_API_V1, - deprecated=True, - ) - @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1) - async def get_openai_response( - self, - response_id: str, - ) -> OpenAIResponseObject: - """Get a model response. - - :param response_id: The ID of the OpenAI response to retrieve. - :returns: An OpenAIResponseObject. - """ - ... - - @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1) - async def create_openai_response( - self, - input: str | list[OpenAIResponseInput], - model: str, - instructions: str | None = None, - previous_response_id: str | None = None, - conversation: str | None = None, - store: bool | None = True, - stream: bool | None = False, - temperature: float | None = None, - text: OpenAIResponseText | None = None, - tools: list[OpenAIResponseInputTool] | None = None, - include: list[str] | None = None, - max_infer_iters: int | None = 10, # this is an extension to the OpenAI API - guardrails: Annotated[ - list[ResponseGuardrail] | None, - ExtraBodyField( - "List of guardrails to apply during response generation. Guardrails provide safety and content moderation." - ), - ] = None, - ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: - """Create a model response. - - :param input: Input message(s) to create the response. - :param model: The underlying LLM used for completions. - :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses. - :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation. - :param include: (Optional) Additional fields to include in the response. - :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications. - :returns: An OpenAIResponseObject. - """ - ... - - @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1) - async def list_openai_responses( - self, - after: str | None = None, - limit: int | None = 50, - model: str | None = None, - order: Order | None = Order.desc, - ) -> ListOpenAIResponseObject: - """List all responses. - - :param after: The ID of the last response to return. - :param limit: The number of responses to return. - :param model: The model to filter responses by. - :param order: The order to sort responses by when sorted by created_at ('asc' or 'desc'). - :returns: A ListOpenAIResponseObject. - """ - ... 
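# Illustrative sketch only: one non-streaming call to the Responses surface
# defined above, followed by cursor-style listing. "impl" is any Agents
# implementation; the model id is a placeholder and most optional parameters
# are omitted.
async def demo_response(impl: "Agents"):
    response = await impl.create_openai_response(
        input="Summarize this API in one sentence.",
        model="example-model",
        store=True,
        stream=False,
    )
    # Pagination passes the last-seen response id via `after`.
    listing = await impl.list_openai_responses(limit=10, order=Order.desc)
    return response, listing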
- - @webmethod( - route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True - ) - @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1) - async def list_openai_response_input_items( - self, - response_id: str, - after: str | None = None, - before: str | None = None, - include: list[str] | None = None, - limit: int | None = 20, - order: Order | None = Order.desc, - ) -> ListOpenAIResponseInputItem: - """List input items. - - :param response_id: The ID of the response to retrieve input items for. - :param after: An item ID to list items after, used for pagination. - :param before: An item ID to list items before, used for pagination. - :param include: Additional fields to include in the response. - :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. - :param order: The order to return the input items in. Default is desc. - :returns: An ListOpenAIResponseInputItem. - """ - ... - - @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) - async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: - """Delete a response. - - :param response_id: The ID of the OpenAI response to delete. - :returns: An OpenAIDeleteResponseObject - """ - ... diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py deleted file mode 100644 index 821d6a8af..000000000 --- a/llama_stack/apis/agents/openai_responses.py +++ /dev/null @@ -1,1311 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Annotated, Any, Literal - -from pydantic import BaseModel, Field -from typing_extensions import TypedDict - -from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions -from llama_stack.schema_utils import json_schema_type, register_schema - -# NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably -# take their YAML and generate this file automatically. Their YAML is available. - - -@json_schema_type -class OpenAIResponseError(BaseModel): - """Error details for failed OpenAI response requests. - - :param code: Error code identifying the type of failure - :param message: Human-readable error message describing the failure - """ - - code: str - message: str - - -@json_schema_type -class OpenAIResponseInputMessageContentText(BaseModel): - """Text content for input messages in OpenAI response format. - - :param text: The text content of the input message - :param type: Content type identifier, always "input_text" - """ - - text: str - type: Literal["input_text"] = "input_text" - - -@json_schema_type -class OpenAIResponseInputMessageContentImage(BaseModel): - """Image content for input messages in OpenAI response format. 
- - :param detail: Level of detail for image processing, can be "low", "high", or "auto" - :param type: Content type identifier, always "input_image" - :param image_url: (Optional) URL of the image content - """ - - detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto" - type: Literal["input_image"] = "input_image" - # TODO: handle file_id - image_url: str | None = None - - -# TODO: handle file content types -OpenAIResponseInputMessageContent = Annotated[ - OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage, - Field(discriminator="type"), -] -register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") - - -@json_schema_type -class OpenAIResponseAnnotationFileCitation(BaseModel): - """File citation annotation for referencing specific files in response content. - - :param type: Annotation type identifier, always "file_citation" - :param file_id: Unique identifier of the referenced file - :param filename: Name of the referenced file - :param index: Position index of the citation within the content - """ - - type: Literal["file_citation"] = "file_citation" - file_id: str - filename: str - index: int - - -@json_schema_type -class OpenAIResponseAnnotationCitation(BaseModel): - """URL citation annotation for referencing external web resources. - - :param type: Annotation type identifier, always "url_citation" - :param end_index: End position of the citation span in the content - :param start_index: Start position of the citation span in the content - :param title: Title of the referenced web resource - :param url: URL of the referenced web resource - """ - - type: Literal["url_citation"] = "url_citation" - end_index: int - start_index: int - title: str - url: str - - -@json_schema_type -class OpenAIResponseAnnotationContainerFileCitation(BaseModel): - type: Literal["container_file_citation"] = "container_file_citation" - container_id: str - end_index: int - file_id: str - filename: str - start_index: int - - -@json_schema_type -class OpenAIResponseAnnotationFilePath(BaseModel): - type: Literal["file_path"] = "file_path" - file_id: str - index: int - - -OpenAIResponseAnnotations = Annotated[ - OpenAIResponseAnnotationFileCitation - | OpenAIResponseAnnotationCitation - | OpenAIResponseAnnotationContainerFileCitation - | OpenAIResponseAnnotationFilePath, - Field(discriminator="type"), -] -register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations") - - -@json_schema_type -class OpenAIResponseOutputMessageContentOutputText(BaseModel): - text: str - type: Literal["output_text"] = "output_text" - annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) - - -@json_schema_type -class OpenAIResponseContentPartRefusal(BaseModel): - """Refusal content within a streamed response part. - - :param type: Content part type identifier, always "refusal" - :param refusal: Refusal text supplied by the model - """ - - type: Literal["refusal"] = "refusal" - refusal: str - - -OpenAIResponseOutputMessageContent = Annotated[ - OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal, - Field(discriminator="type"), -] -register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent") - - -@json_schema_type -class OpenAIResponseMessage(BaseModel): - """ - Corresponds to the various Message types in the Responses API. 
- They are all under one type because the Responses API gives them all - the same "type" value, and there is no way to tell them apart in certain - scenarios. - """ - - content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent] - role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"] - type: Literal["message"] = "message" - - # The fields below are not used in all scenarios, but are required in others. - id: str | None = None - status: str | None = None - - -@json_schema_type -class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel): - """Web search tool call output message for OpenAI responses. - - :param id: Unique identifier for this tool call - :param status: Current status of the web search operation - :param type: Tool call type identifier, always "web_search_call" - """ - - id: str - status: str - type: Literal["web_search_call"] = "web_search_call" - - -class OpenAIResponseOutputMessageFileSearchToolCallResults(BaseModel): - """Search results returned by the file search operation. - - :param attributes: (Optional) Key-value attributes associated with the file - :param file_id: Unique identifier of the file containing the result - :param filename: Name of the file containing the result - :param score: Relevance score for this search result (between 0 and 1) - :param text: Text content of the search result - """ - - attributes: dict[str, Any] - file_id: str - filename: str - score: float - text: str - - -@json_schema_type -class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel): - """File search tool call output message for OpenAI responses. - - :param id: Unique identifier for this tool call - :param queries: List of search queries executed - :param status: Current status of the file search operation - :param type: Tool call type identifier, always "file_search_call" - :param results: (Optional) Search results returned by the file search operation - """ - - id: str - queries: list[str] - status: str - type: Literal["file_search_call"] = "file_search_call" - results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None - - -@json_schema_type -class OpenAIResponseOutputMessageFunctionToolCall(BaseModel): - """Function tool call output message for OpenAI responses. - - :param call_id: Unique identifier for the function call - :param name: Name of the function being called - :param arguments: JSON string containing the function arguments - :param type: Tool call type identifier, always "function_call" - :param id: (Optional) Additional identifier for the tool call - :param status: (Optional) Current status of the function call execution - """ - - call_id: str - name: str - arguments: str - type: Literal["function_call"] = "function_call" - id: str | None = None - status: str | None = None - - -@json_schema_type -class OpenAIResponseOutputMessageMCPCall(BaseModel): - """Model Context Protocol (MCP) call output message for OpenAI responses. 
- - :param id: Unique identifier for this MCP call - :param type: Tool call type identifier, always "mcp_call" - :param arguments: JSON string containing the MCP call arguments - :param name: Name of the MCP method being called - :param server_label: Label identifying the MCP server handling the call - :param error: (Optional) Error message if the MCP call failed - :param output: (Optional) Output result from the successful MCP call - """ - - id: str - type: Literal["mcp_call"] = "mcp_call" - arguments: str - name: str - server_label: str - error: str | None = None - output: str | None = None - - -class MCPListToolsTool(BaseModel): - """Tool definition returned by MCP list tools operation. - - :param input_schema: JSON schema defining the tool's input parameters - :param name: Name of the tool - :param description: (Optional) Description of what the tool does - """ - - input_schema: dict[str, Any] - name: str - description: str | None = None - - -@json_schema_type -class OpenAIResponseOutputMessageMCPListTools(BaseModel): - """MCP list tools output message containing available tools from an MCP server. - - :param id: Unique identifier for this MCP list tools operation - :param type: Tool call type identifier, always "mcp_list_tools" - :param server_label: Label identifying the MCP server providing the tools - :param tools: List of available tools provided by the MCP server - """ - - id: str - type: Literal["mcp_list_tools"] = "mcp_list_tools" - server_label: str - tools: list[MCPListToolsTool] - - -@json_schema_type -class OpenAIResponseMCPApprovalRequest(BaseModel): - """ - A request for human approval of a tool invocation. - """ - - arguments: str - id: str - name: str - server_label: str - type: Literal["mcp_approval_request"] = "mcp_approval_request" - - -@json_schema_type -class OpenAIResponseMCPApprovalResponse(BaseModel): - """ - A response to an MCP approval request. - """ - - approval_request_id: str - approve: bool - type: Literal["mcp_approval_response"] = "mcp_approval_response" - id: str | None = None - reason: str | None = None - - -OpenAIResponseOutput = Annotated[ - OpenAIResponseMessage - | OpenAIResponseOutputMessageWebSearchToolCall - | OpenAIResponseOutputMessageFileSearchToolCall - | OpenAIResponseOutputMessageFunctionToolCall - | OpenAIResponseOutputMessageMCPCall - | OpenAIResponseOutputMessageMCPListTools - | OpenAIResponseMCPApprovalRequest, - Field(discriminator="type"), -] -register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput") - - -# This has to be a TypedDict because we need a "schema" field and our strong -# typing code in the schema generator doesn't support Pydantic aliases. That also -# means we can't use a discriminator field here, because TypedDicts don't support -# default values which the strong typing code requires for discriminators. -class OpenAIResponseTextFormat(TypedDict, total=False): - """Configuration for Responses API text format. - - :param type: Must be "text", "json_schema", or "json_object" to identify the format type - :param name: The name of the response format. Only used for json_schema. - :param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema. - :param description: (Optional) A description of the response format. Only used for json_schema. - :param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema. 
- """ - - type: Literal["text"] | Literal["json_schema"] | Literal["json_object"] - name: str | None - schema: dict[str, Any] | None - description: str | None - strict: bool | None - - -@json_schema_type -class OpenAIResponseText(BaseModel): - """Text response configuration for OpenAI responses. - - :param format: (Optional) Text format configuration specifying output format requirements - """ - - format: OpenAIResponseTextFormat | None = None - - -# Must match type Literals of OpenAIResponseInputToolWebSearch below -WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"] - - -@json_schema_type -class OpenAIResponseInputToolWebSearch(BaseModel): - """Web search tool configuration for OpenAI response inputs. - - :param type: Web search tool type variant to use - :param search_context_size: (Optional) Size of search context, must be "low", "medium", or "high" - """ - - # Must match values of WebSearchToolTypes above - type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = ( - "web_search" - ) - # TODO: actually use search_context_size somewhere... - search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") - # TODO: add user_location - - -@json_schema_type -class OpenAIResponseInputToolFunction(BaseModel): - """Function tool configuration for OpenAI response inputs. - - :param type: Tool type identifier, always "function" - :param name: Name of the function that can be called - :param description: (Optional) Description of what the function does - :param parameters: (Optional) JSON schema defining the function's parameters - :param strict: (Optional) Whether to enforce strict parameter validation - """ - - type: Literal["function"] = "function" - name: str - description: str | None = None - parameters: dict[str, Any] | None - strict: bool | None = None - - -@json_schema_type -class OpenAIResponseInputToolFileSearch(BaseModel): - """File search tool configuration for OpenAI response inputs. - - :param type: Tool type identifier, always "file_search" - :param vector_store_ids: List of vector store identifiers to search within - :param filters: (Optional) Additional filters to apply to the search - :param max_num_results: (Optional) Maximum number of search results to return (1-50) - :param ranking_options: (Optional) Options for ranking and scoring search results - """ - - type: Literal["file_search"] = "file_search" - vector_store_ids: list[str] - filters: dict[str, Any] | None = None - max_num_results: int | None = Field(default=10, ge=1, le=50) - ranking_options: FileSearchRankingOptions | None = None - - -class ApprovalFilter(BaseModel): - """Filter configuration for MCP tool approval requirements. - - :param always: (Optional) List of tool names that always require approval - :param never: (Optional) List of tool names that never require approval - """ - - always: list[str] | None = None - never: list[str] | None = None - - -class AllowedToolsFilter(BaseModel): - """Filter configuration for restricting which MCP tools can be used. - - :param tool_names: (Optional) List of specific tool names that are allowed - """ - - tool_names: list[str] | None = None - - -@json_schema_type -class OpenAIResponseInputToolMCP(BaseModel): - """Model Context Protocol (MCP) tool configuration for OpenAI response inputs. 
- - :param type: Tool type identifier, always "mcp" - :param server_label: Label to identify this MCP server - :param server_url: URL endpoint of the MCP server - :param headers: (Optional) HTTP headers to include when connecting to the server - :param require_approval: Approval requirement for tool calls ("always", "never", or filter) - :param allowed_tools: (Optional) Restriction on which tools can be used from this server - """ - - type: Literal["mcp"] = "mcp" - server_label: str - server_url: str - headers: dict[str, Any] | None = None - - require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never" - allowed_tools: list[str] | AllowedToolsFilter | None = None - - -OpenAIResponseInputTool = Annotated[ - OpenAIResponseInputToolWebSearch - | OpenAIResponseInputToolFileSearch - | OpenAIResponseInputToolFunction - | OpenAIResponseInputToolMCP, - Field(discriminator="type"), -] -register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool") - - -@json_schema_type -class OpenAIResponseToolMCP(BaseModel): - """Model Context Protocol (MCP) tool configuration for OpenAI response object. - - :param type: Tool type identifier, always "mcp" - :param server_label: Label to identify this MCP server - :param allowed_tools: (Optional) Restriction on which tools can be used from this server - """ - - type: Literal["mcp"] = "mcp" - server_label: str - allowed_tools: list[str] | AllowedToolsFilter | None = None - - -OpenAIResponseTool = Annotated[ - OpenAIResponseInputToolWebSearch - | OpenAIResponseInputToolFileSearch - | OpenAIResponseInputToolFunction - | OpenAIResponseToolMCP, # The only type that differs from that in the inputs is the MCP tool - Field(discriminator="type"), -] -register_schema(OpenAIResponseTool, name="OpenAIResponseTool") - - -class OpenAIResponseUsageOutputTokensDetails(BaseModel): - """Token details for output tokens in OpenAI response usage. - - :param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models) - """ - - reasoning_tokens: int | None = None - - -class OpenAIResponseUsageInputTokensDetails(BaseModel): - """Token details for input tokens in OpenAI response usage. - - :param cached_tokens: Number of tokens retrieved from cache - """ - - cached_tokens: int | None = None - - -@json_schema_type -class OpenAIResponseUsage(BaseModel): - """Usage information for OpenAI response. - - :param input_tokens: Number of tokens in the input - :param output_tokens: Number of tokens in the output - :param total_tokens: Total tokens used (input + output) - :param input_tokens_details: Detailed breakdown of input token usage - :param output_tokens_details: Detailed breakdown of output token usage - """ - - input_tokens: int - output_tokens: int - total_tokens: int - input_tokens_details: OpenAIResponseUsageInputTokensDetails | None = None - output_tokens_details: OpenAIResponseUsageOutputTokensDetails | None = None - - -@json_schema_type -class OpenAIResponseObject(BaseModel): - """Complete OpenAI response object containing generation results and metadata. - - :param created_at: Unix timestamp when the response was created - :param error: (Optional) Error details if the response generation failed - :param id: Unique identifier for this response - :param model: Model identifier used for generation - :param object: Object type identifier, always "response" - :param output: List of generated output items (messages, tool calls, etc.) 
- :param parallel_tool_calls: Whether tool calls can be executed in parallel - :param previous_response_id: (Optional) ID of the previous response in a conversation - :param status: Current status of the response generation - :param temperature: (Optional) Sampling temperature used for generation - :param text: Text formatting configuration for the response - :param top_p: (Optional) Nucleus sampling parameter used for generation - :param tools: (Optional) An array of tools the model may call while generating a response. - :param truncation: (Optional) Truncation strategy applied to the response - :param usage: (Optional) Token usage information for the response - :param instructions: (Optional) System message inserted into the model's context - """ - - created_at: int - error: OpenAIResponseError | None = None - id: str - model: str - object: Literal["response"] = "response" - output: list[OpenAIResponseOutput] - parallel_tool_calls: bool = False - previous_response_id: str | None = None - status: str - temperature: float | None = None - # Default to text format to avoid breaking the loading of old responses - # before the field was added. New responses will have this set always. - text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) - top_p: float | None = None - tools: list[OpenAIResponseTool] | None = None - truncation: str | None = None - usage: OpenAIResponseUsage | None = None - instructions: str | None = None - - -@json_schema_type -class OpenAIDeleteResponseObject(BaseModel): - """Response object confirming deletion of an OpenAI response. - - :param id: Unique identifier of the deleted response - :param object: Object type identifier, always "response" - :param deleted: Deletion confirmation flag, always True - """ - - id: str - object: Literal["response"] = "response" - deleted: bool = True - - -@json_schema_type -class OpenAIResponseObjectStreamResponseCreated(BaseModel): - """Streaming event indicating a new response has been created. - - :param response: The response object that was created - :param type: Event type identifier, always "response.created" - """ - - response: OpenAIResponseObject - type: Literal["response.created"] = "response.created" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseInProgress(BaseModel): - """Streaming event indicating the response remains in progress. - - :param response: Current response state while in progress - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.in_progress" - """ - - response: OpenAIResponseObject - sequence_number: int - type: Literal["response.in_progress"] = "response.in_progress" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseCompleted(BaseModel): - """Streaming event indicating a response has been completed. - - :param response: Completed response object - :param type: Event type identifier, always "response.completed" - """ - - response: OpenAIResponseObject - type: Literal["response.completed"] = "response.completed" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseIncomplete(BaseModel): - """Streaming event emitted when a response ends in an incomplete state. 
- - :param response: Response object describing the incomplete state - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.incomplete" - """ - - response: OpenAIResponseObject - sequence_number: int - type: Literal["response.incomplete"] = "response.incomplete" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseFailed(BaseModel): - """Streaming event emitted when a response fails. - - :param response: Response object describing the failure - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.failed" - """ - - response: OpenAIResponseObject - sequence_number: int - type: Literal["response.failed"] = "response.failed" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel): - """Streaming event for when a new output item is added to the response. - - :param response_id: Unique identifier of the response containing this output - :param item: The output item that was added (message, tool call, etc.) - :param output_index: Index position of this item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.output_item.added" - """ - - response_id: str - item: OpenAIResponseOutput - output_index: int - sequence_number: int - type: Literal["response.output_item.added"] = "response.output_item.added" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel): - """Streaming event for when an output item is completed. - - :param response_id: Unique identifier of the response containing this output - :param item: The completed output item (message, tool call, etc.) - :param output_index: Index position of this item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.output_item.done" - """ - - response_id: str - item: OpenAIResponseOutput - output_index: int - sequence_number: int - type: Literal["response.output_item.done"] = "response.output_item.done" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel): - """Streaming event for incremental text content updates. - - :param content_index: Index position within the text content - :param delta: Incremental text content being added - :param item_id: Unique identifier of the output item being updated - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.output_text.delta" - """ - - content_index: int - delta: str - item_id: str - output_index: int - sequence_number: int - type: Literal["response.output_text.delta"] = "response.output_text.delta" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel): - """Streaming event for when text output is completed. 
- - :param content_index: Index position within the text content - :param text: Final complete text content of the output item - :param item_id: Unique identifier of the completed output item - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.output_text.done" - """ - - content_index: int - text: str # final text of the output item - item_id: str - output_index: int - sequence_number: int - type: Literal["response.output_text.done"] = "response.output_text.done" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel): - """Streaming event for incremental function call argument updates. - - :param delta: Incremental function call arguments being added - :param item_id: Unique identifier of the function call being updated - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.function_call_arguments.delta" - """ - - delta: str - item_id: str - output_index: int - sequence_number: int - type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel): - """Streaming event for when function call arguments are completed. - - :param arguments: Final complete arguments JSON string for the function call - :param item_id: Unique identifier of the completed function call - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.function_call_arguments.done" - """ - - arguments: str # final arguments of the function call - item_id: str - output_index: int - sequence_number: int - type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel): - """Streaming event for web search calls in progress. - - :param item_id: Unique identifier of the web search call - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.web_search_call.in_progress" - """ - - item_id: str - output_index: int - sequence_number: int - type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel): - item_id: str - output_index: int - sequence_number: int - type: Literal["response.web_search_call.searching"] = "response.web_search_call.searching" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel): - """Streaming event for completed web search calls. 
- - :param item_id: Unique identifier of the completed web search call - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.web_search_call.completed" - """ - - item_id: str - output_index: int - sequence_number: int - type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel): - sequence_number: int - type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel): - sequence_number: int - type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel): - sequence_number: int - type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel): - delta: str - item_id: str - output_index: int - sequence_number: int - type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel): - arguments: str # final arguments of the MCP call - item_id: str - output_index: int - sequence_number: int - type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel): - """Streaming event for MCP calls in progress. - - :param item_id: Unique identifier of the MCP call - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.mcp_call.in_progress" - """ - - item_id: str - output_index: int - sequence_number: int - type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel): - """Streaming event for failed MCP calls. - - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.mcp_call.failed" - """ - - sequence_number: int - type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel): - """Streaming event for completed MCP calls. - - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.mcp_call.completed" - """ - - sequence_number: int - type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed" - - -@json_schema_type -class OpenAIResponseContentPartOutputText(BaseModel): - """Text content within a streamed response part. 
- - :param type: Content part type identifier, always "output_text" - :param text: Text emitted for this content part - :param annotations: Structured annotations associated with the text - :param logprobs: (Optional) Token log probability details - """ - - type: Literal["output_text"] = "output_text" - text: str - annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) - logprobs: list[dict[str, Any]] | None = None - - -@json_schema_type -class OpenAIResponseContentPartReasoningText(BaseModel): - """Reasoning text emitted as part of a streamed response. - - :param type: Content part type identifier, always "reasoning_text" - :param text: Reasoning text supplied by the model - """ - - type: Literal["reasoning_text"] = "reasoning_text" - text: str - - -OpenAIResponseContentPart = Annotated[ - OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText, - Field(discriminator="type"), -] -register_schema(OpenAIResponseContentPart, name="OpenAIResponseContentPart") - - -@json_schema_type -class OpenAIResponseObjectStreamResponseContentPartAdded(BaseModel): - """Streaming event for when a new content part is added to a response item. - - :param content_index: Index position of the part within the content array - :param response_id: Unique identifier of the response containing this content - :param item_id: Unique identifier of the output item containing this content part - :param output_index: Index position of the output item in the response - :param part: The content part that was added - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.content_part.added" - """ - - content_index: int - response_id: str - item_id: str - output_index: int - part: OpenAIResponseContentPart - sequence_number: int - type: Literal["response.content_part.added"] = "response.content_part.added" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseContentPartDone(BaseModel): - """Streaming event for when a content part is completed. - - :param content_index: Index position of the part within the content array - :param response_id: Unique identifier of the response containing this content - :param item_id: Unique identifier of the output item containing this content part - :param output_index: Index position of the output item in the response - :param part: The completed content part - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.content_part.done" - """ - - content_index: int - response_id: str - item_id: str - output_index: int - part: OpenAIResponseContentPart - sequence_number: int - type: Literal["response.content_part.done"] = "response.content_part.done" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseReasoningTextDelta(BaseModel): - """Streaming event for incremental reasoning text updates. 
- - :param content_index: Index position of the reasoning content part - :param delta: Incremental reasoning text being added - :param item_id: Unique identifier of the output item being updated - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.reasoning_text.delta" - """ - - content_index: int - delta: str - item_id: str - output_index: int - sequence_number: int - type: Literal["response.reasoning_text.delta"] = "response.reasoning_text.delta" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseReasoningTextDone(BaseModel): - """Streaming event for when reasoning text is completed. - - :param content_index: Index position of the reasoning content part - :param text: Final complete reasoning text - :param item_id: Unique identifier of the completed output item - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.reasoning_text.done" - """ - - content_index: int - text: str - item_id: str - output_index: int - sequence_number: int - type: Literal["response.reasoning_text.done"] = "response.reasoning_text.done" - - -@json_schema_type -class OpenAIResponseContentPartReasoningSummary(BaseModel): - """Reasoning summary part in a streamed response. - - :param type: Content part type identifier, always "summary_text" - :param text: Summary text - """ - - type: Literal["summary_text"] = "summary_text" - text: str - - -@json_schema_type -class OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded(BaseModel): - """Streaming event for when a new reasoning summary part is added. - - :param item_id: Unique identifier of the output item - :param output_index: Index position of the output item - :param part: The summary part that was added - :param sequence_number: Sequential number for ordering streaming events - :param summary_index: Index of the summary part within the reasoning summary - :param type: Event type identifier, always "response.reasoning_summary_part.added" - """ - - item_id: str - output_index: int - part: OpenAIResponseContentPartReasoningSummary - sequence_number: int - summary_index: int - type: Literal["response.reasoning_summary_part.added"] = "response.reasoning_summary_part.added" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseReasoningSummaryPartDone(BaseModel): - """Streaming event for when a reasoning summary part is completed. - - :param item_id: Unique identifier of the output item - :param output_index: Index position of the output item - :param part: The completed summary part - :param sequence_number: Sequential number for ordering streaming events - :param summary_index: Index of the summary part within the reasoning summary - :param type: Event type identifier, always "response.reasoning_summary_part.done" - """ - - item_id: str - output_index: int - part: OpenAIResponseContentPartReasoningSummary - sequence_number: int - summary_index: int - type: Literal["response.reasoning_summary_part.done"] = "response.reasoning_summary_part.done" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta(BaseModel): - """Streaming event for incremental reasoning summary text updates. 
- - :param delta: Incremental summary text being added - :param item_id: Unique identifier of the output item - :param output_index: Index position of the output item - :param sequence_number: Sequential number for ordering streaming events - :param summary_index: Index of the summary part within the reasoning summary - :param type: Event type identifier, always "response.reasoning_summary_text.delta" - """ - - delta: str - item_id: str - output_index: int - sequence_number: int - summary_index: int - type: Literal["response.reasoning_summary_text.delta"] = "response.reasoning_summary_text.delta" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseReasoningSummaryTextDone(BaseModel): - """Streaming event for when reasoning summary text is completed. - - :param text: Final complete summary text - :param item_id: Unique identifier of the output item - :param output_index: Index position of the output item - :param sequence_number: Sequential number for ordering streaming events - :param summary_index: Index of the summary part within the reasoning summary - :param type: Event type identifier, always "response.reasoning_summary_text.done" - """ - - text: str - item_id: str - output_index: int - sequence_number: int - summary_index: int - type: Literal["response.reasoning_summary_text.done"] = "response.reasoning_summary_text.done" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseRefusalDelta(BaseModel): - """Streaming event for incremental refusal text updates. - - :param content_index: Index position of the content part - :param delta: Incremental refusal text being added - :param item_id: Unique identifier of the output item - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.refusal.delta" - """ - - content_index: int - delta: str - item_id: str - output_index: int - sequence_number: int - type: Literal["response.refusal.delta"] = "response.refusal.delta" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseRefusalDone(BaseModel): - """Streaming event for when refusal text is completed. - - :param content_index: Index position of the content part - :param refusal: Final complete refusal text - :param item_id: Unique identifier of the output item - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.refusal.done" - """ - - content_index: int - refusal: str - item_id: str - output_index: int - sequence_number: int - type: Literal["response.refusal.done"] = "response.refusal.done" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded(BaseModel): - """Streaming event for when an annotation is added to output text. 
- - :param item_id: Unique identifier of the item to which the annotation is being added - :param output_index: Index position of the output item in the response's output array - :param content_index: Index position of the content part within the output item - :param annotation_index: Index of the annotation within the content part - :param annotation: The annotation object being added - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.output_text.annotation.added" - """ - - item_id: str - output_index: int - content_index: int - annotation_index: int - annotation: OpenAIResponseAnnotations - sequence_number: int - type: Literal["response.output_text.annotation.added"] = "response.output_text.annotation.added" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseFileSearchCallInProgress(BaseModel): - """Streaming event for file search calls in progress. - - :param item_id: Unique identifier of the file search call - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.file_search_call.in_progress" - """ - - item_id: str - output_index: int - sequence_number: int - type: Literal["response.file_search_call.in_progress"] = "response.file_search_call.in_progress" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseFileSearchCallSearching(BaseModel): - """Streaming event for file search currently searching. - - :param item_id: Unique identifier of the file search call - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.file_search_call.searching" - """ - - item_id: str - output_index: int - sequence_number: int - type: Literal["response.file_search_call.searching"] = "response.file_search_call.searching" - - -@json_schema_type -class OpenAIResponseObjectStreamResponseFileSearchCallCompleted(BaseModel): - """Streaming event for completed file search calls. 
- - :param item_id: Unique identifier of the completed file search call - :param output_index: Index position of the item in the output list - :param sequence_number: Sequential number for ordering streaming events - :param type: Event type identifier, always "response.file_search_call.completed" - """ - - item_id: str - output_index: int - sequence_number: int - type: Literal["response.file_search_call.completed"] = "response.file_search_call.completed" - - -OpenAIResponseObjectStream = Annotated[ - OpenAIResponseObjectStreamResponseCreated - | OpenAIResponseObjectStreamResponseInProgress - | OpenAIResponseObjectStreamResponseOutputItemAdded - | OpenAIResponseObjectStreamResponseOutputItemDone - | OpenAIResponseObjectStreamResponseOutputTextDelta - | OpenAIResponseObjectStreamResponseOutputTextDone - | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta - | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone - | OpenAIResponseObjectStreamResponseWebSearchCallInProgress - | OpenAIResponseObjectStreamResponseWebSearchCallSearching - | OpenAIResponseObjectStreamResponseWebSearchCallCompleted - | OpenAIResponseObjectStreamResponseMcpListToolsInProgress - | OpenAIResponseObjectStreamResponseMcpListToolsFailed - | OpenAIResponseObjectStreamResponseMcpListToolsCompleted - | OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta - | OpenAIResponseObjectStreamResponseMcpCallArgumentsDone - | OpenAIResponseObjectStreamResponseMcpCallInProgress - | OpenAIResponseObjectStreamResponseMcpCallFailed - | OpenAIResponseObjectStreamResponseMcpCallCompleted - | OpenAIResponseObjectStreamResponseContentPartAdded - | OpenAIResponseObjectStreamResponseContentPartDone - | OpenAIResponseObjectStreamResponseReasoningTextDelta - | OpenAIResponseObjectStreamResponseReasoningTextDone - | OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded - | OpenAIResponseObjectStreamResponseReasoningSummaryPartDone - | OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta - | OpenAIResponseObjectStreamResponseReasoningSummaryTextDone - | OpenAIResponseObjectStreamResponseRefusalDelta - | OpenAIResponseObjectStreamResponseRefusalDone - | OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded - | OpenAIResponseObjectStreamResponseFileSearchCallInProgress - | OpenAIResponseObjectStreamResponseFileSearchCallSearching - | OpenAIResponseObjectStreamResponseFileSearchCallCompleted - | OpenAIResponseObjectStreamResponseIncomplete - | OpenAIResponseObjectStreamResponseFailed - | OpenAIResponseObjectStreamResponseCompleted, - Field(discriminator="type"), -] -register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream") - - -@json_schema_type -class OpenAIResponseInputFunctionToolCallOutput(BaseModel): - """ - This represents the output of a function call that gets passed back to the model. 
- """ - - call_id: str - output: str - type: Literal["function_call_output"] = "function_call_output" - id: str | None = None - status: str | None = None - - -OpenAIResponseInput = Annotated[ - # Responses API allows output messages to be passed in as input - OpenAIResponseOutputMessageWebSearchToolCall - | OpenAIResponseOutputMessageFileSearchToolCall - | OpenAIResponseOutputMessageFunctionToolCall - | OpenAIResponseInputFunctionToolCallOutput - | OpenAIResponseMCPApprovalRequest - | OpenAIResponseMCPApprovalResponse - | OpenAIResponseOutputMessageMCPCall - | OpenAIResponseOutputMessageMCPListTools - | OpenAIResponseMessage, - Field(union_mode="left_to_right"), -] -register_schema(OpenAIResponseInput, name="OpenAIResponseInput") - - -class ListOpenAIResponseInputItem(BaseModel): - """List container for OpenAI response input items. - - :param data: List of input items - :param object: Object type identifier, always "list" - """ - - data: list[OpenAIResponseInput] - object: Literal["list"] = "list" - - -@json_schema_type -class OpenAIResponseObjectWithInput(OpenAIResponseObject): - """OpenAI response object extended with input context information. - - :param input: List of input items that led to this response - """ - - input: list[OpenAIResponseInput] - - def to_response_object(self) -> OpenAIResponseObject: - """Convert to OpenAIResponseObject by excluding input field.""" - return OpenAIResponseObject(**{k: v for k, v in self.model_dump().items() if k != "input"}) - - -@json_schema_type -class ListOpenAIResponseObject(BaseModel): - """Paginated list of OpenAI response objects with navigation metadata. - - :param data: List of response objects with their input context - :param has_more: Whether there are more results available beyond this page - :param first_id: Identifier of the first item in this page - :param last_id: Identifier of the last item in this page - :param object: Object type identifier, always "list" - """ - - data: list[OpenAIResponseObjectWithInput] - has_more: bool - first_id: str - last_id: str - object: Literal["list"] = "list" diff --git a/llama_stack/apis/batches/__init__.py b/llama_stack/apis/batches/__init__.py deleted file mode 100644 index 9ce7d3d75..000000000 --- a/llama_stack/apis/batches/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .batches import Batches, BatchObject, ListBatchesResponse - -__all__ = ["Batches", "BatchObject", "ListBatchesResponse"] diff --git a/llama_stack/apis/batches/batches.py b/llama_stack/apis/batches/batches.py deleted file mode 100644 index 2801fa658..000000000 --- a/llama_stack/apis/batches/batches.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Literal, Protocol, runtime_checkable - -from pydantic import BaseModel, Field - -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod - -try: - from openai.types import Batch as BatchObject -except ImportError as e: - raise ImportError("OpenAI package is required for batches API. 
Please install it with: pip install openai") from e - - -@json_schema_type -class ListBatchesResponse(BaseModel): - """Response containing a list of batch objects.""" - - object: Literal["list"] = "list" - data: list[BatchObject] = Field(..., description="List of batch objects") - first_id: str | None = Field(default=None, description="ID of the first batch in the list") - last_id: str | None = Field(default=None, description="ID of the last batch in the list") - has_more: bool = Field(default=False, description="Whether there are more batches available") - - -@runtime_checkable -class Batches(Protocol): - """ - The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. - - The API is designed to allow use of openai client libraries for seamless integration. - - This API provides the following extensions: - - idempotent batch creation - - Note: This API is currently under active development and may undergo changes. - """ - - @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1) - async def create_batch( - self, - input_file_id: str, - endpoint: str, - completion_window: Literal["24h"], - metadata: dict[str, str] | None = None, - idempotency_key: str | None = None, - ) -> BatchObject: - """Create a new batch for processing multiple API requests. - - :param input_file_id: The ID of an uploaded file containing requests for the batch. - :param endpoint: The endpoint to be used for all requests in the batch. - :param completion_window: The time window within which the batch should be processed. - :param metadata: Optional metadata for the batch. - :param idempotency_key: Optional idempotency key. When provided, enables idempotent behavior. - :returns: The created batch object. - """ - ... - - @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1) - async def retrieve_batch(self, batch_id: str) -> BatchObject: - """Retrieve information about a specific batch. - - :param batch_id: The ID of the batch to retrieve. - :returns: The batch object. - """ - ... - - @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1) - async def cancel_batch(self, batch_id: str) -> BatchObject: - """Cancel a batch that is in progress. - - :param batch_id: The ID of the batch to cancel. - :returns: The updated batch object. - """ - ... - - @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1) - async def list_batches( - self, - after: str | None = None, - limit: int = 20, - ) -> ListBatchesResponse: - """List all batches for the current user. - - :param after: A cursor for pagination; returns batches after this batch ID. - :param limit: Number of batches to return (default 20, max 100). - :returns: A list of batch objects. - """ - ... 
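A note on the `Annotated[..., Field(discriminator="type")]` unions registered throughout the deleted `openai_responses.py` above: Pydantic uses the literal `type` tag to pick exactly one member model during parsing, rather than trying each member in turn. Below is a minimal standalone sketch of that pattern; the two toy models stand in for the real content classes and are not part of the source.

```python
# Minimal sketch of the type-discriminated union pattern used above.
# TextPart/ImagePart are toy stand-ins, not the real models.
from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter


class TextPart(BaseModel):
    type: Literal["input_text"] = "input_text"
    text: str


class ImagePart(BaseModel):
    type: Literal["input_image"] = "input_image"
    image_url: str | None = None


Part = Annotated[TextPart | ImagePart, Field(discriminator="type")]

# The "type" tag selects the member model directly, with no trial parsing.
part = TypeAdapter(Part).validate_python(
    {"type": "input_image", "image_url": "https://example.com/x.png"}
)
assert isinstance(part, ImagePart)
```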
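The streaming event models above follow a delta/done contract: `*.delta` events carry increments keyed by `item_id`, the matching `*.done` event repeats the final value, and the terminal `response.completed` / `response.failed` / `response.incomplete` events carry the whole response object. A sketch of a consumer that folds such a stream, assuming `events` yields already-parsed event objects:

```python
# Sketch: fold a parsed OpenAIResponseObjectStream into final values.
# The delta/done contract is taken from the models above; `events` is
# assumed to be any iterable of parsed stream event objects.
from collections import defaultdict


def fold_stream(events):
    texts = defaultdict(list)  # item_id -> accumulated text deltas
    call_args = defaultdict(list)  # item_id -> accumulated argument deltas
    final = None
    for event in events:
        if event.type == "response.output_text.delta":
            texts[event.item_id].append(event.delta)
        elif event.type == "response.output_text.done":
            # the done event carries the complete text for the item
            assert "".join(texts[event.item_id]) == event.text
        elif event.type == "response.function_call_arguments.delta":
            call_args[event.item_id].append(event.delta)
        elif event.type == "response.function_call_arguments.done":
            assert "".join(call_args[event.item_id]) == event.arguments
        elif event.type in ("response.completed", "response.failed", "response.incomplete"):
            final = event.response  # terminal events embed the full response
    return final
```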
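One protocol detail worth calling out from the models above: when an MCP tool is configured with `require_approval` other than `"never"`, the server emits an `mcp_approval_request` output item and the caller answers with an `mcp_approval_response` input item whose `approval_request_id` points back at it. A plain-dict sketch of that round trip; the IDs, tool name, and approval policy are invented for illustration:

```python
# Sketch: answering mcp_approval_request items with mcp_approval_response
# items, per the request/response models above. Payload values are invented.
def approve_mcp_calls(output_items: list[dict], denied: set[str]) -> list[dict]:
    responses = []
    for item in output_items:
        if item.get("type") == "mcp_approval_request":
            approve = item["name"] not in denied
            responses.append(
                {
                    "type": "mcp_approval_response",
                    "approval_request_id": item["id"],  # links back to the request
                    "approve": approve,
                    "reason": None if approve else "tool is on the deny list",
                }
            )
    return responses


request = {
    "type": "mcp_approval_request",
    "id": "apr_123",  # invented ID
    "name": "delete_file",
    "server_label": "fs",
    "arguments": "{}",
}
print(approve_mcp_calls([request], denied={"delete_file"}))
```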
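Since the Batches routes deleted above deliberately mirror OpenAI's, the stock `openai` client can drive them when pointed at a Llama Stack server. A sketch under assumptions: the base URL, API key, and file ID are placeholders, and `idempotency_key` (a Llama Stack extension, not part of the OpenAI SDK signature) is passed through `extra_body`:

```python
# Sketch: exercising the Batches API above with the standard OpenAI client.
# Base URL, key, and file ID are placeholder assumptions.
import time

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

batch = client.batches.create(
    input_file_id="file-abc123",  # hypothetical uploaded request file
    endpoint="/v1/chat/completions",
    completion_window="24h",  # the only window this API accepts
    metadata={"job": "nightly-eval"},
    extra_body={"idempotency_key": "nightly-eval-001"},  # Llama Stack extension
)

# Poll until the batch leaves its in-flight states.
while batch.status in ("validating", "in_progress", "finalizing"):
    time.sleep(10)
    batch = client.batches.retrieve(batch.id)
print(batch.status, batch.output_file_id)

# list_batches paginates with after/limit and signals continuation via
# has_more/last_id; the SDK's cursor page handles that when iterated.
for b in client.batches.list(limit=20):
    print(b.id, b.status)
```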
diff --git a/llama_stack/apis/benchmarks/__init__.py b/llama_stack/apis/benchmarks/__init__.py deleted file mode 100644 index 62d1b367c..000000000 --- a/llama_stack/apis/benchmarks/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .benchmarks import * diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py deleted file mode 100644 index d87d45a60..000000000 --- a/llama_stack/apis/benchmarks/benchmarks.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -from typing import Any, Literal, Protocol, runtime_checkable - -from pydantic import BaseModel, Field - -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, webmethod - - -class CommonBenchmarkFields(BaseModel): - dataset_id: str - scoring_functions: list[str] - metadata: dict[str, Any] = Field( - default_factory=dict, - description="Metadata for this evaluation task", - ) - - -@json_schema_type -class Benchmark(CommonBenchmarkFields, Resource): - """A benchmark resource for evaluating model performance. - - :param dataset_id: Identifier of the dataset to use for the benchmark evaluation - :param scoring_functions: List of scoring function identifiers to apply during evaluation - :param metadata: Metadata for this evaluation task - :param type: The resource type, always benchmark - """ - - type: Literal[ResourceType.benchmark] = ResourceType.benchmark - - @property - def benchmark_id(self) -> str: - return self.identifier - - @property - def provider_benchmark_id(self) -> str | None: - return self.provider_resource_id - - -class BenchmarkInput(CommonBenchmarkFields, BaseModel): - benchmark_id: str - provider_id: str | None = None - provider_benchmark_id: str | None = None - - -class ListBenchmarksResponse(BaseModel): - data: list[Benchmark] - - -@runtime_checkable -class Benchmarks(Protocol): - @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def list_benchmarks(self) -> ListBenchmarksResponse: - """List all benchmarks. - - :returns: A ListBenchmarksResponse. - """ - ... - - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def get_benchmark( - self, - benchmark_id: str, - ) -> Benchmark: - """Get a benchmark by its ID. - - :param benchmark_id: The ID of the benchmark to get. - :returns: A Benchmark. - """ - ... - - @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA) - async def register_benchmark( - self, - benchmark_id: str, - dataset_id: str, - scoring_functions: list[str], - provider_benchmark_id: str | None = None, - provider_id: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> None: - """Register a benchmark. 
- - :param benchmark_id: The ID of the benchmark to register. - :param dataset_id: The ID of the dataset to use for the benchmark. - :param scoring_functions: The scoring functions to use for the benchmark. - :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. - :param provider_id: The ID of the provider to use for the benchmark. - :param metadata: The metadata to use for the benchmark. - """ - ... - - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) - async def unregister_benchmark(self, benchmark_id: str) -> None: - """Unregister a benchmark. - - :param benchmark_id: The ID of the benchmark to unregister. - """ - ... diff --git a/llama_stack/apis/common/content_types.py b/llama_stack/apis/common/content_types.py deleted file mode 100644 index 950dd17ff..000000000 --- a/llama_stack/apis/common/content_types.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum -from typing import Annotated, Literal - -from pydantic import BaseModel, Field, model_validator - -from llama_stack.models.llama.datatypes import ToolCall -from llama_stack.schema_utils import json_schema_type, register_schema - - -@json_schema_type -class URL(BaseModel): - """A URL reference to external content. - - :param uri: The URL string pointing to the resource - """ - - uri: str - - -class _URLOrData(BaseModel): - """ - A URL or a base64 encoded string - - :param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. - :param data: base64 encoded image data as string - """ - - url: URL | None = None - # data is a base64 encoded string, hint with contentEncoding=base64 - data: str | None = Field(default=None, json_schema_extra={"contentEncoding": "base64"}) - - @model_validator(mode="before") - @classmethod - def validator(cls, values): - if isinstance(values, dict): - return values - return {"url": values} - - -@json_schema_type -class ImageContentItem(BaseModel): - """An image content item - - :param type: Discriminator type of the content item. Always "image" - :param image: Image as a base64 encoded string or a URL - """ - - type: Literal["image"] = "image" - image: _URLOrData - - -@json_schema_type -class TextContentItem(BaseModel): - """A text content item - - :param type: Discriminator type of the content item. Always "text" - :param text: Text content - """ - - type: Literal["text"] = "text" - text: str - - -# other modalities can be added here -InterleavedContentItem = Annotated[ - ImageContentItem | TextContentItem, - Field(discriminator="type"), -] -register_schema(InterleavedContentItem, name="InterleavedContentItem") - -# accept a single "str" as a special case since it is common -InterleavedContent = str | InterleavedContentItem | list[InterleavedContentItem] -register_schema(InterleavedContent, name="InterleavedContent") - - -@json_schema_type -class TextDelta(BaseModel): - """A text content delta for streaming responses. - - :param type: Discriminator type of the delta. 
Always "text" - :param text: The incremental text content - """ - - type: Literal["text"] = "text" - text: str - - -@json_schema_type -class ImageDelta(BaseModel): - """An image content delta for streaming responses. - - :param type: Discriminator type of the delta. Always "image" - :param image: The incremental image data as bytes - """ - - type: Literal["image"] = "image" - image: bytes - - -class ToolCallParseStatus(Enum): - """Status of tool call parsing during streaming. - :cvar started: Tool call parsing has begun - :cvar in_progress: Tool call parsing is ongoing - :cvar failed: Tool call parsing failed - :cvar succeeded: Tool call parsing completed successfully - """ - - started = "started" - in_progress = "in_progress" - failed = "failed" - succeeded = "succeeded" - - -@json_schema_type -class ToolCallDelta(BaseModel): - """A tool call content delta for streaming responses. - - :param type: Discriminator type of the delta. Always "tool_call" - :param tool_call: Either an in-progress tool call string or the final parsed tool call - :param parse_status: Current parsing status of the tool call - """ - - type: Literal["tool_call"] = "tool_call" - - # you either send an in-progress tool call so the client can stream a long - # code generation or you send the final parsed tool call at the end of the - # stream - tool_call: str | ToolCall - parse_status: ToolCallParseStatus - - -# streaming completions send a stream of ContentDeltas -ContentDelta = Annotated[ - TextDelta | ImageDelta | ToolCallDelta, - Field(discriminator="type"), -] -register_schema(ContentDelta, name="ContentDelta") diff --git a/llama_stack/apis/common/responses.py b/llama_stack/apis/common/responses.py deleted file mode 100644 index 616bee73a..000000000 --- a/llama_stack/apis/common/responses.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum -from typing import Any - -from pydantic import BaseModel - -from llama_stack.schema_utils import json_schema_type - - -class Order(Enum): - """Sort order for paginated responses. - :cvar asc: Ascending order - :cvar desc: Descending order - """ - - asc = "asc" - desc = "desc" - - -@json_schema_type -class PaginatedResponse(BaseModel): - """A generic paginated response that follows a simple format. - - :param data: The list of items for the current page - :param has_more: Whether there are more items available after this set - :param url: The URL for accessing this list - """ - - data: list[dict[str, Any]] - has_more: bool - url: str | None = None diff --git a/llama_stack/apis/conversations/__init__.py b/llama_stack/apis/conversations/__init__.py deleted file mode 100644 index 2d214d27a..000000000 --- a/llama_stack/apis/conversations/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .conversations import ( - Conversation, - ConversationCreateRequest, - ConversationDeletedResource, - ConversationItem, - ConversationItemCreateRequest, - ConversationItemDeletedResource, - ConversationItemList, - Conversations, - ConversationUpdateRequest, - Metadata, -) - -__all__ = [ - "Conversation", - "ConversationCreateRequest", - "ConversationDeletedResource", - "ConversationItem", - "ConversationItemCreateRequest", - "ConversationItemDeletedResource", - "ConversationItemList", - "Conversations", - "ConversationUpdateRequest", - "Metadata", -] diff --git a/llama_stack/apis/conversations/conversations.py b/llama_stack/apis/conversations/conversations.py deleted file mode 100644 index d7752995d..000000000 --- a/llama_stack/apis/conversations/conversations.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Annotated, Literal, Protocol, runtime_checkable - -from openai import NOT_GIVEN -from openai._types import NotGiven -from openai.types.responses.response_includable import ResponseIncludable -from pydantic import BaseModel, Field - -from llama_stack.apis.agents.openai_responses import ( - OpenAIResponseInputFunctionToolCallOutput, - OpenAIResponseMCPApprovalRequest, - OpenAIResponseMCPApprovalResponse, - OpenAIResponseMessage, - OpenAIResponseOutputMessageFileSearchToolCall, - OpenAIResponseOutputMessageFunctionToolCall, - OpenAIResponseOutputMessageMCPCall, - OpenAIResponseOutputMessageMCPListTools, - OpenAIResponseOutputMessageWebSearchToolCall, -) -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod - -Metadata = dict[str, str] - - -@json_schema_type -class Conversation(BaseModel): - """OpenAI-compatible conversation object.""" - - id: str = Field(..., description="The unique ID of the conversation.") - object: Literal["conversation"] = Field( - default="conversation", description="The object type, which is always conversation." - ) - created_at: int = Field( - ..., description="The time at which the conversation was created, measured in seconds since the Unix epoch." - ) - metadata: Metadata | None = Field( - default=None, - description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard.", - ) - items: list[dict] | None = Field( - default=None, - description="Initial items to include in the conversation context. 
You may add up to 20 items at a time.", - ) - - -@json_schema_type -class ConversationMessage(BaseModel): - """OpenAI-compatible message item for conversations.""" - - id: str = Field(..., description="unique identifier for this message") - content: list[dict] = Field(..., description="message content") - role: str = Field(..., description="message role") - status: str = Field(..., description="message status") - type: Literal["message"] = "message" - object: Literal["message"] = "message" - - -ConversationItem = Annotated[ - OpenAIResponseMessage - | OpenAIResponseOutputMessageWebSearchToolCall - | OpenAIResponseOutputMessageFileSearchToolCall - | OpenAIResponseOutputMessageFunctionToolCall - | OpenAIResponseInputFunctionToolCallOutput - | OpenAIResponseMCPApprovalRequest - | OpenAIResponseMCPApprovalResponse - | OpenAIResponseOutputMessageMCPCall - | OpenAIResponseOutputMessageMCPListTools, - Field(discriminator="type"), -] -register_schema(ConversationItem, name="ConversationItem") - -# Using OpenAI types directly caused issues; some notes for reference: -# Note that ConversationItem is an Annotated Union of the types below: -# from openai.types.responses import * -# from openai.types.responses.response_item import * -# from openai.types.conversations import ConversationItem -# f = [ -# ResponseFunctionToolCallItem, -# ResponseFunctionToolCallOutputItem, -# ResponseFileSearchToolCall, -# ResponseFunctionWebSearch, -# ImageGenerationCall, -# ResponseComputerToolCall, -# ResponseComputerToolCallOutputItem, -# ResponseReasoningItem, -# ResponseCodeInterpreterToolCall, -# LocalShellCall, -# LocalShellCallOutput, -# McpListTools, -# McpApprovalRequest, -# McpApprovalResponse, -# McpCall, -# ResponseCustomToolCall, -# ResponseCustomToolCallOutput -# ] - - -@json_schema_type -class ConversationCreateRequest(BaseModel): - """Request body for creating a conversation.""" - - items: list[ConversationItem] | None = Field( - default=[], - description="Initial items to include in the conversation context. You may add up to 20 items at a time.", - max_length=20, - ) - metadata: Metadata | None = Field( - default={}, - description="Set of 16 key-value pairs that can be attached to an object. Useful for storing additional information", - max_length=16, - ) - - -@json_schema_type -class ConversationUpdateRequest(BaseModel): - """Request body for updating a conversation.""" - - metadata: Metadata = Field( - ..., - description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.", - ) - - -@json_schema_type -class ConversationDeletedResource(BaseModel): - """Response for deleted conversation.""" - - id: str = Field(..., description="The deleted conversation identifier") - object: str = Field(default="conversation.deleted", description="Object type") - deleted: bool = Field(default=True, description="Whether the object was deleted") - - -@json_schema_type -class ConversationItemCreateRequest(BaseModel): - """Request body for creating conversation items.""" - - items: list[ConversationItem] = Field( - ..., - description="Items to include in the conversation context. 
You may add up to 20 items at a time.", - max_length=20, - ) - - -@json_schema_type -class ConversationItemList(BaseModel): - """List of conversation items with pagination.""" - - object: str = Field(default="list", description="Object type") - data: list[ConversationItem] = Field(..., description="List of conversation items") - first_id: str | None = Field(default=None, description="The ID of the first item in the list") - last_id: str | None = Field(default=None, description="The ID of the last item in the list") - has_more: bool = Field(default=False, description="Whether there are more items available") - - -@json_schema_type -class ConversationItemDeletedResource(BaseModel): - """Response for deleted conversation item.""" - - id: str = Field(..., description="The deleted item identifier") - object: str = Field(default="conversation.item.deleted", description="Object type") - deleted: bool = Field(default=True, description="Whether the object was deleted") - - -@runtime_checkable -@trace_protocol -class Conversations(Protocol): - """Conversations - - Protocol for conversation management operations.""" - - @webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1) - async def create_conversation( - self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None - ) -> Conversation: - """Create a conversation. - - Create a conversation. - - :param items: Initial items to include in the conversation context. - :param metadata: Set of key-value pairs that can be attached to an object. - :returns: The created conversation object. - """ - ... - - @webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1) - async def get_conversation(self, conversation_id: str) -> Conversation: - """Retrieve a conversation. - - Get a conversation with the given ID. - - :param conversation_id: The conversation identifier. - :returns: The conversation object. - """ - ... - - @webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1) - async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation: - """Update a conversation. - - Update a conversation's metadata with the given ID. - - :param conversation_id: The conversation identifier. - :param metadata: Set of key-value pairs that can be attached to an object. - :returns: The updated conversation object. - """ - ... - - @webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1) - async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource: - """Delete a conversation. - - Delete a conversation with the given ID. - - :param conversation_id: The conversation identifier. - :returns: The deleted conversation resource. - """ - ... - - @webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1) - async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList: - """Create items. - - Create items in the conversation. - - :param conversation_id: The conversation identifier. - :param items: Items to include in the conversation context. - :returns: List of created items. - """ - ... - - @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1) - async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem: - """Retrieve an item. - - Retrieve a conversation item. - - :param conversation_id: The conversation identifier. 
- :param item_id: The item identifier. - :returns: The conversation item. - """ - ... - - @webmethod(route="/conversations/{conversation_id}/items", method="GET", level=LLAMA_STACK_API_V1) - async def list( - self, - conversation_id: str, - after: str | NotGiven = NOT_GIVEN, - include: list[ResponseIncludable] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - ) -> ConversationItemList: - """List items. - - List items in the conversation. - - :param conversation_id: The conversation identifier. - :param after: An item ID to list items after, used in pagination. - :param include: Specify additional output data to include in the response. - :param limit: A limit on the number of objects to be returned (1-100, default 20). - :param order: The order to return items in (asc or desc, default desc). - :returns: List of conversation items. - """ - ... - - @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="DELETE", level=LLAMA_STACK_API_V1) - async def openai_delete_conversation_item( - self, conversation_id: str, item_id: str - ) -> ConversationItemDeletedResource: - """Delete an item. - - Delete a conversation item. - - :param conversation_id: The conversation identifier. - :param item_id: The item identifier. - :returns: The deleted item resource. - """ - ... diff --git a/llama_stack/apis/datasetio/__init__.py b/llama_stack/apis/datasetio/__init__.py deleted file mode 100644 index 8c087bfa4..000000000 --- a/llama_stack/apis/datasetio/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasetio import * diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py deleted file mode 100644 index 5b23c83d6..000000000 --- a/llama_stack/apis/datasetio/datasetio.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any, Protocol, runtime_checkable - -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasets import Dataset -from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA -from llama_stack.schema_utils import webmethod - - -class DatasetStore(Protocol): - def get_dataset(self, dataset_id: str) -> Dataset: ... - - -@runtime_checkable -class DatasetIO(Protocol): - # keeping for aligning with inference/safety, but this is not used - dataset_store: DatasetStore - - @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1) - @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA) - async def iterrows( - self, - dataset_id: str, - start_index: int | None = None, - limit: int | None = None, - ) -> PaginatedResponse: - """Get a paginated list of rows from a dataset. - - Uses offset-based pagination where: - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - - The response includes: - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. 
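Editor's note: to make the removed Conversations surface concrete, here is a minimal usage sketch. `impl` stands for any object implementing the protocol, and the single-string message payload is an assumption for illustration; real items must validate against the ConversationItem discriminated union.

async def demo_conversations(impl: Conversations) -> None:
    # Create a conversation with metadata (up to 16 key-value pairs).
    conv = await impl.create_conversation(metadata={"topic": "demo"})
    # Append an item (payload shape assumed for illustration).
    await impl.add_items(conv.id, [OpenAIResponseMessage(role="user", content="Hello!")])
    # Page through items; omitted arguments keep OpenAI-compatible defaults.
    page = await impl.list(conv.id, limit=20)
    print([item.type for item in page.data])
    # Clean up.
    await impl.openai_delete_conversation(conv.id)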
diff --git a/llama_stack/apis/datasetio/__init__.py b/llama_stack/apis/datasetio/__init__.py
deleted file mode 100644
index 8c087bfa4..000000000
--- a/llama_stack/apis/datasetio/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .datasetio import *
diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py
deleted file mode 100644
index 5b23c83d6..000000000
--- a/llama_stack/apis/datasetio/datasetio.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any, Protocol, runtime_checkable
-
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
-from llama_stack.schema_utils import webmethod
-
-
-class DatasetStore(Protocol):
-    def get_dataset(self, dataset_id: str) -> Dataset: ...
-
-
-@runtime_checkable
-class DatasetIO(Protocol):
-    # keeping for aligning with inference/safety, but this is not used
-    dataset_store: DatasetStore
-
-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
-    async def iterrows(
-        self,
-        dataset_id: str,
-        start_index: int | None = None,
-        limit: int | None = None,
-    ) -> PaginatedResponse:
-        """Get a paginated list of rows from a dataset.
-
-        Uses offset-based pagination where:
-        - start_index: The starting index (0-based). If None, starts from beginning.
-        - limit: Number of items to return. If None or -1, returns all items.
-
-        The response includes:
-        - data: List of items for the current page.
-        - has_more: Whether there are more items available after this set.
-
-        :param dataset_id: The ID of the dataset to get the rows from.
-        :param start_index: Index into dataset for the first row to get. Get all rows if None.
-        :param limit: The number of rows to get.
-        :returns: A PaginatedResponse.
-        """
-        ...
-
-    @webmethod(
-        route="/datasetio/append-rows/{dataset_id:path}", method="POST", deprecated=True, level=LLAMA_STACK_API_V1
-    )
-    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA)
-    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
-        """Append rows to a dataset.
-
-        :param dataset_id: The ID of the dataset to append the rows to.
-        :param rows: The rows to append to the dataset.
-        """
-        ...
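Editor's note: a short pagination sketch for the iterrows contract above. It assumes an implementation of the DatasetIO protocol bound to `datasetio` and relies only on the `data` and `has_more` fields the docstring describes.

async def dump_dataset(datasetio: DatasetIO, dataset_id: str) -> None:
    # Offset-based pagination: advance start_index by the number of rows seen.
    start = 0
    while True:
        page = await datasetio.iterrows(dataset_id, start_index=start, limit=100)
        for row in page.data:
            print(row)
        if not page.has_more:
            break
        start += len(page.data)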
- - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - - "data:csv;base64,{base64_content}" - """ - - type: Literal["uri"] = "uri" - uri: str - - -@json_schema_type -class RowsDataSource(BaseModel): - """A dataset stored in rows. - :param rows: The dataset is stored in rows. E.g. - - [ - {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]} - ] - """ - - type: Literal["rows"] = "rows" - rows: list[dict[str, Any]] - - -DataSource = Annotated[ - URIDataSource | RowsDataSource, - Field(discriminator="type"), -] -register_schema(DataSource, name="DataSource") - - -class CommonDatasetFields(BaseModel): - """ - Common fields for a dataset. - - :param purpose: Purpose of the dataset indicating its intended use - :param source: Data source configuration for the dataset - :param metadata: Additional metadata for the dataset - """ - - purpose: DatasetPurpose - source: DataSource - metadata: dict[str, Any] = Field( - default_factory=dict, - description="Any additional metadata for this dataset", - ) - - -@json_schema_type -class Dataset(CommonDatasetFields, Resource): - """Dataset resource for storing and accessing training or evaluation data. - - :param type: Type of resource, always 'dataset' for datasets - """ - - type: Literal[ResourceType.dataset] = ResourceType.dataset - - @property - def dataset_id(self) -> str: - return self.identifier - - @property - def provider_dataset_id(self) -> str | None: - return self.provider_resource_id - - -class DatasetInput(CommonDatasetFields, BaseModel): - """Input parameters for dataset operations. - - :param dataset_id: Unique identifier for the dataset - """ - - dataset_id: str - - -class ListDatasetsResponse(BaseModel): - """Response from listing datasets. - - :param data: List of datasets - """ - - data: list[Dataset] - - -class Datasets(Protocol): - @webmethod(route="/datasets", method="POST", deprecated=True, level=LLAMA_STACK_API_V1) - @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA) - async def register_dataset( - self, - purpose: DatasetPurpose, - source: DataSource, - metadata: dict[str, Any] | None = None, - dataset_id: str | None = None, - ) -> Dataset: - """ - Register a new dataset. - - :param purpose: The purpose of the dataset. - One of: - - "post-training/messages": The dataset contains a messages column with list of messages for post-training. - { - "messages": [ - {"role": "user", "content": "Hello, world!"}, - {"role": "assistant", "content": "Hello, world!"}, - ] - } - - "eval/question-answer": The dataset contains a question column and an answer column for evaluation. - { - "question": "What is the capital of France?", - "answer": "Paris" - } - - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation. - { - "messages": [ - {"role": "user", "content": "Hello, my name is John Doe."}, - {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"}, - {"role": "user", "content": "What's my name?"}, - ], - "answer": "John Doe" - } - :param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. 
Examples: - - { - "type": "uri", - "uri": "https://mywebsite.com/mydata.jsonl" - } - - { - "type": "uri", - "uri": "lsfs://mydata.jsonl" - } - - { - "type": "uri", - "uri": "data:csv;base64,{base64_content}" - } - - { - "type": "uri", - "uri": "huggingface://llamastack/simpleqa?split=train" - } - - { - "type": "rows", - "rows": [ - { - "messages": [ - {"role": "user", "content": "Hello, world!"}, - {"role": "assistant", "content": "Hello, world!"}, - ] - } - ] - } - :param metadata: The metadata for the dataset. - - E.g. {"description": "My dataset"}. - :param dataset_id: The ID of the dataset. If not provided, an ID will be generated. - :returns: A Dataset. - """ - ... - - @webmethod(route="/datasets/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1) - @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA) - async def get_dataset( - self, - dataset_id: str, - ) -> Dataset: - """Get a dataset by its ID. - - :param dataset_id: The ID of the dataset to get. - :returns: A Dataset. - """ - ... - - @webmethod(route="/datasets", method="GET", deprecated=True, level=LLAMA_STACK_API_V1) - @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA) - async def list_datasets(self) -> ListDatasetsResponse: - """List all datasets. - - :returns: A ListDatasetsResponse. - """ - ... - - @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1) - @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA) - async def unregister_dataset( - self, - dataset_id: str, - ) -> None: - """Unregister a dataset by its ID. - - :param dataset_id: The ID of the dataset to unregister. - """ - ... diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py deleted file mode 100644 index 948ec615f..000000000 --- a/llama_stack/apis/datatypes.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, EnumMeta - -from pydantic import BaseModel, Field - -from llama_stack.schema_utils import json_schema_type - - -class DynamicApiMeta(EnumMeta): - def __new__(cls, name, bases, namespace): - # Store the original enum values - original_values = {k: v for k, v in namespace.items() if not k.startswith("_")} - - # Create the enum class - cls = super().__new__(cls, name, bases, namespace) - - # Store the original values for reference - cls._original_values = original_values - # Initialize _dynamic_values - cls._dynamic_values = {} - - return cls - - def __call__(cls, value): - try: - return super().__call__(value) - except ValueError as e: - # If this value was already dynamically added, return it - if value in cls._dynamic_values: - return cls._dynamic_values[value] - - # If the value doesn't exist, create a new enum member - # Create a new member name from the value - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return the existing member - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Instead of creating a new member, raise ValueError to force users to use Api.add() to - # register new APIs explicitly - raise ValueError(f"API '{value}' does not exist. 
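Editor's note: the register_dataset docstring above is dense, so here is a minimal sketch of the two source shapes it accepts, built directly from the models deleted above; the `datasets_impl` handle is assumed.

async def register_examples(datasets_impl: Datasets) -> None:
    # URI-backed dataset for question-answer evaluation.
    ds = await datasets_impl.register_dataset(
        purpose=DatasetPurpose.eval_question_answer,
        source=URIDataSource(uri="https://mywebsite.com/mydata.jsonl"),
        metadata={"description": "My dataset"},
    )
    print(ds.dataset_id)
    # Inline rows for post-training.
    await datasets_impl.register_dataset(
        purpose=DatasetPurpose.post_training_messages,
        source=RowsDataSource(
            rows=[{"messages": [{"role": "user", "content": "Hello, world!"}]}]
        ),
    )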
diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py
deleted file mode 100644
index 948ec615f..000000000
--- a/llama_stack/apis/datatypes.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from enum import Enum, EnumMeta
-
-from pydantic import BaseModel, Field
-
-from llama_stack.schema_utils import json_schema_type
-
-
-class DynamicApiMeta(EnumMeta):
-    def __new__(cls, name, bases, namespace):
-        # Store the original enum values
-        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
-
-        # Create the enum class
-        cls = super().__new__(cls, name, bases, namespace)
-
-        # Store the original values for reference
-        cls._original_values = original_values
-        # Initialize _dynamic_values
-        cls._dynamic_values = {}
-
-        return cls
-
-    def __call__(cls, value):
-        try:
-            return super().__call__(value)
-        except ValueError as e:
-            # If this value was already dynamically added, return it
-            if value in cls._dynamic_values:
-                return cls._dynamic_values[value]
-
-            # If the value doesn't exist, create a new enum member
-            # Create a new member name from the value
-            member_name = value.lower().replace("-", "_")
-
-            # If this member name already exists in the enum, return the existing member
-            if member_name in cls._member_map_:
-                return cls._member_map_[member_name]
-
-            # Instead of creating a new member, raise ValueError to force users to use Api.add() to
-            # register new APIs explicitly
-            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
-
-    def __iter__(cls):
-        # Allow iteration over both static and dynamic members
-        yield from super().__iter__()
-        if hasattr(cls, "_dynamic_values"):
-            yield from cls._dynamic_values.values()
-
-    def add(cls, value):
-        """
-        Add a new API to the enum.
-        Used to register external APIs.
-        """
-        member_name = value.lower().replace("-", "_")
-
-        # If this member name already exists in the enum, return it
-        if member_name in cls._member_map_:
-            return cls._member_map_[member_name]
-
-        # Create a new enum member
-        member = object.__new__(cls)
-        member._name_ = member_name
-        member._value_ = value
-
-        # Add it to the enum class
-        cls._member_map_[member_name] = member
-        cls._member_names_.append(member_name)
-        cls._member_type_ = str
-
-        # Store it in our dynamic values
-        cls._dynamic_values[value] = member
-
-        return member
-
-
-@json_schema_type
-class Api(Enum, metaclass=DynamicApiMeta):
-    """Enumeration of all available APIs in the Llama Stack system.
-    :cvar providers: Provider management and configuration
-    :cvar inference: Text generation, chat completions, and embeddings
-    :cvar safety: Content moderation and safety shields
-    :cvar agents: Agent orchestration and execution
-    :cvar batches: Batch processing for asynchronous API requests
-    :cvar vector_io: Vector database operations and queries
-    :cvar datasetio: Dataset input/output operations
-    :cvar scoring: Model output evaluation and scoring
-    :cvar eval: Model evaluation and benchmarking framework
-    :cvar post_training: Fine-tuning and model training
-    :cvar tool_runtime: Tool execution and management
-    :cvar telemetry: Observability and system monitoring
-    :cvar models: Model metadata and management
-    :cvar shields: Safety shield implementations
-    :cvar vector_stores: Vector store management (only used for routing)
-    :cvar datasets: Dataset creation and management
-    :cvar scoring_functions: Scoring function definitions
-    :cvar benchmarks: Benchmark suite management
-    :cvar tool_groups: Tool group organization
-    :cvar files: File storage and management
-    :cvar prompts: Prompt versions and management
-    :cvar conversations: Conversation management
-    :cvar inspect: Built-in system inspection and introspection
-    """
-
-    providers = "providers"
-    inference = "inference"
-    safety = "safety"
-    agents = "agents"
-    batches = "batches"
-    vector_io = "vector_io"
-    datasetio = "datasetio"
-    scoring = "scoring"
-    eval = "eval"
-    post_training = "post_training"
-    tool_runtime = "tool_runtime"
-
-    telemetry = "telemetry"
-
-    models = "models"
-    shields = "shields"
-    vector_stores = "vector_stores"  # only used for routing table
-    datasets = "datasets"
-    scoring_functions = "scoring_functions"
-    benchmarks = "benchmarks"
-    tool_groups = "tool_groups"
-    files = "files"
-    prompts = "prompts"
-    conversations = "conversations"
-
-    # built-in API
-    inspect = "inspect"
-
-
-@json_schema_type
-class Error(BaseModel):
-    """
-    Error response from the API. Roughly follows RFC 7807.
-
-    :param status: HTTP status code
-    :param title: Error title, a short summary of the error which is invariant for an error type
-    :param detail: Error detail, a longer human-readable description of the error
-    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
-    """
-
-    status: int
-    title: str
-    detail: str
-    instance: str | None = None
-
-
-class ExternalApiSpec(BaseModel):
-    """Specification for an external API implementation."""
-
-    module: str = Field(..., description="Python module containing the API implementation")
-    name: str = Field(..., description="Name of the API")
-    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
-    protocol: str = Field(..., description="Name of the protocol class for the API")
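Editor's note: the DynamicApiMeta behavior above is subtle: looking up an unknown value raises, while Api.add registers a dynamic member that later lookups resolve. A minimal sketch of that contract as I read the deleted code; the "weather" API name is purely illustrative.

# Unknown values are rejected rather than silently created.
try:
    Api("weather")
except ValueError:
    pass

# Explicit registration creates (or returns) the dynamic member...
weather = Api.add("weather")
assert weather.value == "weather"
# ...after which lookup by value succeeds via the _dynamic_values fallback.
assert Api("weather") is weather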
diff --git a/llama_stack/apis/eval/__init__.py b/llama_stack/apis/eval/__init__.py
deleted file mode 100644
index 28a1d6049..000000000
--- a/llama_stack/apis/eval/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .eval import *
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
deleted file mode 100644
index c9418b04b..000000000
--- a/llama_stack/apis/eval/eval.py
+++ /dev/null
@@ -1,169 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Annotated, Any, Literal, Protocol
-
-from pydantic import BaseModel, Field
-
-from llama_stack.apis.agents import AgentConfig
-from llama_stack.apis.common.job_types import Job
-from llama_stack.apis.inference import SamplingParams, SystemMessage
-from llama_stack.apis.scoring import ScoringResult
-from llama_stack.apis.scoring_functions import ScoringFnParams
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
-
-
-@json_schema_type
-class ModelCandidate(BaseModel):
-    """A model candidate for evaluation.
-
-    :param model: The model ID to evaluate.
-    :param sampling_params: The sampling parameters for the model.
-    :param system_message: (Optional) The system message providing instructions or context to the model.
-    """
-
-    type: Literal["model"] = "model"
-    model: str
-    sampling_params: SamplingParams
-    system_message: SystemMessage | None = None
-
-
-@json_schema_type
-class AgentCandidate(BaseModel):
-    """An agent candidate for evaluation.
-
-    :param config: The configuration for the agent candidate.
-    """
-
-    type: Literal["agent"] = "agent"
-    config: AgentConfig
-
-
-EvalCandidate = Annotated[ModelCandidate | AgentCandidate, Field(discriminator="type")]
-register_schema(EvalCandidate, name="EvalCandidate")
-
-
-@json_schema_type
-class BenchmarkConfig(BaseModel):
-    """A benchmark configuration for evaluation.
-
-    :param eval_candidate: The candidate to evaluate.
-    :param scoring_params: Map between scoring function id and parameters for each scoring function you want to run
-    :param num_examples: (Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated
-    """
-
-    eval_candidate: EvalCandidate
-    scoring_params: dict[str, ScoringFnParams] = Field(
-        description="Map between scoring function id and parameters for each scoring function you want to run",
-        default_factory=dict,
-    )
-    num_examples: int | None = Field(
-        description="Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated",
-        default=None,
-    )
-    # we could optionally add any specific dataset config here
-
-
-@json_schema_type
-class EvaluateResponse(BaseModel):
-    """The response from an evaluation.
-
-    :param generations: The generations from the evaluation.
-    :param scores: The scores from the evaluation.
-    """
-
-    generations: list[dict[str, Any]]
-    # each key in the dict is a scoring function name
-    scores: dict[str, ScoringResult]
-
-
-class Eval(Protocol):
-    """Evaluations
-
-    Llama Stack Evaluation API for running evaluations on model and agent candidates."""
-
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
-    async def run_eval(
-        self,
-        benchmark_id: str,
-        benchmark_config: BenchmarkConfig,
-    ) -> Job:
-        """Run an evaluation on a benchmark.
-
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param benchmark_config: The configuration for the benchmark.
-        :returns: The job that was created to run the evaluation.
-        """
-        ...
-
-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
-    )
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
-    async def evaluate_rows(
-        self,
-        benchmark_id: str,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: list[str],
-        benchmark_config: BenchmarkConfig,
-    ) -> EvaluateResponse:
-        """Evaluate a list of rows on a benchmark.
-
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param input_rows: The rows to evaluate.
-        :param scoring_functions: The scoring functions to use for the evaluation.
-        :param benchmark_config: The configuration for the benchmark.
-        :returns: EvaluateResponse object containing generations and scores.
-        """
-        ...
-
-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
-    )
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
-    async def job_status(self, benchmark_id: str, job_id: str) -> Job:
-        """Get the status of a job.
-
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to get the status of.
-        :returns: The status of the evaluation job.
-        """
-        ...
-
-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
-        method="DELETE",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
-    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
-        """Cancel a job.
-
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to cancel.
-        """
-        ...
-
-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
-    )
-    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
-        """Get the result of a job.
-
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to get the result of.
-        :returns: The result of the job.
-        """
-        ...
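Editor's note: a minimal sketch of the synchronous path of the Eval protocol above (evaluate_rows, which skips the job lifecycle). `eval_impl` stands for any implementation of the protocol, and the model id and scoring-function id are assumptions for illustration.

async def score_sample_rows(eval_impl: Eval, benchmark_id: str) -> None:
    # Build a model candidate; scoring_params defaults to an empty map.
    config = BenchmarkConfig(
        eval_candidate=ModelCandidate(
            model="llama3.2-3b",  # assumed model id
            sampling_params=SamplingParams(),
        ),
    )
    response = await eval_impl.evaluate_rows(
        benchmark_id,
        input_rows=[{"question": "What is the capital of France?", "answer": "Paris"}],
        scoring_functions=["basic::equality"],  # assumed scoring function id
        benchmark_config=config,
    )
    # scores maps each scoring function name to a ScoringResult.
    for fn_name, result in response.scores.items():
        print(fn_name, result)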
diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py
deleted file mode 100644
index 189e4de19..000000000
--- a/llama_stack/apis/files/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .files import *
diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py
deleted file mode 100644
index f1d3764db..000000000
--- a/llama_stack/apis/files/files.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from enum import StrEnum
-from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
-
-from fastapi import File, Form, Response, UploadFile
-from pydantic import BaseModel, Field
-
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
-from llama_stack.schema_utils import json_schema_type, webmethod
-
-
-# OpenAI Files API Models
-class OpenAIFilePurpose(StrEnum):
-    """
-    Valid purpose values for OpenAI Files API.
-    """
-
-    ASSISTANTS = "assistants"
-    BATCH = "batch"
-    # TODO: Add other purposes as needed
-
-
-@json_schema_type
-class OpenAIFileObject(BaseModel):
-    """
-    OpenAI File object as defined in the OpenAI Files API.
-
-    :param object: The object type, which is always "file"
-    :param id: The file identifier, which can be referenced in the API endpoints
-    :param bytes: The size of the file, in bytes
-    :param created_at: The Unix timestamp (in seconds) for when the file was created
-    :param expires_at: The Unix timestamp (in seconds) for when the file expires
-    :param filename: The name of the file
-    :param purpose: The intended purpose of the file
-    """
-
-    object: Literal["file"] = "file"
-    id: str
-    bytes: int
-    created_at: int
-    expires_at: int
-    filename: str
-    purpose: OpenAIFilePurpose
-
-
-@json_schema_type
-class ExpiresAfter(BaseModel):
-    """
-    Control expiration of uploaded files.
-
-    Params:
-    - anchor, must be "created_at"
-    - seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
-    """
-
-    MIN: ClassVar[int] = 3600  # 1 hour
-    MAX: ClassVar[int] = 2592000  # 30 days
-
-    anchor: Literal["created_at"]
-    seconds: int = Field(..., ge=3600, le=2592000)
-
-
-@json_schema_type
-class ListOpenAIFileResponse(BaseModel):
-    """
-    Response for listing files in OpenAI Files API.
-
-    :param data: List of file objects
-    :param has_more: Whether there are more files available beyond this page
-    :param first_id: ID of the first file in the list for pagination
-    :param last_id: ID of the last file in the list for pagination
-    :param object: The object type, which is always "list"
-    """
-
-    data: list[OpenAIFileObject]
-    has_more: bool
-    first_id: str
-    last_id: str
-    object: Literal["list"] = "list"
-
-
-@json_schema_type
-class OpenAIFileDeleteResponse(BaseModel):
-    """
-    Response for deleting a file in OpenAI Files API.
-
-    :param id: The file identifier that was deleted
-    :param object: The object type, which is always "file"
-    :param deleted: Whether the file was successfully deleted
-    """
-
-    id: str
-    object: Literal["file"] = "file"
-    deleted: bool
-
-
-@runtime_checkable
-@trace_protocol
-class Files(Protocol):
-    """Files
-
-    This API is used to upload documents that can be used with other Llama Stack APIs.
-    """
-
-    # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
-    @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
-    async def openai_upload_file(
-        self,
-        file: Annotated[UploadFile, File()],
-        purpose: Annotated[OpenAIFilePurpose, Form()],
-        expires_after: Annotated[ExpiresAfter | None, Form()] = None,
-    ) -> OpenAIFileObject:
-        """Upload file.
-
-        Upload a file that can be used across various endpoints.
-
-        The file upload should be a multipart form request with:
-        - file: The File object (not file name) to be uploaded.
-        - purpose: The intended purpose of the uploaded file.
-        - expires_after: Optional form values describing expiration for the file.
-
-        :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
-        :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
-        :param expires_after: Optional form values describing expiration for the file.
-        :returns: An OpenAIFileObject representing the uploaded file.
-        """
-        ...
-
-    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
-    @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
-    async def openai_list_files(
-        self,
-        after: str | None = None,
-        limit: int | None = 10000,
-        order: Order | None = Order.desc,
-        purpose: OpenAIFilePurpose | None = None,
-    ) -> ListOpenAIFileResponse:
-        """List files.
-
-        Returns a list of files that belong to the user's organization.
-
-        :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
-        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000.
-        :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
-        :param purpose: Only return files with the given purpose.
-        :returns: A ListOpenAIFileResponse containing the list of files.
-        """
-        ...
-
-    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
-    @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
-    async def openai_retrieve_file(
-        self,
-        file_id: str,
-    ) -> OpenAIFileObject:
-        """Retrieve file.
-
-        Returns information about a specific file.
-
-        :param file_id: The ID of the file to use for this request.
-        :returns: An OpenAIFileObject containing file information.
-        """
-        ...
-
-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
-    @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
-    async def openai_delete_file(
-        self,
-        file_id: str,
-    ) -> OpenAIFileDeleteResponse:
-        """Delete file.
-
-        :param file_id: The ID of the file to use for this request.
-        :returns: An OpenAIFileDeleteResponse indicating successful deletion.
-        """
-        ...
-
-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
-    @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
-    async def openai_retrieve_file_content(
-        self,
-        file_id: str,
-    ) -> Response:
-        """Retrieve file content.
-
-        Returns the contents of the specified file.
-
-        :param file_id: The ID of the file to use for this request.
-        :returns: The raw file content as a binary response.
-        """
-        ...
diff --git a/llama_stack/apis/inference/__init__.py b/llama_stack/apis/inference/__init__.py
deleted file mode 100644
index f0c8783c1..000000000
--- a/llama_stack/apis/inference/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .inference import *
diff --git a/llama_stack/apis/inference/event_logger.py b/llama_stack/apis/inference/event_logger.py
deleted file mode 100644
index d97ece6d4..000000000
--- a/llama_stack/apis/inference/event_logger.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from termcolor import cprint
-
-from llama_stack.apis.inference import (
-    ChatCompletionResponseEventType,
-    ChatCompletionResponseStreamChunk,
-)
-
-
-class LogEvent:
-    def __init__(
-        self,
-        content: str = "",
-        end: str = "\n",
-        color="white",
-    ):
-        self.content = content
-        self.color = color
-        self.end = "\n" if end is None else end
-
-    def print(self, flush=True):
-        cprint(f"{self.content}", color=self.color, end=self.end, flush=flush)
-
-
-class EventLogger:
-    async def log(self, event_generator):
-        async for chunk in event_generator:
-            if isinstance(chunk, ChatCompletionResponseStreamChunk):
-                event = chunk.event
-                if event.event_type == ChatCompletionResponseEventType.start:
-                    yield LogEvent("Assistant> ", color="cyan", end="")
-                elif event.event_type == ChatCompletionResponseEventType.progress:
-                    yield LogEvent(event.delta, color="yellow", end="")
-                elif event.event_type == ChatCompletionResponseEventType.complete:
-                    yield LogEvent("")
-            else:
-                yield LogEvent("Assistant> ", color="cyan", end="")
-                yield LogEvent(chunk.completion_message.content, color="yellow")
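Editor's note: for context on the event_logger.py removal above, this is roughly how the logger was driven. `chat_stream` stands in for any async iterator of chat completion stream chunks and is an assumption, not part of the deleted module.

async def print_chat_stream(chat_stream) -> None:
    # EventLogger.log yields LogEvent objects; LogEvent.print renders them
    # with termcolor's cprint, streaming deltas without extra newlines.
    async for log_event in EventLogger().log(chat_stream):
        log_event.print()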
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
deleted file mode 100644
index 027246470..000000000
--- a/llama_stack/apis/inference/inference.py
+++ /dev/null
@@ -1,1273 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from collections.abc import AsyncIterator
-from enum import Enum
-from typing import (
-    Annotated,
-    Any,
-    Literal,
-    Protocol,
-    runtime_checkable,
-)
-
-from fastapi import Body
-from pydantic import BaseModel, Field, field_validator
-from typing_extensions import TypedDict
-
-from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.models import Model
-from llama_stack.apis.telemetry import MetricResponseMixin
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.models.llama.datatypes import (
-    BuiltinTool,
-    StopReason,
-    ToolCall,
-    ToolDefinition,
-    ToolPromptFormat,
-)
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
-
-register_schema(ToolCall)
-register_schema(ToolDefinition)
-
-from enum import StrEnum
-
-
-@json_schema_type
-class GreedySamplingStrategy(BaseModel):
-    """Greedy sampling strategy that selects the highest probability token at each step.
-
-    :param type: Must be "greedy" to identify this sampling strategy
-    """
-
-    type: Literal["greedy"] = "greedy"
-
-
-@json_schema_type
-class TopPSamplingStrategy(BaseModel):
-    """Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p.
-
-    :param type: Must be "top_p" to identify this sampling strategy
-    :param temperature: Controls randomness in sampling. Higher values increase randomness
-    :param top_p: Cumulative probability threshold for nucleus sampling. Defaults to 0.95
-    """
-
-    type: Literal["top_p"] = "top_p"
-    temperature: float | None = Field(..., gt=0.0)
-    top_p: float | None = 0.95
-
-
-@json_schema_type
-class TopKSamplingStrategy(BaseModel):
-    """Top-k sampling strategy that restricts sampling to the k most likely tokens.
-
-    :param type: Must be "top_k" to identify this sampling strategy
-    :param top_k: Number of top tokens to consider for sampling. Must be at least 1
-    """
-
-    type: Literal["top_k"] = "top_k"
-    top_k: int = Field(..., ge=1)
-
-
-SamplingStrategy = Annotated[
-    GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy,
-    Field(discriminator="type"),
-]
-register_schema(SamplingStrategy, name="SamplingStrategy")
-
-
-@json_schema_type
-class SamplingParams(BaseModel):
-    """Sampling parameters.
-
-    :param strategy: The sampling strategy.
-    :param max_tokens: The maximum number of tokens that can be generated in the completion. The token count of
-        your prompt plus max_tokens cannot exceed the model's context length.
-    :param repetition_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens
-        based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
-    :param stop: Up to 4 sequences where the API will stop generating further tokens.
-        The returned text will not contain the stop sequence.
-    """
-
-    strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
-
-    max_tokens: int | None = 0
-    repetition_penalty: float | None = 1.0
-    stop: list[str] | None = None
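Editor's note: the sampling strategies above form a discriminated union keyed on `type`, so a SamplingParams can be built with any one of them. A small sketch using only the types defined above; the particular values are illustrative.

# Greedy decoding is the default strategy.
params = SamplingParams(max_tokens=128, stop=["</s>"])
assert isinstance(params.strategy, GreedySamplingStrategy)

# Nucleus sampling with an explicit temperature.
nucleus = SamplingParams(
    strategy=TopPSamplingStrategy(temperature=0.7, top_p=0.9),
    max_tokens=256,
)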
- - :cvar bf16: BFloat16 typically this means _no_ quantization - :cvar fp8_mixed: 8-bit floating point quantization with mixed precision - :cvar int4_mixed: 4-bit integer quantization with mixed precision - """ - - bf16 = "bf16" - fp8_mixed = "fp8_mixed" - int4_mixed = "int4_mixed" - - -@json_schema_type -class Fp8QuantizationConfig(BaseModel): - """Configuration for 8-bit floating point quantization. - - :param type: Must be "fp8_mixed" to identify this quantization type - """ - - type: Literal["fp8_mixed"] = "fp8_mixed" - - -@json_schema_type -class Bf16QuantizationConfig(BaseModel): - """Configuration for BFloat16 precision (typically no quantization). - - :param type: Must be "bf16" to identify this quantization type - """ - - type: Literal["bf16"] = "bf16" - - -@json_schema_type -class Int4QuantizationConfig(BaseModel): - """Configuration for 4-bit integer quantization. - - :param type: Must be "int4" to identify this quantization type - :param scheme: Quantization scheme to use. Defaults to "int4_weight_int8_dynamic_activation" - """ - - type: Literal["int4_mixed"] = "int4_mixed" - scheme: str | None = "int4_weight_int8_dynamic_activation" - - -QuantizationConfig = Annotated[ - Bf16QuantizationConfig | Fp8QuantizationConfig | Int4QuantizationConfig, - Field(discriminator="type"), -] - - -@json_schema_type -class UserMessage(BaseModel): - """A message from the user in a chat conversation. - - :param role: Must be "user" to identify this as a user message - :param content: The content of the message, which can include text and other media - :param context: (Optional) This field is used internally by Llama Stack to pass RAG context. This field may be removed in the API in the future. - """ - - role: Literal["user"] = "user" - content: InterleavedContent - context: InterleavedContent | None = None - - -@json_schema_type -class SystemMessage(BaseModel): - """A system message providing instructions or context to the model. - - :param role: Must be "system" to identify this as a system message - :param content: The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions). - """ - - role: Literal["system"] = "system" - content: InterleavedContent - - -@json_schema_type -class ToolResponseMessage(BaseModel): - """A message representing the result of a tool invocation. - - :param role: Must be "tool" to identify this as a tool response - :param call_id: Unique identifier for the tool call this response is for - :param content: The response content from the tool - """ - - role: Literal["tool"] = "tool" - call_id: str - content: InterleavedContent - - -@json_schema_type -class CompletionMessage(BaseModel): - """A message containing the model's (assistant) response in a chat conversation. - - :param role: Must be "assistant" to identify this as the model's response - :param content: The content of the model's response - :param stop_reason: Reason why the model stopped generating. Options are: - - `StopReason.end_of_turn`: The model finished generating the entire response. - - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response. - - `StopReason.out_of_tokens`: The model ran out of token budget. - :param tool_calls: List of tool calls. Each tool call is a ToolCall object. 
- """ - - role: Literal["assistant"] = "assistant" - content: InterleavedContent - stop_reason: StopReason - tool_calls: list[ToolCall] | None = Field(default_factory=lambda: []) - - -Message = Annotated[ - UserMessage | SystemMessage | ToolResponseMessage | CompletionMessage, - Field(discriminator="role"), -] -register_schema(Message, name="Message") - - -@json_schema_type -class ToolResponse(BaseModel): - """Response from a tool invocation. - - :param call_id: Unique identifier for the tool call this response is for - :param tool_name: Name of the tool that was invoked - :param content: The response content from the tool - :param metadata: (Optional) Additional metadata about the tool response - """ - - call_id: str - tool_name: BuiltinTool | str - content: InterleavedContent - metadata: dict[str, Any] | None = None - - @field_validator("tool_name", mode="before") - @classmethod - def validate_field(cls, v): - if isinstance(v, str): - try: - return BuiltinTool(v) - except ValueError: - return v - return v - - -class ToolChoice(Enum): - """Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model. - - :cvar auto: The model may use tools if it determines that is appropriate. - :cvar required: The model must use tools. - :cvar none: The model must not use tools. - """ - - auto = "auto" - required = "required" - none = "none" - - -@json_schema_type -class TokenLogProbs(BaseModel): - """Log probabilities for generated tokens. - - :param logprobs_by_token: Dictionary mapping tokens to their log probabilities - """ - - logprobs_by_token: dict[str, float] - - -class ChatCompletionResponseEventType(Enum): - """Types of events that can occur during chat completion. - - :cvar start: Inference has started - :cvar complete: Inference is complete and a full response is available - :cvar progress: Inference is in progress and a partial response is available - """ - - start = "start" - complete = "complete" - progress = "progress" - - -@json_schema_type -class ChatCompletionResponseEvent(BaseModel): - """An event during chat completion generation. - - :param event_type: Type of the event - :param delta: Content generated since last event. This can be one or more tokens, or a tool call. - :param logprobs: Optional log probabilities for generated tokens - :param stop_reason: Optional reason why generation stopped, if complete - """ - - event_type: ChatCompletionResponseEventType - delta: ContentDelta - logprobs: list[TokenLogProbs] | None = None - stop_reason: StopReason | None = None - - -class ResponseFormatType(StrEnum): - """Types of formats for structured (guided) decoding. - - :cvar json_schema: Response should conform to a JSON schema. In a Python SDK, this is often a `pydantic` model. - :cvar grammar: Response should conform to a BNF grammar - """ - - json_schema = "json_schema" - grammar = "grammar" - - -@json_schema_type -class JsonSchemaResponseFormat(BaseModel): - """Configuration for JSON schema-guided response generation. - - :param type: Must be "json_schema" to identify this format type - :param json_schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. - """ - - type: Literal[ResponseFormatType.json_schema] = ResponseFormatType.json_schema - json_schema: dict[str, Any] - - -@json_schema_type -class GrammarResponseFormat(BaseModel): - """Configuration for grammar-guided response generation. 
- - :param type: Must be "grammar" to identify this format type - :param bnf: The BNF grammar specification the response should conform to - """ - - type: Literal[ResponseFormatType.grammar] = ResponseFormatType.grammar - bnf: dict[str, Any] - - -ResponseFormat = Annotated[ - JsonSchemaResponseFormat | GrammarResponseFormat, - Field(discriminator="type"), -] -register_schema(ResponseFormat, name="ResponseFormat") - - -# This is an internally used class -class CompletionRequest(BaseModel): - model: str - content: InterleavedContent - sampling_params: SamplingParams | None = Field(default_factory=SamplingParams) - response_format: ResponseFormat | None = None - stream: bool | None = False - logprobs: LogProbConfig | None = None - - -@json_schema_type -class CompletionResponse(MetricResponseMixin): - """Response from a completion request. - - :param content: The generated completion text - :param stop_reason: Reason why generation stopped - :param logprobs: Optional log probabilities for generated tokens - """ - - content: str - stop_reason: StopReason - logprobs: list[TokenLogProbs] | None = None - - -@json_schema_type -class CompletionResponseStreamChunk(MetricResponseMixin): - """A chunk of a streamed completion response. - - :param delta: New content generated since last chunk. This can be one or more tokens. - :param stop_reason: Optional reason why generation stopped, if complete - :param logprobs: Optional log probabilities for generated tokens - """ - - delta: str - stop_reason: StopReason | None = None - logprobs: list[TokenLogProbs] | None = None - - -class SystemMessageBehavior(Enum): - """Config for how to override the default system prompt. - - :cvar append: Appends the provided system message to the default system prompt: - https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2/#-function-definitions-in-the-system-prompt- - :cvar replace: Replaces the default system prompt with the provided system message. The system message can include the string - '{{function_definitions}}' to indicate where the function definitions should be inserted. - """ - - append = "append" - replace = "replace" - - -@json_schema_type -class ToolConfig(BaseModel): - """Configuration for tool use. - - :param tool_choice: (Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto. - :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls. - :param system_message_behavior: (Optional) Config for how to override the default system prompt. - - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string - '{{function_definitions}}' to indicate where the function definitions should be inserted. 
- """ - - tool_choice: ToolChoice | str | None = Field(default=ToolChoice.auto) - tool_prompt_format: ToolPromptFormat | None = Field(default=None) - system_message_behavior: SystemMessageBehavior | None = Field(default=SystemMessageBehavior.append) - - def model_post_init(self, __context: Any) -> None: - if isinstance(self.tool_choice, str): - try: - self.tool_choice = ToolChoice[self.tool_choice] - except KeyError: - pass - - -# This is an internally used class -@json_schema_type -class ChatCompletionRequest(BaseModel): - model: str - messages: list[Message] - sampling_params: SamplingParams | None = Field(default_factory=SamplingParams) - - tools: list[ToolDefinition] | None = Field(default_factory=lambda: []) - tool_config: ToolConfig | None = Field(default_factory=ToolConfig) - - response_format: ResponseFormat | None = None - stream: bool | None = False - logprobs: LogProbConfig | None = None - - -@json_schema_type -class ChatCompletionResponseStreamChunk(MetricResponseMixin): - """A chunk of a streamed chat completion response. - - :param event: The event containing the new content - """ - - event: ChatCompletionResponseEvent - - -@json_schema_type -class ChatCompletionResponse(MetricResponseMixin): - """Response from a chat completion request. - - :param completion_message: The complete response message - :param logprobs: Optional log probabilities for generated tokens - """ - - completion_message: CompletionMessage - logprobs: list[TokenLogProbs] | None = None - - -@json_schema_type -class EmbeddingsResponse(BaseModel): - """Response containing generated embeddings. - - :param embeddings: List of embedding vectors, one per input content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id} - """ - - embeddings: list[list[float]] - - -@json_schema_type -class RerankData(BaseModel): - """A single rerank result from a reranking response. - - :param index: The original index of the document in the input list - :param relevance_score: The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance. - """ - - index: int - relevance_score: float - - -@json_schema_type -class RerankResponse(BaseModel): - """Response from a reranking request. - - :param data: List of rerank result objects, sorted by relevance score (descending) - """ - - data: list[RerankData] - - -@json_schema_type -class OpenAIChatCompletionContentPartTextParam(BaseModel): - """Text content part for OpenAI-compatible chat completion messages. - - :param type: Must be "text" to identify this as text content - :param text: The text content of the message - """ - - type: Literal["text"] = "text" - text: str - - -@json_schema_type -class OpenAIImageURL(BaseModel): - """Image URL specification for OpenAI-compatible chat completion messages. - - :param url: URL of the image to include in the message - :param detail: (Optional) Level of detail for image processing. Can be "low", "high", or "auto" - """ - - url: str - detail: str | None = None - - -@json_schema_type -class OpenAIChatCompletionContentPartImageParam(BaseModel): - """Image content part for OpenAI-compatible chat completion messages. 
- - :param type: Must be "image_url" to identify this as image content - :param image_url: Image URL specification and processing details - """ - - type: Literal["image_url"] = "image_url" - image_url: OpenAIImageURL - - -@json_schema_type -class OpenAIFileFile(BaseModel): - file_data: str | None = None - file_id: str | None = None - filename: str | None = None - - -@json_schema_type -class OpenAIFile(BaseModel): - type: Literal["file"] = "file" - file: OpenAIFileFile - - -OpenAIChatCompletionContentPartParam = Annotated[ - OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile, - Field(discriminator="type"), -] -register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam") - - -OpenAIChatCompletionMessageContent = str | list[OpenAIChatCompletionContentPartParam] - -OpenAIChatCompletionTextOnlyMessageContent = str | list[OpenAIChatCompletionContentPartTextParam] - - -@json_schema_type -class OpenAIUserMessageParam(BaseModel): - """A message from the user in an OpenAI-compatible chat completion request. - - :param role: Must be "user" to identify this as a user message - :param content: The content of the message, which can include text and other media - :param name: (Optional) The name of the user message participant. - """ - - role: Literal["user"] = "user" - content: OpenAIChatCompletionMessageContent - name: str | None = None - - -@json_schema_type -class OpenAISystemMessageParam(BaseModel): - """A system message providing instructions or context to the model. - - :param role: Must be "system" to identify this as a system message - :param content: The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions). - :param name: (Optional) The name of the system message participant. - """ - - role: Literal["system"] = "system" - content: OpenAIChatCompletionTextOnlyMessageContent - name: str | None = None - - -@json_schema_type -class OpenAIChatCompletionToolCallFunction(BaseModel): - """Function call details for OpenAI-compatible tool calls. - - :param name: (Optional) Name of the function to call - :param arguments: (Optional) Arguments to pass to the function as a JSON string - """ - - name: str | None = None - arguments: str | None = None - - -@json_schema_type -class OpenAIChatCompletionToolCall(BaseModel): - """Tool call specification for OpenAI-compatible chat completion responses. - - :param index: (Optional) Index of the tool call in the list - :param id: (Optional) Unique identifier for the tool call - :param type: Must be "function" to identify this as a function call - :param function: (Optional) Function call details - """ - - index: int | None = None - id: str | None = None - type: Literal["function"] = "function" - function: OpenAIChatCompletionToolCallFunction | None = None - - -@json_schema_type -class OpenAIAssistantMessageParam(BaseModel): - """A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. - - :param role: Must be "assistant" to identify this as the model's response - :param content: The content of the model's response - :param name: (Optional) The name of the assistant message participant. - :param tool_calls: List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object. 
- """ - - role: Literal["assistant"] = "assistant" - content: OpenAIChatCompletionTextOnlyMessageContent | None = None - name: str | None = None - tool_calls: list[OpenAIChatCompletionToolCall] | None = None - - -@json_schema_type -class OpenAIToolMessageParam(BaseModel): - """A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. - - :param role: Must be "tool" to identify this as a tool response - :param tool_call_id: Unique identifier for the tool call this response is for - :param content: The response content from the tool - """ - - role: Literal["tool"] = "tool" - tool_call_id: str - content: OpenAIChatCompletionTextOnlyMessageContent - - -@json_schema_type -class OpenAIDeveloperMessageParam(BaseModel): - """A message from the developer in an OpenAI-compatible chat completion request. - - :param role: Must be "developer" to identify this as a developer message - :param content: The content of the developer message - :param name: (Optional) The name of the developer message participant. - """ - - role: Literal["developer"] = "developer" - content: OpenAIChatCompletionTextOnlyMessageContent - name: str | None = None - - -OpenAIMessageParam = Annotated[ - OpenAIUserMessageParam - | OpenAISystemMessageParam - | OpenAIAssistantMessageParam - | OpenAIToolMessageParam - | OpenAIDeveloperMessageParam, - Field(discriminator="role"), -] -register_schema(OpenAIMessageParam, name="OpenAIMessageParam") - - -@json_schema_type -class OpenAIResponseFormatText(BaseModel): - """Text response format for OpenAI-compatible chat completion requests. - - :param type: Must be "text" to indicate plain text response format - """ - - type: Literal["text"] = "text" - - -@json_schema_type -class OpenAIJSONSchema(TypedDict, total=False): - """JSON schema specification for OpenAI-compatible structured response format. - - :param name: Name of the schema - :param description: (Optional) Description of the schema - :param strict: (Optional) Whether to enforce strict adherence to the schema - :param schema: (Optional) The JSON schema definition - """ - - name: str - description: str | None - strict: bool | None - - # Pydantic BaseModel cannot be used with a schema param, since it already - # has one. And, we don't want to alias here because then have to handle - # that alias when converting to OpenAI params. So, to support schema, - # we use a TypedDict. - schema: dict[str, Any] | None - - -@json_schema_type -class OpenAIResponseFormatJSONSchema(BaseModel): - """JSON schema response format for OpenAI-compatible chat completion requests. - - :param type: Must be "json_schema" to indicate structured JSON response format - :param json_schema: The JSON schema specification for the response - """ - - type: Literal["json_schema"] = "json_schema" - json_schema: OpenAIJSONSchema - - -@json_schema_type -class OpenAIResponseFormatJSONObject(BaseModel): - """JSON object response format for OpenAI-compatible chat completion requests. - - :param type: Must be "json_object" to indicate generic JSON object response format - """ - - type: Literal["json_object"] = "json_object" - - -OpenAIResponseFormatParam = Annotated[ - OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject, - Field(discriminator="type"), -] -register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam") - - -@json_schema_type -class OpenAITopLogProb(BaseModel): - """The top log probability for a token from an OpenAI-compatible chat completion response. 
-
- :param token: The token
- :param bytes: (Optional) The bytes for the token
- :param logprob: The log probability of the token
- """
-
- token: str
- bytes: list[int] | None = None
- logprob: float
-
-
-@json_schema_type
-class OpenAITokenLogProb(BaseModel):
- """The log probability for a token from an OpenAI-compatible chat completion response.
-
- :param token: The token
- :param bytes: (Optional) The bytes for the token
- :param logprob: The log probability of the token
- :param top_logprobs: The top log probabilities for the token
- """
-
- token: str
- bytes: list[int] | None = None
- logprob: float
- top_logprobs: list[OpenAITopLogProb]
-
-
-@json_schema_type
-class OpenAIChoiceLogprobs(BaseModel):
- """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.
-
- :param content: (Optional) The log probabilities for the tokens in the message
- :param refusal: (Optional) The log probabilities for the tokens in the refusal
- """
-
- content: list[OpenAITokenLogProb] | None = None
- refusal: list[OpenAITokenLogProb] | None = None
-
-
-@json_schema_type
-class OpenAIChoiceDelta(BaseModel):
- """A delta from an OpenAI-compatible chat completion streaming response.
-
- :param content: (Optional) The content of the delta
- :param refusal: (Optional) The refusal of the delta
- :param role: (Optional) The role of the delta
- :param tool_calls: (Optional) The tool calls of the delta
- :param reasoning_content: (Optional) The reasoning content from the model (non-standard, for o1/o3 models)
- """
-
- content: str | None = None
- refusal: str | None = None
- role: str | None = None
- tool_calls: list[OpenAIChatCompletionToolCall] | None = None
- reasoning_content: str | None = None
-
-
-@json_schema_type
-class OpenAIChunkChoice(BaseModel):
- """A chunk choice from an OpenAI-compatible chat completion streaming response.
-
- :param delta: The delta from the chunk
- :param finish_reason: The reason the model stopped generating
- :param index: The index of the choice
- :param logprobs: (Optional) The log probabilities for the tokens in the message
- """
-
- delta: OpenAIChoiceDelta
- finish_reason: str
- index: int
- logprobs: OpenAIChoiceLogprobs | None = None
-
-
-@json_schema_type
-class OpenAIChoice(BaseModel):
- """A choice from an OpenAI-compatible chat completion response.
-
- :param message: The message from the model
- :param finish_reason: The reason the model stopped generating
- :param index: The index of the choice
- :param logprobs: (Optional) The log probabilities for the tokens in the message
- """
-
- message: OpenAIMessageParam
- finish_reason: str
- index: int
- logprobs: OpenAIChoiceLogprobs | None = None
-
-
-class OpenAIChatCompletionUsageCompletionTokensDetails(BaseModel):
- """Token details for output tokens in OpenAI chat completion usage.
-
- :param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)
- """
-
- reasoning_tokens: int | None = None
-
-
-class OpenAIChatCompletionUsagePromptTokensDetails(BaseModel):
- """Token details for prompt tokens in OpenAI chat completion usage.
-
- :param cached_tokens: Number of tokens retrieved from cache
- """
-
- cached_tokens: int | None = None
-
-
-@json_schema_type
-class OpenAIChatCompletionUsage(BaseModel):
- """Usage information for OpenAI chat completion.
-
- :param prompt_tokens: Number of tokens in the prompt
- :param completion_tokens: Number of tokens in the completion
- :param total_tokens: Total tokens used (prompt + completion)
- :param prompt_tokens_details: Detailed breakdown of prompt token usage
- :param completion_tokens_details: Detailed breakdown of completion token usage
- """
-
- prompt_tokens: int
- completion_tokens: int
- total_tokens: int
- prompt_tokens_details: OpenAIChatCompletionUsagePromptTokensDetails | None = None
- completion_tokens_details: OpenAIChatCompletionUsageCompletionTokensDetails | None = None
-
-
-@json_schema_type
-class OpenAIChatCompletion(BaseModel):
- """Response from an OpenAI-compatible chat completion request.
-
- :param id: The ID of the chat completion
- :param choices: List of choices
- :param object: The object type, which will be "chat.completion"
- :param created: The Unix timestamp in seconds when the chat completion was created
- :param model: The model that was used to generate the chat completion
- :param usage: Token usage information for the completion
- """
-
- id: str
- choices: list[OpenAIChoice]
- object: Literal["chat.completion"] = "chat.completion"
- created: int
- model: str
- usage: OpenAIChatCompletionUsage | None = None
-
-
-@json_schema_type
-class OpenAIChatCompletionChunk(BaseModel):
- """Chunk from a streaming response to an OpenAI-compatible chat completion request.
-
- :param id: The ID of the chat completion
- :param choices: List of choices
- :param object: The object type, which will be "chat.completion.chunk"
- :param created: The Unix timestamp in seconds when the chat completion was created
- :param model: The model that was used to generate the chat completion
- :param usage: Token usage information (typically included in final chunk with stream_options)
- """
-
- id: str
- choices: list[OpenAIChunkChoice]
- object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
- created: int
- model: str
- usage: OpenAIChatCompletionUsage | None = None
-
-
-@json_schema_type
-class OpenAICompletionLogprobs(BaseModel):
- """The log probabilities for the tokens in the message from an OpenAI-compatible completion response.
-
- :param text_offset: (Optional) The offset of the token in the text
- :param token_logprobs: (Optional) The log probabilities for the tokens
- :param tokens: (Optional) The tokens
- :param top_logprobs: (Optional) The top log probabilities for the tokens
- """
-
- text_offset: list[int] | None = None
- token_logprobs: list[float] | None = None
- tokens: list[str] | None = None
- top_logprobs: list[dict[str, float]] | None = None
-
-
-@json_schema_type
-class OpenAICompletionChoice(BaseModel):
- """A choice from an OpenAI-compatible completion response.
-
- :param finish_reason: The reason the model stopped generating
- :param text: The text of the choice
- :param index: The index of the choice
- :param logprobs: (Optional) The log probabilities for the tokens in the choice
- """
-
- finish_reason: str
- text: str
- index: int
- logprobs: OpenAIChoiceLogprobs | None = None
-
-
-@json_schema_type
-class OpenAICompletion(BaseModel):
- """Response from an OpenAI-compatible completion request.
-
- :param id: The ID of the completion
- :param choices: List of choices
- :param created: The Unix timestamp in seconds when the completion was created
- :param model: The model that was used to generate the completion
- :param object: The object type, which will be "text_completion"
- """
-
- id: str
- choices: list[OpenAICompletionChoice]
- created: int
- model: str
- object: Literal["text_completion"] = "text_completion"
-
-
-@json_schema_type
-class OpenAIEmbeddingData(BaseModel):
- """A single embedding data object from an OpenAI-compatible embeddings response.
-
- :param object: The object type, which will be "embedding"
- :param embedding: The embedding vector as a list of floats (when encoding_format="float") or as a base64-encoded string (when encoding_format="base64")
- :param index: The index of the embedding in the input list
- """
-
- object: Literal["embedding"] = "embedding"
- # TODO: consider dropping str and using openai.types.embeddings.Embedding instead of OpenAIEmbeddingData
- embedding: list[float] | str
- index: int
-
-
-@json_schema_type
-class OpenAIEmbeddingUsage(BaseModel):
- """Usage information for an OpenAI-compatible embeddings response.
-
- :param prompt_tokens: The number of tokens in the input
- :param total_tokens: The total number of tokens used
- """
-
- prompt_tokens: int
- total_tokens: int
-
-
-@json_schema_type
-class OpenAIEmbeddingsResponse(BaseModel):
- """Response from an OpenAI-compatible embeddings request.
-
- :param object: The object type, which will be "list"
- :param data: List of embedding data objects
- :param model: The model that was used to generate the embeddings
- :param usage: Usage information
- """
-
- object: Literal["list"] = "list"
- data: list[OpenAIEmbeddingData]
- model: str
- usage: OpenAIEmbeddingUsage
-
-
-class ModelStore(Protocol):
- async def get_model(self, identifier: str) -> Model: ...
-
-
-class TextTruncation(Enum):
- """Config for how to truncate text for embedding when text is longer than the model's max sequence length. Start and End semantics depend on whether the language is left-to-right or right-to-left.
-
- :cvar none: No truncation (default). If the text is longer than the model's max sequence length, you will get an error.
- :cvar start: Truncate from the start
- :cvar end: Truncate from the end
- """
-
- none = "none"
- start = "start"
- end = "end"
-
-
-class EmbeddingTaskType(Enum):
- """How is the embedding being used? This is only supported by asymmetric embedding models.
-
- :cvar query: Used for a query for semantic search.
- :cvar document: Used at indexing time when ingesting documents.
- """
-
- query = "query"
- document = "document"
-
-
-class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
- input_messages: list[OpenAIMessageParam]
-
-
-@json_schema_type
-class ListOpenAIChatCompletionResponse(BaseModel):
- """Response from listing OpenAI-compatible chat completions.
-
- :param data: List of chat completion objects with their input messages
- :param has_more: Whether there are more completions available beyond this list
- :param first_id: ID of the first completion in this list
- :param last_id: ID of the last completion in this list
- :param object: Must be "list" to identify this as a list response
- """
-
- data: list[OpenAICompletionWithInputMessages]
- has_more: bool
- first_id: str
- last_id: str
- object: Literal["list"] = "list"
-
-
-# extra_body can be accessed via .model_extra
-@json_schema_type
-class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
- """Request parameters for OpenAI-compatible completion endpoint.
-
- :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
- :param prompt: The prompt to generate a completion for.
- :param best_of: (Optional) The number of completions to generate server-side, returning only the "best" (highest log probability per token).
- :param echo: (Optional) Whether to echo the prompt.
- :param frequency_penalty: (Optional) Penalty applied to tokens based on how frequently they have appeared in the text so far.
- :param logit_bias: (Optional) The logit bias to use.
- :param logprobs: (Optional) Whether to include log probabilities in the response.
- :param max_tokens: (Optional) The maximum number of tokens to generate.
- :param n: (Optional) The number of completions to generate.
- :param presence_penalty: (Optional) Penalty applied to tokens that have already appeared in the text, regardless of frequency.
- :param seed: (Optional) The seed to use.
- :param stop: (Optional) The stop tokens to use.
- :param stream: (Optional) Whether to stream the response.
- :param stream_options: (Optional) The stream options to use.
- :param temperature: (Optional) The sampling temperature to use.
- :param top_p: (Optional) The nucleus sampling (top-p) value to use.
- :param user: (Optional) A unique identifier representing the end user, useful for abuse monitoring.
- :param suffix: (Optional) The suffix that should be appended to the completion.
- """
-
- # Standard OpenAI completion parameters
- model: str
- prompt: str | list[str] | list[int] | list[list[int]]
- best_of: int | None = None
- echo: bool | None = None
- frequency_penalty: float | None = None
- logit_bias: dict[str, float] | None = None
- logprobs: bool | None = None
- max_tokens: int | None = None
- n: int | None = None
- presence_penalty: float | None = None
- seed: int | None = None
- stop: str | list[str] | None = None
- stream: bool | None = None
- stream_options: dict[str, Any] | None = None
- temperature: float | None = None
- top_p: float | None = None
- user: str | None = None
- suffix: str | None = None
-
-
-# extra_body can be accessed via .model_extra
-@json_schema_type
-class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
- """Request parameters for OpenAI-compatible chat completion endpoint.
-
- :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
- :param messages: List of messages in the conversation.
- :param frequency_penalty: (Optional) Penalty applied to tokens based on how frequently they have appeared in the text so far.
- :param function_call: (Optional) The function call to use.
- :param functions: (Optional) List of functions to use.
- :param logit_bias: (Optional) The logit bias to use.
- :param logprobs: (Optional) Whether to include log probabilities in the response.
- :param max_completion_tokens: (Optional) The maximum number of tokens to generate.
- :param max_tokens: (Optional) The maximum number of tokens to generate (deprecated in favor of max_completion_tokens).
- :param n: (Optional) The number of completions to generate.
- :param parallel_tool_calls: (Optional) Whether to parallelize tool calls.
- :param presence_penalty: (Optional) Penalty applied to tokens that have already appeared in the text, regardless of frequency.
- :param response_format: (Optional) The response format to use.
- :param seed: (Optional) The seed to use.
- :param stop: (Optional) The stop tokens to use.
- :param stream: (Optional) Whether to stream the response.
- :param stream_options: (Optional) The stream options to use.
- :param temperature: (Optional) The sampling temperature to use.
- :param tool_choice: (Optional) The tool choice to use.
- :param tools: (Optional) The tools to use.
- :param top_logprobs: (Optional) The number of top log probabilities to return for each output token.
- :param top_p: (Optional) The nucleus sampling (top-p) value to use.
- :param user: (Optional) A unique identifier representing the end user, useful for abuse monitoring.
- """
-
- # Standard OpenAI chat completion parameters
- model: str
- messages: Annotated[list[OpenAIMessageParam], Field(..., min_length=1)]
- frequency_penalty: float | None = None
- function_call: str | dict[str, Any] | None = None
- functions: list[dict[str, Any]] | None = None
- logit_bias: dict[str, float] | None = None
- logprobs: bool | None = None
- max_completion_tokens: int | None = None
- max_tokens: int | None = None
- n: int | None = None
- parallel_tool_calls: bool | None = None
- presence_penalty: float | None = None
- response_format: OpenAIResponseFormatParam | None = None
- seed: int | None = None
- stop: str | list[str] | None = None
- stream: bool | None = None
- stream_options: dict[str, Any] | None = None
- temperature: float | None = None
- tool_choice: str | dict[str, Any] | None = None
- tools: list[dict[str, Any]] | None = None
- top_logprobs: int | None = None
- top_p: float | None = None
- user: str | None = None
-
-
-# extra_body can be accessed via .model_extra
-@json_schema_type
-class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
- """Request parameters for OpenAI-compatible embeddings endpoint.
-
- :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
- :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
- :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
- :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
- :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
- """
-
- model: str
- input: str | list[str]
- encoding_format: str | None = "float"
- dimensions: int | None = None
- user: str | None = None
-
-
-@runtime_checkable
-@trace_protocol
-class InferenceProvider(Protocol):
- """
- This protocol defines the interface that should be implemented by all inference providers.
- """
-
- API_NAMESPACE: str = "Inference"
-
- model_store: ModelStore | None = None
-
- @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
- async def rerank(
- self,
- model: str,
- query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
- items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
- max_num_results: int | None = None,
- ) -> RerankResponse:
- """Rerank a list of documents based on their relevance to a query.
-
- :param model: The identifier of the reranking model to use.
- :param query: The search query to rank items against.
Can be a string, text content part, or image content part. The input must not exceed the model's max input token length. - :param items: List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length. - :param max_num_results: (Optional) Maximum number of results to return. Default: returns all. - :returns: RerankResponse with indices sorted by relevance score (descending). - """ - raise NotImplementedError("Reranking is not implemented") - return # this is so mypy's safe-super rule will consider the method concrete - - @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1) - async def openai_completion( - self, - params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)], - ) -> OpenAICompletion: - """Create completion. - - Generate an OpenAI-compatible completion for the given prompt using the specified model. - :returns: An OpenAICompletion. - """ - ... - - @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1) - async def openai_chat_completion( - self, - params: Annotated[OpenAIChatCompletionRequestWithExtraBody, Body(...)], - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - """Create chat completions. - - Generate an OpenAI-compatible chat completion for the given messages using the specified model. - :returns: An OpenAIChatCompletion. - """ - ... - - @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1) - async def openai_embeddings( - self, - params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], - ) -> OpenAIEmbeddingsResponse: - """Create embeddings. - - Generate OpenAI-compatible embeddings for the given input using the specified model. - :returns: An OpenAIEmbeddingsResponse containing the embeddings. - """ - ... - - -class Inference(InferenceProvider): - """Inference - - Llama Stack Inference API for generating completions, chat completions, and embeddings. - - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. - """ - - @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1) - async def list_chat_completions( - self, - after: str | None = None, - limit: int | None = 20, - model: str | None = None, - order: Order | None = Order.desc, - ) -> ListOpenAIChatCompletionResponse: - """List chat completions. - - :param after: The ID of the last chat completion to return. - :param limit: The maximum number of chat completions to return. - :param model: The model to filter by. - :param order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc". - :returns: A ListOpenAIChatCompletionResponse. 
- """ - raise NotImplementedError("List chat completions is not implemented") - - @webmethod( - route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True - ) - @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1) - async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: - """Get chat completion. - - Describe a chat completion by its ID. - - :param completion_id: ID of the chat completion. - :returns: A OpenAICompletionWithInputMessages. - """ - raise NotImplementedError("Get chat completion is not implemented") diff --git a/llama_stack/apis/inspect/__init__.py b/llama_stack/apis/inspect/__init__.py deleted file mode 100644 index 016937e3d..000000000 --- a/llama_stack/apis/inspect/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inspect import * diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py deleted file mode 100644 index 8b0996e69..000000000 --- a/llama_stack/apis/inspect/inspect.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Protocol, runtime_checkable - -from pydantic import BaseModel - -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.datatypes import HealthStatus -from llama_stack.schema_utils import json_schema_type, webmethod - - -@json_schema_type -class RouteInfo(BaseModel): - """Information about an API route including its path, method, and implementing providers. - - :param route: The API endpoint path - :param method: HTTP method for the route - :param provider_types: List of provider types that implement this route - """ - - route: str - method: str - provider_types: list[str] - - -@json_schema_type -class HealthInfo(BaseModel): - """Health status information for the service. - - :param status: Current health status of the service - """ - - status: HealthStatus - - -@json_schema_type -class VersionInfo(BaseModel): - """Version information for the service. - - :param version: Version number of the service - """ - - version: str - - -class ListRoutesResponse(BaseModel): - """Response containing a list of all available API routes. - - :param data: List of available route information objects - """ - - data: list[RouteInfo] - - -@runtime_checkable -class Inspect(Protocol): - """Inspect - - APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. - """ - - @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1) - async def list_routes(self) -> ListRoutesResponse: - """List routes. - - List all available API routes with their methods and implementing providers. - - :returns: Response containing information about all available routes. - """ - ... - - @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1, require_authentication=False) - async def health(self) -> HealthInfo: - """Get health status. - - Get the current health status of the service. - - :returns: Health information indicating if the service is operational. - """ - ... 
- - @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1, require_authentication=False) - async def version(self) -> VersionInfo: - """Get version. - - Get the version of the service. - - :returns: Version information containing the service version number. - """ - ... diff --git a/llama_stack/apis/models/__init__.py b/llama_stack/apis/models/__init__.py deleted file mode 100644 index ee90106b6..000000000 --- a/llama_stack/apis/models/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .models import * diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py deleted file mode 100644 index 10949cb95..000000000 --- a/llama_stack/apis/models/models.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import StrEnum -from typing import Any, Literal, Protocol, runtime_checkable - -from pydantic import BaseModel, ConfigDict, Field, field_validator - -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, webmethod - - -class CommonModelFields(BaseModel): - metadata: dict[str, Any] = Field( - default_factory=dict, - description="Any additional metadata for this model", - ) - - -@json_schema_type -class ModelType(StrEnum): - """Enumeration of supported model types in Llama Stack. - :cvar llm: Large language model for text generation and completion - :cvar embedding: Embedding model for converting text to vector representations - """ - - llm = "llm" - embedding = "embedding" - - -@json_schema_type -class Model(CommonModelFields, Resource): - """A model resource representing an AI model registered in Llama Stack. 
-
- :param type: The resource type, always 'model' for model resources
- :param model_type: The type of model (LLM or embedding model)
- :param metadata: Any additional metadata for this model
- :param identifier: Unique identifier for this resource in llama stack
- :param provider_resource_id: Unique identifier for this resource in the provider
- :param provider_id: ID of the provider that owns this resource
- """
-
- type: Literal[ResourceType.model] = ResourceType.model
-
- @property
- def model_id(self) -> str:
- return self.identifier
-
- @property
- def provider_model_id(self) -> str:
- assert self.provider_resource_id is not None, "Provider resource ID must be set"
- return self.provider_resource_id
-
- model_config = ConfigDict(protected_namespaces=())
-
- model_type: ModelType = Field(default=ModelType.llm)
-
- @field_validator("provider_resource_id")
- @classmethod
- def validate_provider_resource_id(cls, v):
- if v is None:
- raise ValueError("provider_resource_id cannot be None")
- return v
-
-
-class ModelInput(CommonModelFields):
- model_id: str
- provider_id: str | None = None
- provider_model_id: str | None = None
- model_type: ModelType | None = ModelType.llm
- model_config = ConfigDict(protected_namespaces=())
-
-
-class ListModelsResponse(BaseModel):
- data: list[Model]
-
-
-@json_schema_type
-class OpenAIModel(BaseModel):
- """A model from OpenAI.
-
- :param id: The ID of the model
- :param object: The object type, which will be "model"
- :param created: The Unix timestamp in seconds when the model was created
- :param owned_by: The owner of the model
- """
-
- id: str
- object: Literal["model"] = "model"
- created: int
- owned_by: str
-
-
-class OpenAIListModelsResponse(BaseModel):
- data: list[OpenAIModel]
-
-
-@runtime_checkable
-@trace_protocol
-class Models(Protocol):
- @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
- async def list_models(self) -> ListModelsResponse:
- """List all models.
-
- :returns: A ListModelsResponse.
- """
- ...
-
- @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
- async def openai_list_models(self) -> OpenAIListModelsResponse:
- """List models using the OpenAI API.
-
- :returns: An OpenAIListModelsResponse.
- """
- ...
-
- @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
- async def get_model(
- self,
- model_id: str,
- ) -> Model:
- """Get model.
-
- Get a model by its identifier.
-
- :param model_id: The identifier of the model to get.
- :returns: A Model.
- """
- ...
-
- @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
- async def register_model(
- self,
- model_id: str,
- provider_model_id: str | None = None,
- provider_id: str | None = None,
- metadata: dict[str, Any] | None = None,
- model_type: ModelType | None = None,
- ) -> Model:
- """Register model.
-
- Register a model.
-
- :param model_id: The identifier of the model to register.
- :param provider_model_id: The identifier of the model in the provider.
- :param provider_id: The identifier of the provider.
- :param metadata: Any additional metadata for this model.
- :param model_type: The type of model to register.
- :returns: A Model.
- """
- ...
-
- @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
- async def unregister_model(
- self,
- model_id: str,
- ) -> None:
- """Unregister model.
-
- Unregister a model.
-
- :param model_id: The identifier of the model to unregister.
- """
- ...
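# Editor's sketch (not part of this patch): the Model resource shape deleted
# above, shown in use. Assumes the pre-removal import path still resolves;
# all identifiers are made up for illustration.
from llama_stack.apis.models.models import Model, ModelType

m = Model(
    identifier="meta-llama/Llama-3.2-3B-Instruct",  # hypothetical registry id
    provider_resource_id="llama3.2:3b",             # hypothetical provider-side id
    provider_id="ollama",
    model_type=ModelType.llm,
)
print(m.model_id)           # "meta-llama/Llama-3.2-3B-Instruct"
print(m.provider_model_id)  # "llama3.2:3b"
# Passing provider_resource_id=None raises a pydantic ValidationError via the
# validate_provider_resource_id field validator defined above.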
diff --git a/llama_stack/apis/post_training/__init__.py b/llama_stack/apis/post_training/__init__.py deleted file mode 100644 index 695575a30..000000000 --- a/llama_stack/apis/post_training/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .post_training import * diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py deleted file mode 100644 index 30a51f765..000000000 --- a/llama_stack/apis/post_training/post_training.py +++ /dev/null @@ -1,374 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from datetime import datetime -from enum import Enum -from typing import Annotated, Any, Literal, Protocol - -from pydantic import BaseModel, Field - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.job_types import JobStatus -from llama_stack.apis.common.training_types import Checkpoint -from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod - - -@json_schema_type -class OptimizerType(Enum): - """Available optimizer algorithms for training. - :cvar adam: Adaptive Moment Estimation optimizer - :cvar adamw: AdamW optimizer with weight decay - :cvar sgd: Stochastic Gradient Descent optimizer - """ - - adam = "adam" - adamw = "adamw" - sgd = "sgd" - - -@json_schema_type -class DatasetFormat(Enum): - """Format of the training dataset. - :cvar instruct: Instruction-following format with prompt and completion - :cvar dialog: Multi-turn conversation format with messages - """ - - instruct = "instruct" - dialog = "dialog" - - -@json_schema_type -class DataConfig(BaseModel): - """Configuration for training data and data loading. - - :param dataset_id: Unique identifier for the training dataset - :param batch_size: Number of samples per training batch - :param shuffle: Whether to shuffle the dataset during training - :param data_format: Format of the dataset (instruct or dialog) - :param validation_dataset_id: (Optional) Unique identifier for the validation dataset - :param packed: (Optional) Whether to pack multiple samples into a single sequence for efficiency - :param train_on_input: (Optional) Whether to compute loss on input tokens as well as output tokens - """ - - dataset_id: str - batch_size: int - shuffle: bool - data_format: DatasetFormat - validation_dataset_id: str | None = None - packed: bool | None = False - train_on_input: bool | None = False - - -@json_schema_type -class OptimizerConfig(BaseModel): - """Configuration parameters for the optimization algorithm. - - :param optimizer_type: Type of optimizer to use (adam, adamw, or sgd) - :param lr: Learning rate for the optimizer - :param weight_decay: Weight decay coefficient for regularization - :param num_warmup_steps: Number of steps for learning rate warmup - """ - - optimizer_type: OptimizerType - lr: float - weight_decay: float - num_warmup_steps: int - - -@json_schema_type -class EfficiencyConfig(BaseModel): - """Configuration for memory and compute efficiency optimizations. 
- - :param enable_activation_checkpointing: (Optional) Whether to use activation checkpointing to reduce memory usage - :param enable_activation_offloading: (Optional) Whether to offload activations to CPU to save GPU memory - :param memory_efficient_fsdp_wrap: (Optional) Whether to use memory-efficient FSDP wrapping - :param fsdp_cpu_offload: (Optional) Whether to offload FSDP parameters to CPU - """ - - enable_activation_checkpointing: bool | None = False - enable_activation_offloading: bool | None = False - memory_efficient_fsdp_wrap: bool | None = False - fsdp_cpu_offload: bool | None = False - - -@json_schema_type -class TrainingConfig(BaseModel): - """Comprehensive configuration for the training process. - - :param n_epochs: Number of training epochs to run - :param max_steps_per_epoch: Maximum number of steps to run per epoch - :param gradient_accumulation_steps: Number of steps to accumulate gradients before updating - :param max_validation_steps: (Optional) Maximum number of validation steps per epoch - :param data_config: (Optional) Configuration for data loading and formatting - :param optimizer_config: (Optional) Configuration for the optimization algorithm - :param efficiency_config: (Optional) Configuration for memory and compute optimizations - :param dtype: (Optional) Data type for model parameters (bf16, fp16, fp32) - """ - - n_epochs: int - max_steps_per_epoch: int = 1 - gradient_accumulation_steps: int = 1 - max_validation_steps: int | None = 1 - data_config: DataConfig | None = None - optimizer_config: OptimizerConfig | None = None - efficiency_config: EfficiencyConfig | None = None - dtype: str | None = "bf16" - - -@json_schema_type -class LoraFinetuningConfig(BaseModel): - """Configuration for Low-Rank Adaptation (LoRA) fine-tuning. - - :param type: Algorithm type identifier, always "LoRA" - :param lora_attn_modules: List of attention module names to apply LoRA to - :param apply_lora_to_mlp: Whether to apply LoRA to MLP layers - :param apply_lora_to_output: Whether to apply LoRA to output projection layers - :param rank: Rank of the LoRA adaptation (lower rank = fewer parameters) - :param alpha: LoRA scaling parameter that controls adaptation strength - :param use_dora: (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) - :param quantize_base: (Optional) Whether to quantize the base model weights - """ - - type: Literal["LoRA"] = "LoRA" - lora_attn_modules: list[str] - apply_lora_to_mlp: bool - apply_lora_to_output: bool - rank: int - alpha: int - use_dora: bool | None = False - quantize_base: bool | None = False - - -@json_schema_type -class QATFinetuningConfig(BaseModel): - """Configuration for Quantization-Aware Training (QAT) fine-tuning. - - :param type: Algorithm type identifier, always "QAT" - :param quantizer_name: Name of the quantization algorithm to use - :param group_size: Size of groups for grouped quantization - """ - - type: Literal["QAT"] = "QAT" - quantizer_name: str - group_size: int - - -AlgorithmConfig = Annotated[LoraFinetuningConfig | QATFinetuningConfig, Field(discriminator="type")] -register_schema(AlgorithmConfig, name="AlgorithmConfig") - - -@json_schema_type -class PostTrainingJobLogStream(BaseModel): - """Stream of logs from a finetuning job. 
- - :param job_uuid: Unique identifier for the training job - :param log_lines: List of log message strings from the training process - """ - - job_uuid: str - log_lines: list[str] - - -@json_schema_type -class RLHFAlgorithm(Enum): - """Available reinforcement learning from human feedback algorithms. - :cvar dpo: Direct Preference Optimization algorithm - """ - - dpo = "dpo" - - -@json_schema_type -class DPOLossType(Enum): - sigmoid = "sigmoid" - hinge = "hinge" - ipo = "ipo" - kto_pair = "kto_pair" - - -@json_schema_type -class DPOAlignmentConfig(BaseModel): - """Configuration for Direct Preference Optimization (DPO) alignment. - - :param beta: Temperature parameter for the DPO loss - :param loss_type: The type of loss function to use for DPO - """ - - beta: float - loss_type: DPOLossType = DPOLossType.sigmoid - - -@json_schema_type -class PostTrainingRLHFRequest(BaseModel): - """Request to finetune a model using reinforcement learning from human feedback. - - :param job_uuid: Unique identifier for the training job - :param finetuned_model: URL or path to the base model to fine-tune - :param dataset_id: Unique identifier for the training dataset - :param validation_dataset_id: Unique identifier for the validation dataset - :param algorithm: RLHF algorithm to use for training - :param algorithm_config: Configuration parameters for the RLHF algorithm - :param optimizer_config: Configuration parameters for the optimization algorithm - :param training_config: Configuration parameters for the training process - :param hyperparam_search_config: Configuration for hyperparameter search - :param logger_config: Configuration for training logging - """ - - job_uuid: str - - finetuned_model: URL - - dataset_id: str - validation_dataset_id: str - - algorithm: RLHFAlgorithm - algorithm_config: DPOAlignmentConfig - - optimizer_config: OptimizerConfig - training_config: TrainingConfig - - # TODO: define these - hyperparam_search_config: dict[str, Any] - logger_config: dict[str, Any] - - -class PostTrainingJob(BaseModel): - job_uuid: str - - -@json_schema_type -class PostTrainingJobStatusResponse(BaseModel): - """Status of a finetuning job. - - :param job_uuid: Unique identifier for the training job - :param status: Current status of the training job - :param scheduled_at: (Optional) Timestamp when the job was scheduled - :param started_at: (Optional) Timestamp when the job execution began - :param completed_at: (Optional) Timestamp when the job finished, if completed - :param resources_allocated: (Optional) Information about computational resources allocated to the job - :param checkpoints: List of model checkpoints created during training - """ - - job_uuid: str - status: JobStatus - - scheduled_at: datetime | None = None - started_at: datetime | None = None - completed_at: datetime | None = None - - resources_allocated: dict[str, Any] | None = None - - checkpoints: list[Checkpoint] = Field(default_factory=list) - - -class ListPostTrainingJobsResponse(BaseModel): - data: list[PostTrainingJob] - - -@json_schema_type -class PostTrainingJobArtifactsResponse(BaseModel): - """Artifacts of a finetuning job. 
-
- :param job_uuid: Unique identifier for the training job
- :param checkpoints: List of model checkpoints created during training
- """
-
- job_uuid: str
- checkpoints: list[Checkpoint] = Field(default_factory=list)
-
- # TODO(ashwin): metrics, evals
-
-
-class PostTraining(Protocol):
- @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
- @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
- async def supervised_fine_tune(
- self,
- job_uuid: str,
- training_config: TrainingConfig,
- hyperparam_search_config: dict[str, Any],
- logger_config: dict[str, Any],
- model: str | None = Field(
- default=None,
- description="Model descriptor for training if not in provider config",
- ),
- checkpoint_dir: str | None = None,
- algorithm_config: AlgorithmConfig | None = None,
- ) -> PostTrainingJob:
- """Run supervised fine-tuning of a model.
-
- :param job_uuid: The UUID of the job to create.
- :param training_config: The training configuration.
- :param hyperparam_search_config: The hyperparam search configuration.
- :param logger_config: The logger configuration.
- :param model: The model to fine-tune.
- :param checkpoint_dir: The directory to save checkpoint(s) to.
- :param algorithm_config: The algorithm configuration.
- :returns: A PostTrainingJob.
- """
- ...
-
- @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
- @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
- async def preference_optimize(
- self,
- job_uuid: str,
- finetuned_model: str,
- algorithm_config: DPOAlignmentConfig,
- training_config: TrainingConfig,
- hyperparam_search_config: dict[str, Any],
- logger_config: dict[str, Any],
- ) -> PostTrainingJob:
- """Run preference optimization of a model.
-
- :param job_uuid: The UUID of the job to create.
- :param finetuned_model: The model to fine-tune.
- :param algorithm_config: The algorithm configuration.
- :param training_config: The training configuration.
- :param hyperparam_search_config: The hyperparam search configuration.
- :param logger_config: The logger configuration.
- :returns: A PostTrainingJob.
- """
- ...
-
- @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
- @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
- async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
- """Get all training jobs.
-
- :returns: A ListPostTrainingJobsResponse.
- """
- ...
-
- @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
- @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
- async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
- """Get the status of a training job.
-
- :param job_uuid: The UUID of the job to get the status of.
- :returns: A PostTrainingJobStatusResponse.
- """
- ...
-
- @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
- @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
- async def cancel_training_job(self, job_uuid: str) -> None:
- """Cancel a training job.
-
- :param job_uuid: The UUID of the job to cancel.
- """
- ...
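# Editor's sketch (not part of this patch): assembling the config models
# defined above into a supervised_fine_tune call. All identifiers are
# hypothetical, and `provider` stands in for any object implementing the
# PostTraining protocol; imports assume the pre-removal module path.
from llama_stack.apis.post_training.post_training import (
    DataConfig,
    DatasetFormat,
    LoraFinetuningConfig,
    OptimizerConfig,
    OptimizerType,
    TrainingConfig,
)

training_config = TrainingConfig(
    n_epochs=1,
    max_steps_per_epoch=100,
    gradient_accumulation_steps=1,
    data_config=DataConfig(
        dataset_id="my-instruct-dataset",  # hypothetical dataset id
        batch_size=4,
        shuffle=True,
        data_format=DatasetFormat.instruct,
    ),
    optimizer_config=OptimizerConfig(
        optimizer_type=OptimizerType.adamw,
        lr=1e-4,
        weight_decay=0.01,
        num_warmup_steps=10,
    ),
)

lora = LoraFinetuningConfig(
    lora_attn_modules=["q_proj", "v_proj"],
    apply_lora_to_mlp=False,
    apply_lora_to_output=False,
    rank=8,
    alpha=16,
)

# job = await provider.supervised_fine_tune(
#     job_uuid="job-0001",
#     training_config=training_config,
#     hyperparam_search_config={},
#     logger_config={},
#     model="meta-llama/Llama-3.2-3B-Instruct",
#     algorithm_config=lora,
# )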
- - @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: - """Get the artifacts of a training job. - - :param job_uuid: The UUID of the job to get the artifacts of. - :returns: A PostTrainingJobArtifactsResponse. - """ - ... diff --git a/llama_stack/apis/prompts/__init__.py b/llama_stack/apis/prompts/__init__.py deleted file mode 100644 index 6070f3450..000000000 --- a/llama_stack/apis/prompts/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .prompts import ListPromptsResponse, Prompt, Prompts - -__all__ = ["Prompt", "Prompts", "ListPromptsResponse"] diff --git a/llama_stack/apis/prompts/prompts.py b/llama_stack/apis/prompts/prompts.py deleted file mode 100644 index b39c363c7..000000000 --- a/llama_stack/apis/prompts/prompts.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import re -import secrets -from typing import Protocol, runtime_checkable - -from pydantic import BaseModel, Field, field_validator, model_validator - -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, webmethod - - -@json_schema_type -class Prompt(BaseModel): - """A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. - - :param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API. 
- :param version: Version (integer starting at 1, incremented on save) - :param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>' - :param variables: List of prompt variable names that can be used in the prompt template - :param is_default: Boolean indicating whether this version is the default version for this prompt - """ - - prompt: str | None = Field(default=None, description="The system prompt with variable placeholders") - version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1) - prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'") - variables: list[str] = Field( - default_factory=list, description="List of variable names that can be used in the prompt template" - ) - is_default: bool = Field( - default=False, description="Boolean indicating whether this version is the default version" - ) - - @field_validator("prompt_id") - @classmethod - def validate_prompt_id(cls, prompt_id: str) -> str: - if not isinstance(prompt_id, str): - raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'") - - if not prompt_id.startswith("pmpt_"): - raise ValueError("prompt_id must start with 'pmpt_' prefix") - - hex_part = prompt_id[5:] - if len(hex_part) != 48: - raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)") - - for char in hex_part: - if char not in "0123456789abcdef": - raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]") - - return prompt_id - - @field_validator("version") - @classmethod - def validate_version(cls, prompt_version: int) -> int: - if prompt_version < 1: - raise ValueError("version must be >= 1") - return prompt_version - - @model_validator(mode="after") - def validate_prompt_variables(self): - """Validate that all variables used in the prompt are declared in the variables list.""" - if not self.prompt: - return self - - prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt)) - declared_variables = set(self.variables) - - undeclared = prompt_variables - declared_variables - if undeclared: - raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}") - - return self - - @classmethod - def generate_prompt_id(cls) -> str: - # Generate 48 hex characters (24 bytes) - random_bytes = secrets.token_bytes(24) - hex_string = random_bytes.hex() - return f"pmpt_{hex_string}" - - -class ListPromptsResponse(BaseModel): - """Response model to list prompts.""" - - data: list[Prompt] - - -@runtime_checkable -@trace_protocol -class Prompts(Protocol): - """Prompts - - Protocol for prompt management operations.""" - - @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1) - async def list_prompts(self) -> ListPromptsResponse: - """List all prompts. - - :returns: A ListPromptsResponse containing all prompts. - """ - ... - - @webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1) - async def list_prompt_versions( - self, - prompt_id: str, - ) -> ListPromptsResponse: - """List prompt versions. - - List all versions of a specific prompt. - - :param prompt_id: The identifier of the prompt to list versions for. - :returns: A ListPromptsResponse containing all versions of the prompt. - """ - ... - - @webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1) - async def get_prompt( - self, - prompt_id: str, - version: int | None = None, - ) -> Prompt: - """Get prompt. 
- - Get a prompt by its identifier and optional version. - - :param prompt_id: The identifier of the prompt to get. - :param version: The version of the prompt to get (defaults to latest). - :returns: A Prompt resource. - """ - ... - - @webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1) - async def create_prompt( - self, - prompt: str, - variables: list[str] | None = None, - ) -> Prompt: - """Create prompt. - - Create a new prompt. - - :param prompt: The prompt text content with variable placeholders. - :param variables: List of variable names that can be used in the prompt template. - :returns: The created Prompt resource. - """ - ... - - @webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1) - async def update_prompt( - self, - prompt_id: str, - prompt: str, - version: int, - variables: list[str] | None = None, - set_as_default: bool = True, - ) -> Prompt: - """Update prompt. - - Update an existing prompt (increments version). - - :param prompt_id: The identifier of the prompt to update. - :param prompt: The updated prompt text content. - :param version: The current version of the prompt being updated. - :param variables: Updated list of variable names that can be used in the prompt template. - :param set_as_default: Set the new version as the default (default=True). - :returns: The updated Prompt resource with incremented version. - """ - ... - - @webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1) - async def delete_prompt( - self, - prompt_id: str, - ) -> None: - """Delete prompt. - - Delete a prompt. - - :param prompt_id: The identifier of the prompt to delete. - """ - ... - - @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1) - async def set_default_version( - self, - prompt_id: str, - version: int, - ) -> Prompt: - """Set prompt version. - - Set which version of a prompt should be the default in get_prompt (latest). - - :param prompt_id: The identifier of the prompt. - :param version: The version to set as default. - :returns: The prompt with the specified version now set as default. - """ - ... diff --git a/llama_stack/apis/providers/__init__.py b/llama_stack/apis/providers/__init__.py deleted file mode 100644 index e35e2fe47..000000000 --- a/llama_stack/apis/providers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .providers import * diff --git a/llama_stack/apis/providers/providers.py b/llama_stack/apis/providers/providers.py deleted file mode 100644 index e1872571d..000000000 --- a/llama_stack/apis/providers/providers.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any, Protocol, runtime_checkable - -from pydantic import BaseModel - -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.datatypes import HealthResponse -from llama_stack.schema_utils import json_schema_type, webmethod - - -@json_schema_type -class ProviderInfo(BaseModel): - """Information about a registered provider including its configuration and health status. 
-
- :param api: The API name this provider implements
- :param provider_id: Unique identifier for the provider
- :param provider_type: The type of provider implementation
- :param config: Configuration parameters for the provider
- :param health: Current health status of the provider
- """
-
- api: str
- provider_id: str
- provider_type: str
- config: dict[str, Any]
- health: HealthResponse
-
-
-class ListProvidersResponse(BaseModel):
- """Response containing a list of all available providers.
-
- :param data: List of provider information objects
- """
-
- data: list[ProviderInfo]
-
-
-@runtime_checkable
-class Providers(Protocol):
- """Providers
-
- Providers API for inspecting, listing, and modifying providers and their configurations.
- """
-
- @webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
- async def list_providers(self) -> ListProvidersResponse:
- """List providers.
-
- List all available providers.
-
- :returns: A ListProvidersResponse containing information about all providers.
- """
- ...
-
- @webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
- async def inspect_provider(self, provider_id: str) -> ProviderInfo:
- """Get provider.
-
- Get detailed information about a specific provider.
-
- :param provider_id: The ID of the provider to inspect.
- :returns: A ProviderInfo object containing the provider's details.
- """
- ...
diff --git a/llama_stack/apis/safety/__init__.py b/llama_stack/apis/safety/__init__.py
deleted file mode 100644
index d93bc1355..000000000
--- a/llama_stack/apis/safety/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .safety import *
diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py
deleted file mode 100644
index eaaa937d3..000000000
--- a/llama_stack/apis/safety/safety.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from enum import Enum
-from typing import Any, Protocol, runtime_checkable
-
-from pydantic import BaseModel, Field
-
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.shields import Shield
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
-from llama_stack.schema_utils import json_schema_type, webmethod
-
-
-@json_schema_type
-class ModerationObjectResults(BaseModel):
- """Moderation results for a single input.
- :param flagged: Whether any of the below categories are flagged.
- :param categories: A list of the categories, and whether they are flagged or not.
- :param category_applied_input_types: A list of the categories along with the input type(s) that the score applies to.
- :param category_scores: A list of the categories along with their scores as predicted by the model.
- """
-
- flagged: bool
- categories: dict[str, bool] | None = None
- category_applied_input_types: dict[str, list[str]] | None = None
- category_scores: dict[str, float] | None = None
- user_message: str | None = None
- metadata: dict[str, Any] = Field(default_factory=dict)
-
-
-@json_schema_type
-class ModerationObject(BaseModel):
- """A moderation object.
- :param id: The unique identifier for the moderation request. - :param model: The model used to generate the moderation results. - :param results: A list of moderation objects - """ - - id: str - model: str - results: list[ModerationObjectResults] - - -@json_schema_type -class ViolationLevel(Enum): - """Severity level of a safety violation. - - :cvar INFO: Informational level violation that does not require action - :cvar WARN: Warning level violation that suggests caution but allows continuation - :cvar ERROR: Error level violation that requires blocking or intervention - """ - - INFO = "info" - WARN = "warn" - ERROR = "error" - - -@json_schema_type -class SafetyViolation(BaseModel): - """Details of a safety violation detected by content moderation. - - :param violation_level: Severity level of the violation - :param user_message: (Optional) Message to convey to the user about the violation - :param metadata: Additional metadata including specific violation codes for debugging and telemetry - """ - - violation_level: ViolationLevel - - # what message should you convey to the user - user_message: str | None = None - - # additional metadata (including specific violation codes) more for - # debugging, telemetry - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RunShieldResponse(BaseModel): - """Response from running a safety shield. - - :param violation: (Optional) Safety violation detected by the shield, if any - """ - - violation: SafetyViolation | None = None - - -class ShieldStore(Protocol): - async def get_shield(self, identifier: str) -> Shield: ... - - -@runtime_checkable -@trace_protocol -class Safety(Protocol): - """Safety - - OpenAI-compatible Moderations API. - """ - - shield_store: ShieldStore - - @webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1) - async def run_shield( - self, - shield_id: str, - messages: list[OpenAIMessageParam], - params: dict[str, Any], - ) -> RunShieldResponse: - """Run shield. - - Run a shield. - - :param shield_id: The identifier of the shield to run. - :param messages: The messages to run the shield on. - :param params: The parameters of the shield. - :returns: A RunShieldResponse. - """ - ... - - @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1) - async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: - """Create moderation. - - Classifies if text and/or image inputs are potentially harmful. - :param input: Input (or inputs) to classify. - Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models. - :param model: The content moderation model you would like to use. - :returns: A moderation object. - """ - ... diff --git a/llama_stack/apis/scoring/__init__.py b/llama_stack/apis/scoring/__init__.py deleted file mode 100644 index 624b9e704..000000000 --- a/llama_stack/apis/scoring/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
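To make the surface of the deleted Safety protocol concrete, here is a minimal client-side sketch of the two routes defined above. The server URL, the shield identifier, and the exact client method shapes are illustrative assumptions, not something this diff guarantees.

```python
# Minimal sketch of the Safety API above (run_shield plus the OpenAI-compatible
# moderations route). The base URL and shield id are hypothetical.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# POST /safety/run-shield
resp = client.safety.run_shield(
    shield_id="llama-guard",  # hypothetical registered shield
    messages=[{"role": "user", "content": "Is this message safe?"}],
    params={},
)
if resp.violation:
    print(resp.violation.violation_level, resp.violation.user_message)

# POST /moderations (mirrors OpenAI's moderations endpoint)
moderation = client.moderations.create(input="Text to classify", model="llama-guard")
print(moderation.results[0].flagged)
```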
- -from .scoring import * diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py deleted file mode 100644 index 03d943e94..000000000 --- a/llama_stack/apis/scoring/scoring.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any, Protocol, runtime_checkable - -from pydantic import BaseModel - -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod - -# mapping of metric to value -ScoringResultRow = dict[str, Any] - - -@json_schema_type -class ScoringResult(BaseModel): - """ - A scoring result for a single row. - - :param score_rows: The scoring result for each row. Each row is a map of column name to value. - :param aggregated_results: Map of metric name to aggregated value - """ - - score_rows: list[ScoringResultRow] - # aggregated metrics to value - aggregated_results: dict[str, Any] - - -@json_schema_type -class ScoreBatchResponse(BaseModel): - """Response from batch scoring operations on datasets. - - :param dataset_id: (Optional) The identifier of the dataset that was scored - :param results: A map of scoring function name to ScoringResult - """ - - dataset_id: str | None = None - results: dict[str, ScoringResult] - - -@json_schema_type -class ScoreResponse(BaseModel): - """ - The response from scoring. - - :param results: A map of scoring function name to ScoringResult. - """ - - # each key in the dict is a scoring function name - results: dict[str, ScoringResult] - - -class ScoringFunctionStore(Protocol): - def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: ... - - -@runtime_checkable -class Scoring(Protocol): - scoring_function_store: ScoringFunctionStore - - @webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1) - async def score_batch( - self, - dataset_id: str, - scoring_functions: dict[str, ScoringFnParams | None], - save_results_dataset: bool = False, - ) -> ScoreBatchResponse: - """Score a batch of rows. - - :param dataset_id: The ID of the dataset to score. - :param scoring_functions: The scoring functions to use for the scoring. - :param save_results_dataset: Whether to save the results to a dataset. - :returns: A ScoreBatchResponse. - """ - ... - - @webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1) - async def score( - self, - input_rows: list[dict[str, Any]], - scoring_functions: dict[str, ScoringFnParams | None], - ) -> ScoreResponse: - """Score a list of rows. - - :param input_rows: The rows to score. - :param scoring_functions: The scoring functions to use for the scoring. - :returns: A ScoreResponse object containing rows and aggregated results. - """ - ... diff --git a/llama_stack/apis/scoring_functions/__init__.py b/llama_stack/apis/scoring_functions/__init__.py deleted file mode 100644 index fc1de0311..000000000 --- a/llama_stack/apis/scoring_functions/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
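For orientation, a minimal sketch of how the score endpoint above is typically exercised. The scoring function identifier and client signatures follow the routes in this file but are illustrative assumptions.

```python
# Minimal sketch of the Scoring API above: score two rows with one scoring
# function. The function id "basic::equality" is illustrative.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.scoring.score(
    input_rows=[
        {"input_query": "2 + 2?", "generated_answer": "4", "expected_answer": "4"},
        {"input_query": "Capital of France?", "generated_answer": "Lyon", "expected_answer": "Paris"},
    ],
    # None means "use the scoring function's default params"
    scoring_functions={"basic::equality": None},
)
for fn_id, result in response.results.items():
    print(fn_id, result.aggregated_results)
```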
- -from .scoring_functions import * diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py deleted file mode 100644 index fe49723ab..000000000 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import StrEnum -from typing import ( - Annotated, - Any, - Literal, - Protocol, - runtime_checkable, -) - -from pydantic import BaseModel, Field - -from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod - - -# Perhaps more structure can be imposed on these functions. Maybe they could be associated -# with standard metrics so they can be rolled up? -@json_schema_type -class ScoringFnParamsType(StrEnum): - """Types of scoring function parameter configurations. - :cvar llm_as_judge: Use an LLM model to evaluate and score responses - :cvar regex_parser: Use regex patterns to extract and score specific parts of responses - :cvar basic: Basic scoring with simple aggregation functions - """ - - llm_as_judge = "llm_as_judge" - regex_parser = "regex_parser" - basic = "basic" - - -@json_schema_type -class AggregationFunctionType(StrEnum): - """Types of aggregation functions for scoring results. - :cvar average: Calculate the arithmetic mean of scores - :cvar weighted_average: Calculate a weighted average of scores - :cvar median: Calculate the median value of scores - :cvar categorical_count: Count occurrences of categorical values - :cvar accuracy: Calculate accuracy as the proportion of correct answers - """ - - average = "average" - weighted_average = "weighted_average" - median = "median" - categorical_count = "categorical_count" - accuracy = "accuracy" - - -@json_schema_type -class LLMAsJudgeScoringFnParams(BaseModel): - """Parameters for LLM-as-judge scoring function configuration. - :param type: The type of scoring function parameters, always llm_as_judge - :param judge_model: Identifier of the LLM model to use as a judge for scoring - :param prompt_template: (Optional) Custom prompt template for the judge model - :param judge_score_regexes: Regexes to extract the answer from generated response - :param aggregation_functions: Aggregation functions to apply to the scores of each row - """ - - type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge - judge_model: str - prompt_template: str | None = None - judge_score_regexes: list[str] = Field( - description="Regexes to extract the answer from generated response", - default_factory=lambda: [], - ) - aggregation_functions: list[AggregationFunctionType] = Field( - description="Aggregation functions to apply to the scores of each row", - default_factory=lambda: [], - ) - - -@json_schema_type -class RegexParserScoringFnParams(BaseModel): - """Parameters for regex parser scoring function configuration.
- :param type: The type of scoring function parameters, always regex_parser - :param parsing_regexes: Regex to extract the answer from generated response - :param aggregation_functions: Aggregation functions to apply to the scores of each row - """ - - type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser - parsing_regexes: list[str] = Field( - description="Regex to extract the answer from generated response", - default_factory=lambda: [], - ) - aggregation_functions: list[AggregationFunctionType] = Field( - description="Aggregation functions to apply to the scores of each row", - default_factory=lambda: [], - ) - - -@json_schema_type -class BasicScoringFnParams(BaseModel): - """Parameters for basic scoring function configuration. - :param type: The type of scoring function parameters, always basic - :param aggregation_functions: Aggregation functions to apply to the scores of each row - """ - - type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic - aggregation_functions: list[AggregationFunctionType] = Field( - description="Aggregation functions to apply to the scores of each row", - default_factory=list, - ) - - -ScoringFnParams = Annotated[ - LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams, - Field(discriminator="type"), -] -register_schema(ScoringFnParams, name="ScoringFnParams") - - -class CommonScoringFnFields(BaseModel): - description: str | None = None - metadata: dict[str, Any] = Field( - default_factory=dict, - description="Any additional metadata for this definition", - ) - return_type: ParamType = Field( - description="The return type of the deterministic function", - ) - params: ScoringFnParams | None = Field( - description="The parameters for the scoring function for benchmark eval, these can be overridden for app eval", - default=None, - ) - - -@json_schema_type -class ScoringFn(CommonScoringFnFields, Resource): - """A scoring function resource for evaluating model outputs. - :param type: The resource type, always scoring_function - """ - - type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function - - @property - def scoring_fn_id(self) -> str: - return self.identifier - - @property - def provider_scoring_fn_id(self) -> str | None: - return self.provider_resource_id - - -class ScoringFnInput(CommonScoringFnFields, BaseModel): - scoring_fn_id: str - provider_id: str | None = None - provider_scoring_fn_id: str | None = None - - -class ListScoringFunctionsResponse(BaseModel): - data: list[ScoringFn] - - -@runtime_checkable -class ScoringFunctions(Protocol): - @webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1) - async def list_scoring_functions(self) -> ListScoringFunctionsResponse: - """List all scoring functions. - - :returns: A ListScoringFunctionsResponse. - """ - ... - - @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1) - async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: - """Get a scoring function by its ID. - - :param scoring_fn_id: The ID of the scoring function to get. - :returns: A ScoringFn. - """ - ... - - @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1) - async def register_scoring_function( - self, - scoring_fn_id: str, - description: str, - return_type: ParamType, - provider_scoring_fn_id: str | None = None, - provider_id: str | None = None, - params: ScoringFnParams | None = None, - ) -> None: - """Register a scoring function. 
- - :param scoring_fn_id: The ID of the scoring function to register. - :param description: The description of the scoring function. - :param return_type: The return type of the scoring function. - :param provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. - :param provider_id: The ID of the provider to use for the scoring function. - :param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval. - """ - ... - - @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) - async def unregister_scoring_function(self, scoring_fn_id: str) -> None: - """Unregister a scoring function. - - :param scoring_fn_id: The ID of the scoring function to unregister. - """ - ... diff --git a/llama_stack/apis/shields/__init__.py b/llama_stack/apis/shields/__init__.py deleted file mode 100644 index 783a4d124..000000000 --- a/llama_stack/apis/shields/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .shields import * diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py deleted file mode 100644 index 5d967cf02..000000000 --- a/llama_stack/apis/shields/shields.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any, Literal, Protocol, runtime_checkable - -from pydantic import BaseModel - -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, webmethod - - -class CommonShieldFields(BaseModel): - params: dict[str, Any] | None = None - - -@json_schema_type -class Shield(CommonShieldFields, Resource): - """A safety shield resource that can be used to check content. - - :param params: (Optional) Configuration parameters for the shield - :param type: The resource type, always shield - """ - - type: Literal[ResourceType.shield] = ResourceType.shield - - @property - def shield_id(self) -> str: - return self.identifier - - @property - def provider_shield_id(self) -> str | None: - return self.provider_resource_id - - -class ShieldInput(CommonShieldFields): - shield_id: str - provider_id: str | None = None - provider_shield_id: str | None = None - - -class ListShieldsResponse(BaseModel): - data: list[Shield] - - -@runtime_checkable -@trace_protocol -class Shields(Protocol): - @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1) - async def list_shields(self) -> ListShieldsResponse: - """List all shields. - - :returns: A ListShieldsResponse. - """ - ... - - @webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1) - async def get_shield(self, identifier: str) -> Shield: - """Get a shield by its identifier. - - :param identifier: The identifier of the shield to get. - :returns: A Shield. - """ - ... 
- - @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1) - async def register_shield( - self, - shield_id: str, - provider_shield_id: str | None = None, - provider_id: str | None = None, - params: dict[str, Any] | None = None, - ) -> Shield: - """Register a shield. - - :param shield_id: The identifier of the shield to register. - :param provider_shield_id: The identifier of the shield in the provider. - :param provider_id: The identifier of the provider. - :param params: The parameters of the shield. - :returns: A Shield. - """ - ... - - @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1) - async def unregister_shield(self, identifier: str) -> None: - """Unregister a shield. - - :param identifier: The identifier of the shield to unregister. - """ - ... diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/llama_stack/apis/synthetic_data_generation/__init__.py deleted file mode 100644 index bc169e8e6..000000000 --- a/llama_stack/apis/synthetic_data_generation/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .synthetic_data_generation import * diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py deleted file mode 100644 index c13e2c17c..000000000 --- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum -from typing import Any, Protocol - -from pydantic import BaseModel - -from llama_stack.apis.inference import Message -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod - - -class FilteringFunction(Enum): - """The type of filtering function. - - :cvar none: No filtering applied, accept all generated synthetic data - :cvar random: Random sampling of generated data points - :cvar top_k: Keep only the top-k highest scoring synthetic data samples - :cvar top_p: Nucleus-style filtering, keep samples exceeding cumulative score threshold - :cvar top_k_top_p: Combined top-k and top-p filtering strategy - :cvar sigmoid: Apply sigmoid function for probability-based filtering - """ - - none = "none" - random = "random" - top_k = "top_k" - top_p = "top_p" - top_k_top_p = "top_k_top_p" - sigmoid = "sigmoid" - - -@json_schema_type -class SyntheticDataGenerationRequest(BaseModel): - """Request to generate synthetic data. A small batch of prompts and a filtering function - - :param dialogs: List of conversation messages to use as input for synthetic data generation - :param filtering_function: Type of filtering to apply to generated synthetic data samples - :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint - """ - - dialogs: list[Message] - filtering_function: FilteringFunction = FilteringFunction.none - model: str | None = None - - -@json_schema_type -class SyntheticDataGenerationResponse(BaseModel): - """Response from the synthetic data generation. 
Batch of (prompt, response, score) tuples that pass the threshold. - - :param synthetic_data: List of generated synthetic data samples that passed the filtering criteria - :param statistics: (Optional) Statistical information about the generation process and filtering results - """ - - synthetic_data: list[dict[str, Any]] - statistics: dict[str, Any] | None = None - - -class SyntheticDataGeneration(Protocol): - @webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1) - def synthetic_data_generate( - self, - dialogs: list[Message], - filtering_function: FilteringFunction = FilteringFunction.none, - model: str | None = None, - ) -> SyntheticDataGenerationResponse: - """Generate synthetic data based on input dialogs and apply filtering. - - :param dialogs: List of conversation messages to use as input for synthetic data generation - :param filtering_function: Type of filtering to apply to generated synthetic data samples - :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint - :returns: Response containing filtered synthetic data samples and optional statistics - """ - ... diff --git a/llama_stack/apis/telemetry/__init__.py b/llama_stack/apis/telemetry/__init__.py deleted file mode 100644 index 1250767f7..000000000 --- a/llama_stack/apis/telemetry/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .telemetry import * diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py deleted file mode 100644 index 53387639b..000000000 --- a/llama_stack/apis/telemetry/telemetry.py +++ /dev/null @@ -1,423 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from datetime import datetime -from enum import Enum -from typing import ( - Annotated, - Any, - Literal, - Protocol, - runtime_checkable, -) - -from pydantic import BaseModel, Field - -from llama_stack.models.llama.datatypes import Primitive -from llama_stack.schema_utils import json_schema_type, register_schema - -# Default TTL for telemetry events, in days -DEFAULT_TTL_DAYS = 7 - - -@json_schema_type -class SpanStatus(Enum): - """The status of a span indicating whether it completed successfully or with an error. - :cvar OK: Span completed successfully without errors - :cvar ERROR: Span completed with an error or failure - """ - - OK = "ok" - ERROR = "error" - - -@json_schema_type -class Span(BaseModel): - """A span representing a single operation within a trace.
- :param span_id: Unique identifier for the span - :param trace_id: Unique identifier for the trace this span belongs to - :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span - :param name: Human-readable name describing the operation this span represents - :param start_time: Timestamp when the operation began - :param end_time: (Optional) Timestamp when the operation finished, if completed - :param attributes: (Optional) Key-value pairs containing additional metadata about the span - """ - - span_id: str - trace_id: str - parent_span_id: str | None = None - name: str - start_time: datetime - end_time: datetime | None = None - attributes: dict[str, Any] | None = Field(default_factory=lambda: {}) - - def set_attribute(self, key: str, value: Any): - if self.attributes is None: - self.attributes = {} - self.attributes[key] = value - - -@json_schema_type -class Trace(BaseModel): - """A trace representing the complete execution path of a request across multiple operations. - :param trace_id: Unique identifier for the trace - :param root_span_id: Unique identifier for the root span that started this trace - :param start_time: Timestamp when the trace began - :param end_time: (Optional) Timestamp when the trace finished, if completed - """ - - trace_id: str - root_span_id: str - start_time: datetime - end_time: datetime | None = None - - -@json_schema_type -class EventType(Enum): - """The type of telemetry event being logged. - :cvar UNSTRUCTURED_LOG: A simple log message with severity level - :cvar STRUCTURED_LOG: A structured log event with typed payload data - :cvar METRIC: A metric measurement with value and unit - """ - - UNSTRUCTURED_LOG = "unstructured_log" - STRUCTURED_LOG = "structured_log" - METRIC = "metric" - - -@json_schema_type -class LogSeverity(Enum): - """The severity level of a log message. - :cvar VERBOSE: Detailed diagnostic information for troubleshooting - :cvar DEBUG: Debug information useful during development - :cvar INFO: General informational messages about normal operation - :cvar WARN: Warning messages about potentially problematic situations - :cvar ERROR: Error messages indicating failures that don't stop execution - :cvar CRITICAL: Critical error messages indicating severe failures - """ - - VERBOSE = "verbose" - DEBUG = "debug" - INFO = "info" - WARN = "warn" - ERROR = "error" - CRITICAL = "critical" - - -class EventCommon(BaseModel): - """Common fields shared by all telemetry events. - :param trace_id: Unique identifier for the trace this event belongs to - :param span_id: Unique identifier for the span this event belongs to - :param timestamp: Timestamp when the event occurred - :param attributes: (Optional) Key-value pairs containing additional metadata about the event - """ - - trace_id: str - span_id: str - timestamp: datetime - attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {}) - - -@json_schema_type -class UnstructuredLogEvent(EventCommon): - """An unstructured log event containing a simple text message. - :param type: Event type identifier set to UNSTRUCTURED_LOG - :param message: The log message text - :param severity: The severity level of the log message - """ - - type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG - message: str - severity: LogSeverity - - -@json_schema_type -class MetricEvent(EventCommon): - """A metric event containing a measured value. 
- :param type: Event type identifier set to METRIC - :param metric: The name of the metric being measured - :param value: The numeric value of the metric measurement - :param unit: The unit of measurement for the metric value - """ - - type: Literal[EventType.METRIC] = EventType.METRIC - metric: str # this would be an enum - value: int | float - unit: str - - -@json_schema_type -class MetricInResponse(BaseModel): - """A metric value included in API responses. - :param metric: The name of the metric - :param value: The numeric value of the metric - :param unit: (Optional) The unit of measurement for the metric value - """ - - metric: str - value: int | float - unit: str | None = None - - -# This is a short-term solution to allow the inference API to return metrics. -# The ideal way to do this is to have a way for all response types to include metrics -# and all metric events logged to the telemetry API to be included with the response. -# To do this, we will need to augment all response types with a metrics field. -# We have hit a blocker from the Stainless SDK that prevents us from doing this. -# The blocker is that if we were to augment the response types that have a data field -# in them like so -# class ListModelsResponse(BaseModel): -# metrics: Optional[List[MetricEvent]] = None -# data: List[Models] -# ... -# The client SDK will need to access the data by using a .data field, which is not -# ergonomic. The Stainless SDK does support unwrapping the response type, but it -# requires that the response type have only a single field. - -# We will need a way in the client SDK to signal that the metrics are needed -# and, if they are needed, the client SDK has to return the full response type -# without unwrapping it. - - -class MetricResponseMixin(BaseModel): - """Mixin class for API responses that can include metrics. - :param metrics: (Optional) List of metrics associated with the API response - """ - - metrics: list[MetricInResponse] | None = None - - -@json_schema_type -class StructuredLogType(Enum): - """The type of structured log event payload. - :cvar SPAN_START: Event indicating the start of a new span - :cvar SPAN_END: Event indicating the completion of a span - """ - - SPAN_START = "span_start" - SPAN_END = "span_end" - - -@json_schema_type -class SpanStartPayload(BaseModel): - """Payload for a span start event. - :param type: Payload type identifier set to SPAN_START - :param name: Human-readable name describing the operation this span represents - :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span - """ - - type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START - name: str - parent_span_id: str | None = None - - -@json_schema_type -class SpanEndPayload(BaseModel): - """Payload for a span end event. - :param type: Payload type identifier set to SPAN_END - :param status: The final status of the span indicating success or failure - """ - - type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END - status: SpanStatus - - -StructuredLogPayload = Annotated[ - SpanStartPayload | SpanEndPayload, - Field(discriminator="type"), -] -register_schema(StructuredLogPayload, name="StructuredLogPayload") - - -@json_schema_type -class StructuredLogEvent(EventCommon): - """A structured log event containing typed payload data.
- :param type: Event type identifier set to STRUCTURED_LOG - :param payload: The structured payload data for the log event - """ - - type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG - payload: StructuredLogPayload - - -Event = Annotated[ - UnstructuredLogEvent | MetricEvent | StructuredLogEvent, - Field(discriminator="type"), -] -register_schema(Event, name="Event") - - -@json_schema_type -class EvalTrace(BaseModel): - """A trace record for evaluation purposes. - :param session_id: Unique identifier for the evaluation session - :param step: The evaluation step or phase identifier - :param input: The input data for the evaluation - :param output: The actual output produced during evaluation - :param expected_output: The expected output for comparison during evaluation - """ - - session_id: str - step: str - input: str - output: str - expected_output: str - - -@json_schema_type -class SpanWithStatus(Span): - """A span that includes status information. - :param status: (Optional) The current status of the span - """ - - status: SpanStatus | None = None - - -@json_schema_type -class QueryConditionOp(Enum): - """Comparison operators for query conditions. - :cvar EQ: Equal to comparison - :cvar NE: Not equal to comparison - :cvar GT: Greater than comparison - :cvar LT: Less than comparison - """ - - EQ = "eq" - NE = "ne" - GT = "gt" - LT = "lt" - - -@json_schema_type -class QueryCondition(BaseModel): - """A condition for filtering query results. - :param key: The attribute key to filter on - :param op: The comparison operator to apply - :param value: The value to compare against - """ - - key: str - op: QueryConditionOp - value: Any - - -class QueryTracesResponse(BaseModel): - """Response containing a list of traces. - :param data: List of traces matching the query criteria - """ - - data: list[Trace] - - -class QuerySpansResponse(BaseModel): - """Response containing a list of spans. - :param data: List of spans matching the query criteria - """ - - data: list[Span] - - -class QuerySpanTreeResponse(BaseModel): - """Response containing a tree structure of spans. - :param data: Dictionary mapping span IDs to spans with status information - """ - - data: dict[str, SpanWithStatus] - - -class MetricQueryType(Enum): - """The type of metric query to perform. - :cvar RANGE: Query metrics over a time range - :cvar INSTANT: Query metrics at a specific point in time - """ - - RANGE = "range" - INSTANT = "instant" - - -class MetricLabelOperator(Enum): - """Operators for matching metric labels. - :cvar EQUALS: Label value must equal the specified value - :cvar NOT_EQUALS: Label value must not equal the specified value - :cvar REGEX_MATCH: Label value must match the specified regular expression - :cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression - """ - - EQUALS = "=" - NOT_EQUALS = "!=" - REGEX_MATCH = "=~" - REGEX_NOT_MATCH = "!~" - - -class MetricLabelMatcher(BaseModel): - """A matcher for filtering metrics by label values. - :param name: The name of the label to match - :param value: The value to match against - :param operator: The comparison operator to use for matching - """ - - name: str - value: str - operator: MetricLabelOperator = MetricLabelOperator.EQUALS - - -@json_schema_type -class MetricLabel(BaseModel): - """A label associated with a metric. 
- :param name: The name of the label - :param value: The value of the label - """ - - name: str - value: str - - -@json_schema_type -class MetricDataPoint(BaseModel): - """A single data point in a metric time series. - :param timestamp: Unix timestamp when the metric value was recorded - :param value: The numeric value of the metric at this timestamp - """ - - timestamp: int - value: float - unit: str - - -@json_schema_type -class MetricSeries(BaseModel): - """A time series of metric data points. - :param metric: The name of the metric - :param labels: List of labels associated with this metric series - :param values: List of data points in chronological order - """ - - metric: str - labels: list[MetricLabel] - values: list[MetricDataPoint] - - -class QueryMetricsResponse(BaseModel): - """Response containing metric time series data. - :param data: List of metric series matching the query criteria - """ - - data: list[MetricSeries] - - -@runtime_checkable -class Telemetry(Protocol): - async def log_event( - self, - event: Event, - ttl_seconds: int = DEFAULT_TTL_DAYS * 86400, - ) -> None: - """Log an event. - - :param event: The event to log. - :param ttl_seconds: The time to live of the event. - """ - ... diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py deleted file mode 100644 index b25310ecf..000000000 --- a/llama_stack/apis/tools/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .rag_tool import * -from .tools import * diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py deleted file mode 100644 index ed7847e23..000000000 --- a/llama_stack/apis/tools/rag_tool.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, StrEnum -from typing import Annotated, Any, Literal, Protocol - -from pydantic import BaseModel, Field, field_validator -from typing_extensions import runtime_checkable - -from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod - - -@json_schema_type -class RRFRanker(BaseModel): - """ - Reciprocal Rank Fusion (RRF) ranker configuration. - - :param type: The type of ranker, always "rrf" - :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. - Must be greater than 0 - """ - - type: Literal["rrf"] = "rrf" - impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance - - -@json_schema_type -class WeightedRanker(BaseModel): - """ - Weighted ranker configuration that combines vector and keyword scores. - - :param type: The type of ranker, always "weighted" - :param alpha: Weight factor between 0 and 1. - 0 means only use keyword scores, - 1 means only use vector scores, - values in between blend both scores. - """ - - type: Literal["weighted"] = "weighted" - alpha: float = Field( - default=0.5, - ge=0.0, - le=1.0, - description="Weight factor between 0 and 1. 
0 means only keyword scores, 1 means only vector scores.", - ) - - -Ranker = Annotated[ - RRFRanker | WeightedRanker, - Field(discriminator="type"), -] -register_schema(Ranker, name="Ranker") - - -@json_schema_type -class RAGDocument(BaseModel): - """ - A document to be used for document ingestion in the RAG Tool. - - :param document_id: The unique identifier for the document. - :param content: The content of the document. - :param mime_type: The MIME type of the document. - :param metadata: Additional metadata for the document. - """ - - document_id: str - content: InterleavedContent | URL - mime_type: str | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RAGQueryResult(BaseModel): - """Result of a RAG query containing retrieved content and metadata. - - :param content: (Optional) The retrieved content from the query - :param metadata: Additional metadata about the query result - """ - - content: InterleavedContent | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RAGQueryGenerator(Enum): - """Types of query generators for RAG systems. - - :cvar default: Default query generator using simple text processing - :cvar llm: LLM-based query generator for enhanced query understanding - :cvar custom: Custom query generator implementation - """ - - default = "default" - llm = "llm" - custom = "custom" - - -@json_schema_type -class RAGSearchMode(StrEnum): - """ - Search modes for RAG query retrieval: - - VECTOR: Uses vector similarity search for semantic matching - - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - """ - - VECTOR = "vector" - KEYWORD = "keyword" - HYBRID = "hybrid" - - -@json_schema_type -class DefaultRAGQueryGeneratorConfig(BaseModel): - """Configuration for the default RAG query generator. - - :param type: Type of query generator, always 'default' - :param separator: String separator used to join query terms - """ - - type: Literal["default"] = "default" - separator: str = " " - - -@json_schema_type -class LLMRAGQueryGeneratorConfig(BaseModel): - """Configuration for the LLM-based RAG query generator. - - :param type: Type of query generator, always 'llm' - :param model: Name of the language model to use for query generation - :param template: Template string for formatting the query generation prompt - """ - - type: Literal["llm"] = "llm" - model: str - template: str - - -RAGQueryGeneratorConfig = Annotated[ - DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig, - Field(discriminator="type"), -] -register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig") - - -@json_schema_type -class RAGQueryConfig(BaseModel): - """ - Configuration for the RAG query generation. - - :param query_generator_config: Configuration for the query generator. - :param max_tokens_in_context: Maximum number of tokens in the context. - :param max_chunks: Maximum number of chunks to retrieve. - :param chunk_template: Template for formatting each retrieved chunk in the context. - Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). - Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n" - :param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector". - :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker. 
- """ - - # This config defines how a query is generated using the messages - # for memory bank retrieval. - query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig()) - max_tokens_in_context: int = 4096 - max_chunks: int = 5 - chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" - mode: RAGSearchMode | None = RAGSearchMode.VECTOR - ranker: Ranker | None = Field(default=None) # Only used for hybrid mode - - @field_validator("chunk_template") - def validate_chunk_template(cls, v: str) -> str: - if "{chunk.content}" not in v: - raise ValueError("chunk_template must contain {chunk.content}") - if "{index}" not in v: - raise ValueError("chunk_template must contain {index}") - if len(v) == 0: - raise ValueError("chunk_template must not be empty") - return v - - -@runtime_checkable -@trace_protocol -class RAGToolRuntime(Protocol): - @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1) - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - """Index documents so they can be used by the RAG system. - - :param documents: List of documents to index in the RAG system - :param vector_db_id: ID of the vector database to store the document embeddings - :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing - """ - ... - - @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1) - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - """Query the RAG system for context; typically invoked by the agent. - - :param content: The query content to search for in the indexed documents - :param vector_db_ids: List of vector database IDs to search within - :param query_config: (Optional) Configuration parameters for the query operation - :returns: RAGQueryResult containing the retrieved content and metadata - """ - ... diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py deleted file mode 100644 index b6a1a2543..000000000 --- a/llama_stack/apis/tools/tools.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum -from typing import Any, Literal, Protocol - -from pydantic import BaseModel -from typing_extensions import runtime_checkable - -from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, webmethod - -from .rag_tool import RAGToolRuntime - - -@json_schema_type -class ToolDef(BaseModel): - """Tool definition used in runtime contexts. 
- - :param name: Name of the tool - :param description: (Optional) Human-readable description of what the tool does - :param input_schema: (Optional) JSON Schema for tool inputs (MCP inputSchema) - :param output_schema: (Optional) JSON Schema for tool outputs (MCP outputSchema) - :param metadata: (Optional) Additional metadata about the tool - :param toolgroup_id: (Optional) ID of the tool group this tool belongs to - """ - - toolgroup_id: str | None = None - name: str - description: str | None = None - input_schema: dict[str, Any] | None = None - output_schema: dict[str, Any] | None = None - metadata: dict[str, Any] | None = None - - -@json_schema_type -class ToolGroupInput(BaseModel): - """Input data for registering a tool group. - - :param toolgroup_id: Unique identifier for the tool group - :param provider_id: ID of the provider that will handle this tool group - :param args: (Optional) Additional arguments to pass to the provider - :param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools - """ - - toolgroup_id: str - provider_id: str - args: dict[str, Any] | None = None - mcp_endpoint: URL | None = None - - -@json_schema_type -class ToolGroup(Resource): - """A group of related tools managed together. - - :param type: Type of resource, always 'tool_group' - :param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools - :param args: (Optional) Additional arguments for the tool group - """ - - type: Literal[ResourceType.tool_group] = ResourceType.tool_group - mcp_endpoint: URL | None = None - args: dict[str, Any] | None = None - - -@json_schema_type -class ToolInvocationResult(BaseModel): - """Result of a tool invocation. - - :param content: (Optional) The output content from the tool execution - :param error_message: (Optional) Error message if the tool execution failed - :param error_code: (Optional) Numeric error code if the tool execution failed - :param metadata: (Optional) Additional metadata about the tool execution - """ - - content: InterleavedContent | None = None - error_message: str | None = None - error_code: int | None = None - metadata: dict[str, Any] | None = None - - -class ToolStore(Protocol): - async def get_tool(self, tool_name: str) -> ToolDef: ... - async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ... - - -class ListToolGroupsResponse(BaseModel): - """Response containing a list of tool groups. - - :param data: List of tool groups - """ - - data: list[ToolGroup] - - -class ListToolDefsResponse(BaseModel): - """Response containing a list of tool definitions. - - :param data: List of tool definitions - """ - - data: list[ToolDef] - - -@runtime_checkable -@trace_protocol -class ToolGroups(Protocol): - @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1) - async def register_tool_group( - self, - toolgroup_id: str, - provider_id: str, - mcp_endpoint: URL | None = None, - args: dict[str, Any] | None = None, - ) -> None: - """Register a tool group. - - :param toolgroup_id: The ID of the tool group to register. - :param provider_id: The ID of the provider to use for the tool group. - :param mcp_endpoint: The MCP endpoint to use for the tool group. - :param args: A dictionary of arguments to pass to the tool group. - """ - ... - - @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1) - async def get_tool_group( - self, - toolgroup_id: str, - ) -> ToolGroup: - """Get a tool group by its ID. - - :param toolgroup_id: The ID of the tool group to get. 
- :returns: A ToolGroup. - """ - ... - - @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1) - async def list_tool_groups(self) -> ListToolGroupsResponse: - """List all tool groups. - - :returns: A ListToolGroupsResponse. - """ - ... - - @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1) - async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse: - """List tools, optionally filtered by tool group. - - :param toolgroup_id: The ID of the tool group to list tools for. - :returns: A ListToolDefsResponse. - """ - ... - - @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1) - async def get_tool( - self, - tool_name: str, - ) -> ToolDef: - """Get a tool by its name. - - :param tool_name: The name of the tool to get. - :returns: A ToolDef. - """ - ... - - @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) - async def unregister_toolgroup( - self, - toolgroup_id: str, - ) -> None: - """Unregister a tool group. - - :param toolgroup_id: The ID of the tool group to unregister. - """ - ... - - -class SpecialToolGroup(Enum): - """Special tool groups with predefined functionality. - - :cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval - """ - - rag_tool = "rag_tool" - - -@runtime_checkable -@trace_protocol -class ToolRuntime(Protocol): - tool_store: ToolStore | None = None - - rag_tool: RAGToolRuntime | None = None - - # TODO: This needs to be renamed once the OpenAPI generator name conflict issue is fixed. - @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) - async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None - ) -> ListToolDefsResponse: - """List all tools in the runtime. - - :param tool_group_id: The ID of the tool group to list tools for. - :param mcp_endpoint: The MCP endpoint to use for the tool group. - :returns: A ListToolDefsResponse. - """ - ... - - @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: - """Run a tool with the given arguments. - - :param tool_name: The name of the tool to invoke. - :param kwargs: A dictionary of arguments to pass to the tool. - :returns: A ToolInvocationResult. - """ - ... diff --git a/llama_stack/apis/vector_io/__init__.py b/llama_stack/apis/vector_io/__init__.py deleted file mode 100644 index 3f4c60805..000000000 --- a/llama_stack/apis/vector_io/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_io import * diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py deleted file mode 100644 index 49e4df039..000000000 --- a/llama_stack/apis/vector_io/vector_io.py +++ /dev/null @@ -1,960 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree.
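Before the vector_io deletion continues below, a quick sketch of the ToolGroups/ToolRuntime flow defined above: register an MCP tool group, list its tools, then invoke one. The identifiers, the MCP endpoint URI, and the exact client signatures are illustrative assumptions.

```python
# Minimal sketch of the ToolGroups / ToolRuntime APIs above. The toolgroup id,
# provider id, MCP endpoint, and tool name are hypothetical.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# POST /toolgroups
client.toolgroups.register(
    toolgroup_id="mcp::filesystem",
    provider_id="model-context-protocol",
    mcp_endpoint={"uri": "http://localhost:8000/sse"},
)

# GET /tools?toolgroup_id=...
for tool in client.tools.list(toolgroup_id="mcp::filesystem"):
    print(tool.name, tool.description)

# POST /tool-runtime/invoke
result = client.tool_runtime.invoke_tool(
    tool_name="list_directory",
    kwargs={"path": "/tmp"},
)
print(result.content)
```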
-import uuid -from typing import Annotated, Any, Literal, Protocol, runtime_checkable - -from fastapi import Body -from pydantic import BaseModel, Field - -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id -from llama_stack.schema_utils import json_schema_type, webmethod -from llama_stack.strong_typing.schema import register_schema - - -@json_schema_type -class ChunkMetadata(BaseModel): - """ - `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that - will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` - is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change afterwards. - Use `Chunk.metadata` for metadata that will be used in the context during inference. - :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content. - :param document_id: The ID of the document this chunk belongs to. - :param source: The source of the content, such as a URL, file path, or other identifier. - :param created_timestamp: An optional timestamp indicating when the chunk was created. - :param updated_timestamp: An optional timestamp indicating when the chunk was last updated. - :param chunk_window: The window of the chunk, which can be used to group related chunks together. - :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken. - :param chunk_embedding_model: The embedding model used to create the chunk's embedding. - :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk. - :param content_token_count: The number of tokens in the content of the chunk. - :param metadata_token_count: The number of tokens in the metadata of the chunk. - """ - - chunk_id: str | None = None - document_id: str | None = None - source: str | None = None - created_timestamp: int | None = None - updated_timestamp: int | None = None - chunk_window: str | None = None - chunk_tokenizer: str | None = None - chunk_embedding_model: str | None = None - chunk_embedding_dimension: int | None = None - content_token_count: int | None = None - metadata_token_count: int | None = None - - -@json_schema_type -class Chunk(BaseModel): - """ - A chunk of content that can be inserted into a vector database. - :param content: The content of the chunk, which can be interleaved text, images, or other types. - :param embedding: Optional embedding for the chunk. If not provided, it will be computed later. - :param metadata: Metadata associated with the chunk that will be used in the model context during inference. - :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality. - :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference. - The `chunk_metadata` is required for backend functionality.
- """ - - content: InterleavedContent - metadata: dict[str, Any] = Field(default_factory=dict) - embedding: list[float] | None = None - # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id" - stored_chunk_id: str | None = Field(default=None, alias="chunk_id") - chunk_metadata: ChunkMetadata | None = None - - model_config = {"populate_by_name": True} - - def model_post_init(self, __context): - # Extract chunk_id from metadata if present - if self.metadata and "chunk_id" in self.metadata: - self.stored_chunk_id = self.metadata.pop("chunk_id") - - @property - def chunk_id(self) -> str: - """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set.""" - if self.stored_chunk_id: - return self.stored_chunk_id - - if "document_id" in self.metadata: - return generate_chunk_id(self.metadata["document_id"], str(self.content)) - - return generate_chunk_id(str(uuid.uuid4()), str(self.content)) - - @property - def document_id(self) -> str | None: - """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence.""" - # Check metadata first (takes precedence) - doc_id = self.metadata.get("document_id") - if doc_id is not None: - if not isinstance(doc_id, str): - raise TypeError(f"metadata['document_id'] must be a string, got {type(doc_id).__name__}: {doc_id!r}") - return doc_id - - # Fall back to chunk_metadata if available (Pydantic ensures type safety) - if self.chunk_metadata is not None: - return self.chunk_metadata.document_id - - return None - - -@json_schema_type -class QueryChunksResponse(BaseModel): - """Response from querying chunks in a vector database. - - :param chunks: List of content chunks returned from the query - :param scores: Relevance scores corresponding to each returned chunk - """ - - chunks: list[Chunk] - scores: list[float] - - -@json_schema_type -class VectorStoreFileCounts(BaseModel): - """File processing status counts for a vector store. - - :param completed: Number of files that have been successfully processed - :param cancelled: Number of files that had their processing cancelled - :param failed: Number of files that failed to process - :param in_progress: Number of files currently being processed - :param total: Total number of files in the vector store - """ - - completed: int - cancelled: int - failed: int - in_progress: int - total: int - - -# TODO: rename this as OpenAIVectorStore -@json_schema_type -class VectorStoreObject(BaseModel): - """OpenAI Vector Store object. 
- - :param id: Unique identifier for the vector store - :param object: Object type identifier, always "vector_store" - :param created_at: Timestamp when the vector store was created - :param name: (Optional) Name of the vector store - :param usage_bytes: Storage space used by the vector store in bytes - :param file_counts: File processing status counts for the vector store - :param status: Current status of the vector store - :param expires_after: (Optional) Expiration policy for the vector store - :param expires_at: (Optional) Timestamp when the vector store will expire - :param last_active_at: (Optional) Timestamp of last activity on the vector store - :param metadata: Set of key-value pairs that can be attached to the vector store - """ - - id: str - object: str = "vector_store" - created_at: int - name: str | None = None - usage_bytes: int = 0 - file_counts: VectorStoreFileCounts - status: str = "completed" - expires_after: dict[str, Any] | None = None - expires_at: int | None = None - last_active_at: int | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class VectorStoreCreateRequest(BaseModel): - """Request to create a vector store. - - :param name: (Optional) Name for the vector store - :param file_ids: List of file IDs to include in the vector store - :param expires_after: (Optional) Expiration policy for the vector store - :param chunking_strategy: (Optional) Strategy for splitting files into chunks - :param metadata: Set of key-value pairs that can be attached to the vector store - """ - - name: str | None = None - file_ids: list[str] = Field(default_factory=list) - expires_after: dict[str, Any] | None = None - chunking_strategy: dict[str, Any] | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class VectorStoreModifyRequest(BaseModel): - """Request to modify a vector store. - - :param name: (Optional) Updated name for the vector store - :param expires_after: (Optional) Updated expiration policy for the vector store - :param metadata: (Optional) Updated set of key-value pairs for the vector store - """ - - name: str | None = None - expires_after: dict[str, Any] | None = None - metadata: dict[str, Any] | None = None - - -@json_schema_type -class VectorStoreListResponse(BaseModel): - """Response from listing vector stores. - - :param object: Object type identifier, always "list" - :param data: List of vector store objects - :param first_id: (Optional) ID of the first vector store in the list for pagination - :param last_id: (Optional) ID of the last vector store in the list for pagination - :param has_more: Whether there are more vector stores available beyond this page - """ - - object: str = "list" - data: list[VectorStoreObject] - first_id: str | None = None - last_id: str | None = None - has_more: bool = False - - -@json_schema_type -class VectorStoreSearchRequest(BaseModel): - """Request to search a vector store. 
- - :param query: Search query as a string or list of strings - :param filters: (Optional) Filters based on file attributes to narrow search results - :param max_num_results: Maximum number of results to return, defaults to 10 - :param ranking_options: (Optional) Options for ranking and filtering search results - :param rewrite_query: Whether to rewrite the query for better vector search performance - """ - - query: str | list[str] - filters: dict[str, Any] | None = None - max_num_results: int = 10 - ranking_options: dict[str, Any] | None = None - rewrite_query: bool = False - - -@json_schema_type -class VectorStoreContent(BaseModel): - """Content item from a vector store file or search result. - - :param type: Content type, currently only "text" is supported - :param text: The actual text content - """ - - type: Literal["text"] - text: str - - -@json_schema_type -class VectorStoreSearchResponse(BaseModel): - """Response from searching a vector store. - - :param file_id: Unique identifier of the file containing the result - :param filename: Name of the file containing the result - :param score: Relevance score for this search result - :param attributes: (Optional) Key-value attributes associated with the file - :param content: List of content items matching the search query - """ - - file_id: str - filename: str - score: float - attributes: dict[str, str | float | bool] | None = None - content: list[VectorStoreContent] - - -@json_schema_type -class VectorStoreSearchResponsePage(BaseModel): - """Paginated response from searching a vector store. - - :param object: Object type identifier for the search results page - :param search_query: The original search query that was executed - :param data: List of search result objects - :param has_more: Whether there are more results available beyond this page - :param next_page: (Optional) Token for retrieving the next page of results - """ - - object: str = "vector_store.search_results.page" - search_query: str - data: list[VectorStoreSearchResponse] - has_more: bool = False - next_page: str | None = None - - -@json_schema_type -class VectorStoreDeleteResponse(BaseModel): - """Response from deleting a vector store. - - :param id: Unique identifier of the deleted vector store - :param object: Object type identifier for the deletion response - :param deleted: Whether the deletion operation was successful - """ - - id: str - object: str = "vector_store.deleted" - deleted: bool = True - - -@json_schema_type -class VectorStoreChunkingStrategyAuto(BaseModel): - """Automatic chunking strategy for vector store files. - - :param type: Strategy type, always "auto" for automatic chunking - """ - - type: Literal["auto"] = "auto" - - -@json_schema_type -class VectorStoreChunkingStrategyStaticConfig(BaseModel): - """Configuration for static chunking strategy. - - :param chunk_overlap_tokens: Number of tokens to overlap between adjacent chunks - :param max_chunk_size_tokens: Maximum number of tokens per chunk, must be between 100 and 4096 - """ - - chunk_overlap_tokens: int = 400 - max_chunk_size_tokens: int = Field(800, ge=100, le=4096) - - -@json_schema_type -class VectorStoreChunkingStrategyStatic(BaseModel): - """Static chunking strategy with configurable parameters. 
- - :param type: Strategy type, always "static" for static chunking - :param static: Configuration parameters for the static chunking strategy - """ - - type: Literal["static"] = "static" - static: VectorStoreChunkingStrategyStaticConfig - - -VectorStoreChunkingStrategy = Annotated[ - VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic, - Field(discriminator="type"), -] -register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy") - - -class SearchRankingOptions(BaseModel): - """Options for ranking and filtering search results. - - :param ranker: (Optional) Name of the ranking algorithm to use - :param score_threshold: (Optional) Minimum relevance score threshold for results - """ - - ranker: str | None = None - # NOTE: OpenAI File Search Tool requires threshold to be between 0 and 1, however - # we don't guarantee that the score is between 0 and 1, so will leave this unconstrained - # and let the provider handle it - score_threshold: float | None = Field(default=0.0) - - -@json_schema_type -class VectorStoreFileLastError(BaseModel): - """Error information for failed vector store file processing. - - :param code: Error code indicating the type of failure - :param message: Human-readable error message describing the failure - """ - - code: Literal["server_error"] | Literal["rate_limit_exceeded"] - message: str - - -VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"] -register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus") - - -@json_schema_type -class VectorStoreFileObject(BaseModel): - """OpenAI Vector Store File object. - - :param id: Unique identifier for the file - :param object: Object type identifier, always "vector_store.file" - :param attributes: Key-value attributes associated with the file - :param chunking_strategy: Strategy used for splitting the file into chunks - :param created_at: Timestamp when the file was added to the vector store - :param last_error: (Optional) Error information if file processing failed - :param status: Current processing status of the file - :param usage_bytes: Storage space used by this file in bytes - :param vector_store_id: ID of the vector store containing this file - """ - - id: str - object: str = "vector_store.file" - attributes: dict[str, Any] = Field(default_factory=dict) - chunking_strategy: VectorStoreChunkingStrategy - created_at: int - last_error: VectorStoreFileLastError | None = None - status: VectorStoreFileStatus - usage_bytes: int = 0 - vector_store_id: str - - -@json_schema_type -class VectorStoreListFilesResponse(BaseModel): - """Response from listing files in a vector store. - - :param object: Object type identifier, always "list" - :param data: List of vector store file objects - :param first_id: (Optional) ID of the first file in the list for pagination - :param last_id: (Optional) ID of the last file in the list for pagination - :param has_more: Whether there are more files available beyond this page - """ - - object: str = "list" - data: list[VectorStoreFileObject] - first_id: str | None = None - last_id: str | None = None - has_more: bool = False - - -@json_schema_type -class VectorStoreFileContentsResponse(BaseModel): - """Response from retrieving the contents of a vector store file. 
- - :param file_id: Unique identifier for the file - :param filename: Name of the file - :param attributes: Key-value attributes associated with the file - :param content: List of content items from the file - """ - - file_id: str - filename: str - attributes: dict[str, Any] - content: list[VectorStoreContent] - - -@json_schema_type -class VectorStoreFileDeleteResponse(BaseModel): - """Response from deleting a vector store file. - - :param id: Unique identifier of the deleted file - :param object: Object type identifier for the deletion response - :param deleted: Whether the deletion operation was successful - """ - - id: str - object: str = "vector_store.file.deleted" - deleted: bool = True - - -@json_schema_type -class VectorStoreFileBatchObject(BaseModel): - """OpenAI Vector Store File Batch object. - - :param id: Unique identifier for the file batch - :param object: Object type identifier, always "vector_store.file_batch" - :param created_at: Timestamp when the file batch was created - :param vector_store_id: ID of the vector store containing the file batch - :param status: Current processing status of the file batch - :param file_counts: File processing status counts for the batch - """ - - id: str - object: str = "vector_store.file_batch" - created_at: int - vector_store_id: str - status: VectorStoreFileStatus - file_counts: VectorStoreFileCounts - - -@json_schema_type -class VectorStoreFilesListInBatchResponse(BaseModel): - """Response from listing files in a vector store file batch. - - :param object: Object type identifier, always "list" - :param data: List of vector store file objects in the batch - :param first_id: (Optional) ID of the first file in the list for pagination - :param last_id: (Optional) ID of the last file in the list for pagination - :param has_more: Whether there are more files available beyond this page - """ - - object: str = "list" - data: list[VectorStoreFileObject] - first_id: str | None = None - last_id: str | None = None - has_more: bool = False - - -# extra_body can be accessed via .model_extra -@json_schema_type -class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"): - """Request to create a vector store with extra_body support. - - :param name: (Optional) A name for the vector store - :param file_ids: List of file IDs to include in the vector store - :param expires_after: (Optional) Expiration policy for the vector store - :param chunking_strategy: (Optional) Strategy for splitting files into chunks - :param metadata: Set of key-value pairs that can be attached to the vector store - """ - - name: str | None = None - file_ids: list[str] | None = None - expires_after: dict[str, Any] | None = None - chunking_strategy: dict[str, Any] | None = None - metadata: dict[str, Any] | None = None - - -# extra_body can be accessed via .model_extra -@json_schema_type -class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"): - """Request to create a vector store file batch with extra_body support. - - :param file_ids: A list of File IDs that the vector store should use - :param attributes: (Optional) Key-value attributes to store with the files - :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto - """ - - file_ids: list[str] - attributes: dict[str, Any] | None = None - chunking_strategy: VectorStoreChunkingStrategy | None = None - - -class VectorStoreTable(Protocol): - def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ... 
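
Taken together, the models above rely on three Pydantic patterns that are easy to misread in isolation: the `chunk_id` alias handling on `Chunk`, the `type`-discriminated chunking-strategy union, and the `extra="allow"` request models whose unknown keys surface via `.model_extra`. A minimal sketch of all three, assuming these classes are still importable from the pre-refactor `llama_stack.apis.vector_io` module (the import path is an assumption, not part of this diff):

from pydantic import TypeAdapter

from llama_stack.apis.vector_io import (  # assumed pre-refactor location
    Chunk,
    OpenAICreateVectorStoreRequestWithExtraBody,
    VectorStoreChunkingStrategy,
    VectorStoreChunkingStrategyStatic,
)

# model_post_init moves a "chunk_id" key out of metadata into stored_chunk_id;
# the field alias serializes it back out as "chunk_id".
chunk = Chunk(content="hello world", metadata={"document_id": "doc-1", "chunk_id": "c-1"})
assert chunk.chunk_id == "c-1"
assert "chunk_id" not in chunk.metadata

# The Annotated union dispatches on the "type" discriminator.
strategy = TypeAdapter(VectorStoreChunkingStrategy).validate_python(
    {"type": "static", "static": {"chunk_overlap_tokens": 100, "max_chunk_size_tokens": 512}}
)
assert isinstance(strategy, VectorStoreChunkingStrategyStatic)

# extra="allow" keeps unrecognized request keys instead of rejecting them.
req = OpenAICreateVectorStoreRequestWithExtraBody(name="docs", embedding_model="all-MiniLM-L6-v2")
assert req.model_extra == {"embedding_model": "all-MiniLM-L6-v2"}
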
- - -@runtime_checkable -@trace_protocol -class VectorIO(Protocol): - vector_store_table: VectorStoreTable | None = None - - # this will just block now until chunks are inserted, but it should - # probably return a Job instance which can be polled for completion - # TODO: rename vector_db_id to vector_store_id once Stainless is working - @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1) - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: - """Insert chunks into a vector database. - - :param vector_db_id: The identifier of the vector database to insert the chunks into. - :param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. - `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. - If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. - If `embedding` is not provided, it will be computed later. - :param ttl_seconds: The time to live of the chunks. - """ - ... - - # TODO: rename vector_db_id to vector_store_id once Stainless is working - @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1) - async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, - ) -> QueryChunksResponse: - """Query chunks from a vector database. - - :param vector_db_id: The identifier of the vector database to query. - :param query: The query to search for. - :param params: The parameters of the query. - :returns: A QueryChunksResponse. - """ - ... - - # OpenAI Vector Stores API endpoints - @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1) - async def openai_create_vector_store( - self, - params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], - ) -> VectorStoreObject: - """Creates a vector store. - - Generate an OpenAI-compatible vector store with the given parameters. - :returns: A VectorStoreObject representing the created vector store. - """ - ... - - @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) - @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1) - async def openai_list_vector_stores( - self, - limit: int | None = 20, - order: str | None = "desc", - after: str | None = None, - before: str | None = None, - ) -> VectorStoreListResponse: - """Returns a list of vector stores. - - :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. - :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order. - :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. - :param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. - :returns: A VectorStoreListResponse containing the list of vector stores. - """ - ... 
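
Each route below is registered twice: once under the deprecated `/openai/v1` prefix and once under the bare path served at the `LLAMA_STACK_API_V1` level. A hedged HTTP sketch against the create and list endpoints above; the default port 8321 comes from the deleted `run.py` later in this diff, while the `/v1` prefix for `LLAMA_STACK_API_V1` is an assumption:

import httpx

# Illustrative only: assumes a local Llama Stack server and a /v1 API prefix.
base = "http://localhost:8321/v1"

store = httpx.post(f"{base}/vector_stores", json={"name": "docs"}).json()
print(store["id"], store["status"])  # VectorStoreObject defaults status to "completed"

page = httpx.get(f"{base}/vector_stores", params={"limit": 20, "order": "desc"}).json()
print([vs["id"] for vs in page["data"]], page["has_more"])
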
- - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True - ) - @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1) - async def openai_retrieve_vector_store( - self, - vector_store_id: str, - ) -> VectorStoreObject: - """Retrieves a vector store. - - :param vector_store_id: The ID of the vector store to retrieve. - :returns: A VectorStoreObject representing the vector store. - """ - ... - - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True - ) - @webmethod( - route="/vector_stores/{vector_store_id}", - method="POST", - level=LLAMA_STACK_API_V1, - ) - async def openai_update_vector_store( - self, - vector_store_id: str, - name: str | None = None, - expires_after: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - ) -> VectorStoreObject: - """Updates a vector store. - - :param vector_store_id: The ID of the vector store to update. - :param name: The name of the vector store. - :param expires_after: The expiration policy for a vector store. - :param metadata: Set of 16 key-value pairs that can be attached to an object. - :returns: A VectorStoreObject representing the updated vector store. - """ - ... - - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True - ) - @webmethod( - route="/vector_stores/{vector_store_id}", - method="DELETE", - level=LLAMA_STACK_API_V1, - ) - async def openai_delete_vector_store( - self, - vector_store_id: str, - ) -> VectorStoreDeleteResponse: - """Delete a vector store. - - :param vector_store_id: The ID of the vector store to delete. - :returns: A VectorStoreDeleteResponse indicating the deletion status. - """ - ... - - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}/search", - method="POST", - level=LLAMA_STACK_API_V1, - deprecated=True, - ) - @webmethod( - route="/vector_stores/{vector_store_id}/search", - method="POST", - level=LLAMA_STACK_API_V1, - ) - async def openai_search_vector_store( - self, - vector_store_id: str, - query: str | list[str], - filters: dict[str, Any] | None = None, - max_num_results: int | None = 10, - ranking_options: SearchRankingOptions | None = None, - rewrite_query: bool | None = False, - search_mode: ( - str | None - ) = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations - ) -> VectorStoreSearchResponsePage: - """Search for chunks in a vector store. - - Searches a vector store for relevant chunks based on a query and optional file attribute filters. - - :param vector_store_id: The ID of the vector store to search. - :param query: The query string or array for performing the search. - :param filters: Filters based on file attributes to narrow the search results. - :param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10). - :param ranking_options: Ranking options for fine-tuning the search results. - :param rewrite_query: Whether to rewrite the natural language query for vector search (default false) - :param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - :returns: A VectorStoreSearchResponse containing the search results. - """ - ... 
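
The search body mirrors `VectorStoreSearchRequest`, plus the `search_mode` extension ("keyword", "vector", or "hybrid"). Note that `score_threshold` is intentionally left unconstrained (see the comment on `SearchRankingOptions` above), so out-of-range thresholds are passed through to the provider. An illustrative request; the store ID and filter key are hypothetical:

import httpx

# Illustrative only: "vs_123" and the "document_id" filter are placeholders.
resp = httpx.post(
    "http://localhost:8321/v1/vector_stores/vs_123/search",
    json={
        "query": "how do I rotate credentials?",
        "filters": {"document_id": "doc-42"},
        "max_num_results": 5,
        "ranking_options": {"score_threshold": 0.2},
        "search_mode": "hybrid",
    },
).json()

for hit in resp["data"]:  # VectorStoreSearchResponsePage.data
    print(hit["file_id"], hit["score"], hit["content"][0]["text"][:80])
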
- - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}/files", - method="POST", - level=LLAMA_STACK_API_V1, - deprecated=True, - ) - @webmethod( - route="/vector_stores/{vector_store_id}/files", - method="POST", - level=LLAMA_STACK_API_V1, - ) - async def openai_attach_file_to_vector_store( - self, - vector_store_id: str, - file_id: str, - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, - ) -> VectorStoreFileObject: - """Attach a file to a vector store. - - :param vector_store_id: The ID of the vector store to attach the file to. - :param file_id: The ID of the file to attach to the vector store. - :param attributes: The key-value attributes stored with the file, which can be used for filtering. - :param chunking_strategy: The chunking strategy to use for the file. - :returns: A VectorStoreFileObject representing the attached file. - """ - ... - - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}/files", - method="GET", - level=LLAMA_STACK_API_V1, - deprecated=True, - ) - @webmethod( - route="/vector_stores/{vector_store_id}/files", - method="GET", - level=LLAMA_STACK_API_V1, - ) - async def openai_list_files_in_vector_store( - self, - vector_store_id: str, - limit: int | None = 20, - order: str | None = "desc", - after: str | None = None, - before: str | None = None, - filter: VectorStoreFileStatus | None = None, - ) -> VectorStoreListFilesResponse: - """List files in a vector store. - - :param vector_store_id: The ID of the vector store to list files from. - :param limit: (Optional) A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. - :param order: (Optional) Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order. - :param after: (Optional) A cursor for use in pagination. `after` is an object ID that defines your place in the list. - :param before: (Optional) A cursor for use in pagination. `before` is an object ID that defines your place in the list. - :param filter: (Optional) Filter by file status to only return files with the specified status. - :returns: A VectorStoreListFilesResponse containing the list of files. - """ - ... - - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", - method="GET", - level=LLAMA_STACK_API_V1, - deprecated=True, - ) - @webmethod( - route="/vector_stores/{vector_store_id}/files/{file_id}", - method="GET", - level=LLAMA_STACK_API_V1, - ) - async def openai_retrieve_vector_store_file( - self, - vector_store_id: str, - file_id: str, - ) -> VectorStoreFileObject: - """Retrieves a vector store file. - - :param vector_store_id: The ID of the vector store containing the file to retrieve. - :param file_id: The ID of the file to retrieve. - :returns: A VectorStoreFileObject representing the file. - """ - ... - - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", - method="GET", - level=LLAMA_STACK_API_V1, - deprecated=True, - ) - @webmethod( - route="/vector_stores/{vector_store_id}/files/{file_id}/content", - method="GET", - level=LLAMA_STACK_API_V1, - ) - async def openai_retrieve_vector_store_file_contents( - self, - vector_store_id: str, - file_id: str, - ) -> VectorStoreFileContentsResponse: - """Retrieves the contents of a vector store file. - - :param vector_store_id: The ID of the vector store containing the file to retrieve. - :param file_id: The ID of the file to retrieve. 
- :returns: A VectorStoreFileContentsResponse containing the file contents.
- """
- ...
-
- @webmethod(
- route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
- method="POST",
- level=LLAMA_STACK_API_V1,
- deprecated=True,
- )
- @webmethod(
- route="/vector_stores/{vector_store_id}/files/{file_id}",
- method="POST",
- level=LLAMA_STACK_API_V1,
- )
- async def openai_update_vector_store_file(
- self,
- vector_store_id: str,
- file_id: str,
- attributes: dict[str, Any],
- ) -> VectorStoreFileObject:
- """Updates a vector store file.
-
- :param vector_store_id: The ID of the vector store containing the file to update.
- :param file_id: The ID of the file to update.
- :param attributes: The updated key-value attributes to store with the file.
- :returns: A VectorStoreFileObject representing the updated file.
- """
- ...
-
- @webmethod(
- route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
- method="DELETE",
- level=LLAMA_STACK_API_V1,
- deprecated=True,
- )
- @webmethod(
- route="/vector_stores/{vector_store_id}/files/{file_id}",
- method="DELETE",
- level=LLAMA_STACK_API_V1,
- )
- async def openai_delete_vector_store_file(
- self,
- vector_store_id: str,
- file_id: str,
- ) -> VectorStoreFileDeleteResponse:
- """Delete a vector store file.
-
- :param vector_store_id: The ID of the vector store containing the file to delete.
- :param file_id: The ID of the file to delete.
- :returns: A VectorStoreFileDeleteResponse indicating the deletion status.
- """
- ...
-
- @webmethod(
- route="/vector_stores/{vector_store_id}/file_batches",
- method="POST",
- level=LLAMA_STACK_API_V1,
- )
- @webmethod(
- route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
- method="POST",
- level=LLAMA_STACK_API_V1,
- deprecated=True,
- )
- async def openai_create_vector_store_file_batch(
- self,
- vector_store_id: str,
- params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
- ) -> VectorStoreFileBatchObject:
- """Create a vector store file batch.
-
- Generate an OpenAI-compatible vector store file batch for the given vector store.
- :param vector_store_id: The ID of the vector store to create the file batch for.
- :returns: A VectorStoreFileBatchObject representing the created file batch.
- """
- ...
-
- @webmethod(
- route="/vector_stores/{vector_store_id}/file_batches/{batch_id}",
- method="GET",
- level=LLAMA_STACK_API_V1,
- )
- @webmethod(
- route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
- method="GET",
- level=LLAMA_STACK_API_V1,
- deprecated=True,
- )
- async def openai_retrieve_vector_store_file_batch(
- self,
- batch_id: str,
- vector_store_id: str,
- ) -> VectorStoreFileBatchObject:
- """Retrieve a vector store file batch.
-
- :param batch_id: The ID of the file batch to retrieve.
- :param vector_store_id: The ID of the vector store containing the file batch.
- :returns: A VectorStoreFileBatchObject representing the file batch.
- """
- ...
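
File batches are asynchronous: the returned `VectorStoreFileBatchObject` typically starts out `in_progress` and must be polled until it reaches a terminal `VectorStoreFileStatus` (`completed`, `failed`, or `cancelled`). A polling sketch over the two batch endpoints above; IDs, port, and timing are illustrative:

import time

import httpx

# Illustrative only: store and file IDs are placeholders.
base = "http://localhost:8321/v1/vector_stores/vs_123"

batch = httpx.post(f"{base}/file_batches", json={"file_ids": ["file_a", "file_b"]}).json()
while batch["status"] == "in_progress":
    time.sleep(1)
    batch = httpx.get(f"{base}/file_batches/{batch['id']}").json()

counts = batch["file_counts"]
print(batch["status"], f"{counts['completed']}/{counts['total']} files processed")
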
- - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", - method="GET", - level=LLAMA_STACK_API_V1, - deprecated=True, - ) - @webmethod( - route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", - method="GET", - level=LLAMA_STACK_API_V1, - ) - async def openai_list_files_in_vector_store_file_batch( - self, - batch_id: str, - vector_store_id: str, - after: str | None = None, - before: str | None = None, - filter: str | None = None, - limit: int | None = 20, - order: str | None = "desc", - ) -> VectorStoreFilesListInBatchResponse: - """Returns a list of vector store files in a batch. - - :param batch_id: The ID of the file batch to list files from. - :param vector_store_id: The ID of the vector store containing the file batch. - :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. - :param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. - :param filter: Filter by file status. One of in_progress, completed, failed, cancelled. - :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. - :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order. - :returns: A VectorStoreFilesListInBatchResponse containing the list of files in the batch. - """ - ... - - @webmethod( - route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", - method="POST", - level=LLAMA_STACK_API_V1, - deprecated=True, - ) - @webmethod( - route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", - method="POST", - level=LLAMA_STACK_API_V1, - ) - async def openai_cancel_vector_store_file_batch( - self, - batch_id: str, - vector_store_id: str, - ) -> VectorStoreFileBatchObject: - """Cancels a vector store file batch. - - :param batch_id: The ID of the file batch to cancel. - :param vector_store_id: The ID of the vector store containing the file batch. - :returns: A VectorStoreFileBatchObject representing the cancelled file batch. - """ - ... diff --git a/llama_stack/apis/vector_stores/__init__.py b/llama_stack/apis/vector_stores/__init__.py deleted file mode 100644 index 8fc34058a..000000000 --- a/llama_stack/apis/vector_stores/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_stores import * diff --git a/llama_stack/apis/vector_stores/vector_stores.py b/llama_stack/apis/vector_stores/vector_stores.py deleted file mode 100644 index 524624028..000000000 --- a/llama_stack/apis/vector_stores/vector_stores.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Literal - -from pydantic import BaseModel - -from llama_stack.apis.resource import Resource, ResourceType - - -# Internal resource type for storing the vector store routing and other information -class VectorStore(Resource): - """Vector database resource for storing and querying vector embeddings. 
- - :param type: Type of resource, always 'vector_store' for vector stores - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors - """ - - type: Literal[ResourceType.vector_store] = ResourceType.vector_store - - embedding_model: str - embedding_dimension: int - vector_store_name: str | None = None - - @property - def vector_store_id(self) -> str: - return self.identifier - - @property - def provider_vector_store_id(self) -> str | None: - return self.provider_resource_id - - -class VectorStoreInput(BaseModel): - """Input parameters for creating or configuring a vector database. - - :param vector_store_id: Unique identifier for the vector store - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors - :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store - """ - - vector_store_id: str - embedding_model: str - embedding_dimension: int - provider_id: str | None = None - provider_vector_store_id: str | None = None diff --git a/llama_stack/cli/stack/list_stacks.py b/llama_stack/cli/stack/list_stacks.py deleted file mode 100644 index 2ea0fdeea..000000000 --- a/llama_stack/cli/stack/list_stacks.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import argparse -from pathlib import Path - -from llama_stack.cli.subcommand import Subcommand -from llama_stack.cli.table import print_table - - -class StackListBuilds(Subcommand): - """List built stacks in .llama/distributions directory""" - - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "list", - prog="llama stack list", - description="list the build stacks", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._list_stack_command) - - def _get_distribution_dirs(self) -> dict[str, Path]: - """Return a dictionary of distribution names and their paths""" - distributions = {} - dist_dir = Path.home() / ".llama" / "distributions" - - if dist_dir.exists(): - for stack_dir in dist_dir.iterdir(): - if stack_dir.is_dir(): - distributions[stack_dir.name] = stack_dir - return distributions - - def _list_stack_command(self, args: argparse.Namespace) -> None: - distributions = self._get_distribution_dirs() - - if not distributions: - print("No stacks found in ~/.llama/distributions") - return - - headers = ["Stack Name", "Path"] - headers.extend(["Build Config", "Run Config"]) - rows = [] - for name, path in distributions.items(): - row = [name, str(path)] - # Check for build and run config files - build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No" - run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No" - row.extend([build_config, run_config]) - rows.append(row) - print_table(rows, headers, separate_rows=True) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py deleted file mode 100644 index 06dae7318..000000000 --- a/llama_stack/cli/stack/run.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import argparse -import os -import ssl -import subprocess -from pathlib import Path - -import uvicorn -import yaml - -from llama_stack.cli.stack.utils import ImageType -from llama_stack.cli.subcommand import Subcommand -from llama_stack.core.datatypes import LoggingConfig, StackRunConfig -from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars -from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro -from llama_stack.log import get_logger - -REPO_ROOT = Path(__file__).parent.parent.parent.parent - -logger = get_logger(name=__name__, category="cli") - - -class StackRun(Subcommand): - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "run", - prog="llama stack run", - description="""Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.""", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._run_stack_run_cmd) - - def _add_arguments(self): - self.parser.add_argument( - "config", - type=str, - nargs="?", # Make it optional - metavar="config | distro", - help="Path to config file to use for the run or name of known distro (`llama stack list` for a list).", - ) - self.parser.add_argument( - "--port", - type=int, - help="Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT.", - default=int(os.getenv("LLAMA_STACK_PORT", 8321)), - ) - self.parser.add_argument( - "--image-name", - type=str, - default=None, - help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.", - ) - self.parser.add_argument( - "--image-type", - type=str, - help="[DEPRECATED] This flag is no longer supported. 
Please activate your virtual environment before running.", - choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value], - ) - self.parser.add_argument( - "--enable-ui", - action="store_true", - help="Start the UI server", - ) - - def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: - import yaml - - from llama_stack.core.configure import parse_and_maybe_upgrade_config - - if args.image_type or args.image_name: - self.parser.error( - "The --image-type and --image-name flags are no longer supported.\n\n" - "Please activate your virtual environment manually before running `llama stack run`.\n\n" - "For example:\n" - " source /path/to/venv/bin/activate\n" - " llama stack run \n" - ) - - if args.enable_ui: - self._start_ui_development_server(args.port) - - if args.config: - try: - from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro - - config_file = resolve_config_or_distro(args.config, Mode.RUN) - except ValueError as e: - self.parser.error(str(e)) - else: - config_file = None - - if config_file: - logger.info(f"Using run configuration: {config_file}") - - try: - config_dict = yaml.safe_load(config_file.read_text()) - except yaml.parser.ParserError as e: - self.parser.error(f"failed to load config file '{config_file}':\n {e}") - - try: - config = parse_and_maybe_upgrade_config(config_dict) - if not os.path.exists(str(config.external_providers_dir)): - os.makedirs(str(config.external_providers_dir), exist_ok=True) - except AttributeError as e: - self.parser.error(f"failed to parse config file '{config_file}':\n {e}") - - self._uvicorn_run(config_file, args) - - def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None: - if not config_file: - self.parser.error("Config file is required") - - config_file = resolve_config_or_distro(str(config_file), Mode.RUN) - with open(config_file) as fp: - config_contents = yaml.safe_load(fp) - if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")): - logger_config = LoggingConfig(**cfg) - else: - logger_config = None - config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents))) - - port = args.port or config.server.port - host = config.server.host or ["::", "0.0.0.0"] - - # Set the config file in environment so create_app can find it - os.environ["LLAMA_STACK_CONFIG"] = str(config_file) - - uvicorn_config = { - "factory": True, - "host": host, - "port": port, - "lifespan": "on", - "log_level": logger.getEffectiveLevel(), - "log_config": logger_config, - } - - keyfile = config.server.tls_keyfile - certfile = config.server.tls_certfile - if keyfile and certfile: - uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile - uvicorn_config["ssl_certfile"] = config.server.tls_certfile - if config.server.tls_cafile: - uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile - uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED - - logger.info( - f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}" - ) - else: - logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}") - - logger.info(f"Listening on {host}:{port}") - - # We need to catch KeyboardInterrupt because uvicorn's signal handling - # re-raises SIGINT signals using signal.raise_signal(), which Python - # converts to KeyboardInterrupt. Without this catch, we'd get a confusing - # stack trace when using Ctrl+C or kill -2 (SIGINT). 
- # SIGTERM (kill -15) works fine without this because Python doesn't - # have a default handler for it. - # - # Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own - # signal handling but this is quite intrusive and not worth the effort. - try: - uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config) - except (KeyboardInterrupt, SystemExit): - logger.info("Received interrupt signal, shutting down gracefully...") - - def _start_ui_development_server(self, stack_server_port: int): - logger.info("Attempting to start UI development server...") - # Check if npm is available - npm_check = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=False) - if npm_check.returncode != 0: - logger.warning( - f"'npm' command not found or not executable. UI development server will not be started. Error: {npm_check.stderr}" - ) - return - - ui_dir = REPO_ROOT / "llama_stack" / "ui" - logs_dir = Path("~/.llama/ui/logs").expanduser() - try: - # Create logs directory if it doesn't exist - logs_dir.mkdir(parents=True, exist_ok=True) - - ui_stdout_log_path = logs_dir / "stdout.log" - ui_stderr_log_path = logs_dir / "stderr.log" - - # Open log files in append mode - stdout_log_file = open(ui_stdout_log_path, "a") - stderr_log_file = open(ui_stderr_log_path, "a") - - process = subprocess.Popen( - ["npm", "run", "dev"], - cwd=str(ui_dir), - stdout=stdout_log_file, - stderr=stderr_log_file, - env={**os.environ, "NEXT_PUBLIC_LLAMA_STACK_BASE_URL": f"http://localhost:{stack_server_port}"}, - ) - logger.info(f"UI development server process started in {ui_dir} with PID {process.pid}.") - logger.info(f"Logs: stdout -> {ui_stdout_log_path}, stderr -> {ui_stderr_log_path}") - logger.info(f"UI will be available at http://localhost:{os.getenv('LLAMA_STACK_UI_PORT', 8322)}") - - except FileNotFoundError: - logger.error( - "Failed to start UI development server: 'npm' command not found. Make sure npm is installed and in your PATH." - ) - except Exception as e: - logger.error(f"Failed to start UI development server in {ui_dir}: {e}") diff --git a/llama_stack/cli/stack/utils.py b/llama_stack/cli/stack/utils.py deleted file mode 100644 index cc1ca051b..000000000 --- a/llama_stack/cli/stack/utils.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
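
One detail of the deleted `_uvicorn_run` above is worth calling out: when `tls_cafile` is set alongside the key and certificate, the server flips to mutual TLS by passing `ssl.CERT_REQUIRED` to uvicorn. A condensed sketch of that wiring; the certificate paths are hypothetical:

import ssl

import uvicorn

# Condensed from the deleted _uvicorn_run; paths below are placeholders.
uvicorn_config = {
    "factory": True,
    "host": "0.0.0.0",
    "port": 8321,
    "lifespan": "on",
    "ssl_keyfile": "/etc/llama/server.key",
    "ssl_certfile": "/etc/llama/server.crt",
    "ssl_ca_certs": "/etc/llama/ca.crt",  # presence of a CA bundle...
    "ssl_cert_reqs": ssl.CERT_REQUIRED,   # ...makes client certificates mandatory
}

uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
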
- -import json -import sys -from enum import Enum -from functools import lru_cache -from pathlib import Path - -import yaml -from termcolor import cprint - -from llama_stack.core.datatypes import ( - BuildConfig, - Provider, - StackRunConfig, - StorageConfig, -) -from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.resolver import InvalidProviderError -from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, - KVStoreReference, - ServerStoresConfig, - SqliteKVStoreConfig, - SqliteSqlStoreConfig, - SqlStoreReference, -) -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR -from llama_stack.core.utils.dynamic import instantiate_class_type -from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack.providers.datatypes import Api - -TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions" - - -class ImageType(Enum): - CONTAINER = "container" - VENV = "venv" - - -def print_subcommand_description(parser, subparsers): - """Print descriptions of subcommands.""" - description_text = "" - for name, subcommand in subparsers.choices.items(): - description = subcommand.description - description_text += f" {name:<21} {description}\n" - parser.epilog = description_text - - -def generate_run_config( - build_config: BuildConfig, - build_dir: Path, - image_name: str, -) -> Path: - """ - Generate a run.yaml template file for user to edit from a build.yaml file - """ - apis = list(build_config.distribution_spec.providers.keys()) - distro_dir = DISTRIBS_BASE_DIR / image_name - run_config = StackRunConfig( - container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), - image_name=image_name, - apis=apis, - providers={}, - storage=StorageConfig( - backends={ - "kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")), - "sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")), - }, - stores=ServerStoresConfig( - metadata=KVStoreReference(backend="kv_default", namespace="registry"), - inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), - conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), - ), - ), - external_providers_dir=build_config.external_providers_dir - if build_config.external_providers_dir - else EXTERNAL_PROVIDERS_DIR, - ) - # build providers dict - provider_registry = get_provider_registry(build_config) - for api in apis: - run_config.providers[api] = [] - providers = build_config.distribution_spec.providers[api] - - for provider in providers: - pid = provider.provider_type.split("::")[-1] - - p = provider_registry[Api(api)][provider.provider_type] - if p.deprecation_error: - raise InvalidProviderError(p.deprecation_error) - - try: - config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class) - except (ModuleNotFoundError, ValueError) as exc: - # HACK ALERT: - # This code executes after building is done, the import cannot work since the - # package is either available in the venv or container - not available on the host. 
- # TODO: use a "is_external" flag in ProviderSpec to check if the provider is - # external - cprint( - f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}", - color="yellow", - file=sys.stderr, - ) - # Set config_type to None to avoid UnboundLocalError - config_type = None - - if config_type is not None and hasattr(config_type, "sample_run_config"): - config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}") - else: - config = {} - - p_spec = Provider( - provider_id=pid, - provider_type=provider.provider_type, - config=config, - module=provider.module, - ) - run_config.providers[api].append(p_spec) - - run_config_file = build_dir / f"{image_name}-run.yaml" - - with open(run_config_file, "w") as f: - to_write = json.loads(run_config.model_dump_json()) - f.write(yaml.dump(to_write, sort_keys=False)) - - # Only print this message for non-container builds since it will be displayed before the - # container is built - # For non-container builds, the run.yaml is generated at the very end of the build process so it - # makes sense to display this message - if build_config.image_type != LlamaStackImageType.CONTAINER.value: - cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) - return run_config_file - - -@lru_cache -def available_templates_specs() -> dict[str, BuildConfig]: - import yaml - - template_specs = {} - for p in TEMPLATES_PATH.rglob("*build.yaml"): - template_name = p.parent.name - with open(p) as f: - build_config = BuildConfig(**yaml.safe_load(f)) - template_specs[template_name] = build_config - return template_specs diff --git a/llama_stack/core/build.py b/llama_stack/core/build.py deleted file mode 100644 index 2ceb9e9be..000000000 --- a/llama_stack/core/build.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import importlib.resources -import sys - -from pydantic import BaseModel -from termcolor import cprint - -from llama_stack.core.datatypes import BuildConfig -from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.external import load_external_apis -from llama_stack.core.utils.exec import run_command -from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack.distributions.template import DistributionTemplate -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api - -log = get_logger(name=__name__, category="core") - -# These are the dependencies needed by the distribution server. -# `llama-stack` is automatically installed by the installation script. 
-SERVER_DEPENDENCIES = [ - "aiosqlite", - "fastapi", - "fire", - "httpx", - "uvicorn", - "opentelemetry-sdk", - "opentelemetry-exporter-otlp-proto-http", -] - - -class ApiInput(BaseModel): - api: Api - provider: str - - -def get_provider_dependencies( - config: BuildConfig | DistributionTemplate, -) -> tuple[list[str], list[str], list[str]]: - """Get normal and special dependencies from provider configuration.""" - if isinstance(config, DistributionTemplate): - config = config.build_config() - - providers = config.distribution_spec.providers - additional_pip_packages = config.additional_pip_packages - - deps = [] - external_provider_deps = [] - registry = get_provider_registry(config) - for api_str, provider_or_providers in providers.items(): - providers_for_api = registry[Api(api_str)] - - providers = provider_or_providers if isinstance(provider_or_providers, list) else [provider_or_providers] - - for provider in providers: - # Providers from BuildConfig and RunConfig are subtly different - not great - provider_type = provider if isinstance(provider, str) else provider.provider_type - - if provider_type not in providers_for_api: - raise ValueError(f"Provider `{provider}` is not available for API `{api_str}`") - - provider_spec = providers_for_api[provider_type] - if hasattr(provider_spec, "is_external") and provider_spec.is_external: - # this ensures we install the top level module for our external providers - if provider_spec.module: - if isinstance(provider_spec.module, str): - external_provider_deps.append(provider_spec.module) - else: - external_provider_deps.extend(provider_spec.module) - if hasattr(provider_spec, "pip_packages"): - deps.extend(provider_spec.pip_packages) - if hasattr(provider_spec, "container_image") and provider_spec.container_image: - raise ValueError("A stack's dependencies cannot have a container image") - - normal_deps = [] - special_deps = [] - for package in deps: - if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]): - special_deps.append(package) - else: - normal_deps.append(package) - - normal_deps.extend(additional_pip_packages or []) - - return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps)) - - -def print_pip_install_help(config: BuildConfig): - normal_deps, special_deps, _ = get_provider_dependencies(config) - - cprint( - f"Please install needed dependencies using the following commands:\n\nuv pip install {' '.join(normal_deps)}", - color="yellow", - file=sys.stderr, - ) - for special_dep in special_deps: - cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr) - print() - - -def build_image( - build_config: BuildConfig, - image_name: str, - distro_or_config: str, - run_config: str | None = None, -): - container_base = build_config.distribution_spec.container_image or "python:3.12-slim" - - normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config) - normal_deps += SERVER_DEPENDENCIES - if build_config.external_apis_dir: - external_apis = load_external_apis(build_config) - if external_apis: - for _, api_spec in external_apis.items(): - normal_deps.extend(api_spec.pip_packages) - - if build_config.image_type == LlamaStackImageType.CONTAINER.value: - script = str(importlib.resources.files("llama_stack") / "core/build_container.sh") - args = [ - script, - "--distro-or-config", - distro_or_config, - "--image-name", - image_name, - "--container-base", - container_base, - "--normal-deps", - " ".join(normal_deps), - ] - # When building from a 
config file (not a template), include the run config path in the - # build arguments - if run_config is not None: - args.extend(["--run-config", run_config]) - else: - script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh") - args = [ - script, - "--env-name", - str(image_name), - "--normal-deps", - " ".join(normal_deps), - ] - - # Always pass both arguments, even if empty, to maintain consistent positional arguments - if special_deps: - args.extend(["--optional-deps", "#".join(special_deps)]) - if external_provider_deps: - args.extend( - ["--external-provider-deps", "#".join(external_provider_deps)] - ) # the script will install external provider module, get its deps, and install those too. - - return_code = run_command(args) - - if return_code != 0: - log.error( - f"Failed to build target {image_name} with return code {return_code}", - ) - - return return_code diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py deleted file mode 100644 index 66880ca36..000000000 --- a/llama_stack/core/conversations/conversations.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import secrets -import time -from typing import Any - -from openai import NOT_GIVEN -from pydantic import BaseModel, TypeAdapter - -from llama_stack.apis.conversations.conversations import ( - Conversation, - ConversationDeletedResource, - ConversationItem, - ConversationItemDeletedResource, - ConversationItemList, - Conversations, - Metadata, -) -from llama_stack.core.datatypes import AccessRule, StackRunConfig -from llama_stack.log import get_logger -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl - -logger = get_logger(name=__name__, category="openai_conversations") - - -class ConversationServiceConfig(BaseModel): - """Configuration for the built-in conversation service. 
-
-    :param run_config: Stack run configuration for resolving persistence
-    :param policy: Access control rules
-    """
-
-    run_config: StackRunConfig
-    policy: list[AccessRule] = []
-
-
-async def get_provider_impl(config: ConversationServiceConfig, deps: dict[Any, Any]):
-    """Get the conversation service implementation."""
-    impl = ConversationServiceImpl(config, deps)
-    await impl.initialize()
-    return impl
-
-
-class ConversationServiceImpl(Conversations):
-    """Built-in conversation service implementation using AuthorizedSqlStore."""
-
-    def __init__(self, config: ConversationServiceConfig, deps: dict[Any, Any]):
-        self.config = config
-        self.deps = deps
-        self.policy = config.policy
-
-        # Use conversations store reference from run config
-        conversations_ref = config.run_config.storage.stores.conversations
-        if not conversations_ref:
-            raise ValueError("storage.stores.conversations must be configured in run config")
-
-        base_sql_store = sqlstore_impl(conversations_ref)
-        self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
-
-    async def initialize(self) -> None:
-        """Initialize the store and create tables."""
-        await self.sql_store.create_table(
-            "openai_conversations",
-            {
-                "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
-                "created_at": ColumnType.INTEGER,
-                "items": ColumnType.JSON,
-                "metadata": ColumnType.JSON,
-            },
-        )
-
-        await self.sql_store.create_table(
-            "conversation_items",
-            {
-                "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
-                "conversation_id": ColumnType.STRING,
-                "created_at": ColumnType.INTEGER,
-                "item_data": ColumnType.JSON,
-            },
-        )
-
-    async def create_conversation(
-        self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
-    ) -> Conversation:
-        """Create a conversation."""
-        random_bytes = secrets.token_bytes(24)
-        conversation_id = f"conv_{random_bytes.hex()}"
-        created_at = int(time.time())
-
-        record_data = {
-            "id": conversation_id,
-            "created_at": created_at,
-            "items": [],
-            "metadata": metadata,
-        }
-
-        await self.sql_store.insert(
-            table="openai_conversations",
-            data=record_data,
-        )
-
-        if items:
-            item_records = []
-            for item in items:
-                item_dict = item.model_dump()
-                item_id = self._get_or_generate_item_id(item, item_dict)
-
-                item_record = {
-                    "id": item_id,
-                    "conversation_id": conversation_id,
-                    "created_at": created_at,
-                    "item_data": item_dict,
-                }
-
-                item_records.append(item_record)
-
-            await self.sql_store.insert(table="conversation_items", data=item_records)
-
-        conversation = Conversation(
-            id=conversation_id,
-            created_at=created_at,
-            metadata=metadata,
-            object="conversation",
-        )
-
-        logger.debug(f"Created conversation {conversation_id}")
-        return conversation
-
-    async def get_conversation(self, conversation_id: str) -> Conversation:
-        """Get a conversation with the given ID."""
-        record = await self.sql_store.fetch_one(table="openai_conversations", where={"id": conversation_id})
-
-        if record is None:
-            raise ValueError(f"Conversation {conversation_id} not found")
-
-        return Conversation(
-            id=record["id"], created_at=record["created_at"], metadata=record.get("metadata"), object="conversation"
-        )
-
-    async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
-        """Update a conversation's metadata with the given ID"""
-        await self.sql_store.update(
-            table="openai_conversations", data={"metadata": metadata}, where={"id": conversation_id}
-        )
-
-        return await self.get_conversation(conversation_id)
-
-    async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
-        """Delete a conversation with the given ID."""
-        await self.sql_store.delete(table="openai_conversations", where={"id": conversation_id})
-
-        logger.debug(f"Deleted conversation {conversation_id}")
-        return ConversationDeletedResource(id=conversation_id)
-
-    def _validate_conversation_id(self, conversation_id: str) -> None:
-        """Validate conversation ID format."""
-        if not conversation_id.startswith("conv_"):
-            raise ValueError(
-                f"Invalid 'conversation_id': '{conversation_id}'. Expected an ID that begins with 'conv_'."
-            )
-
-    def _get_or_generate_item_id(self, item: ConversationItem, item_dict: dict) -> str:
-        """Get existing item ID or generate one if missing."""
-        if item.id is None:
-            random_bytes = secrets.token_bytes(24)
-            if item.type == "message":
-                item_id = f"msg_{random_bytes.hex()}"
-            else:
-                item_id = f"item_{random_bytes.hex()}"
-            item_dict["id"] = item_id
-            return item_id
-        return item.id
-
-    async def _get_validated_conversation(self, conversation_id: str) -> Conversation:
-        """Validate conversation ID and return the conversation if it exists."""
-        self._validate_conversation_id(conversation_id)
-        return await self.get_conversation(conversation_id)
-
-    async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
-        """Create (add) items to a conversation."""
-        await self._get_validated_conversation(conversation_id)
-
-        created_items = []
-        base_time = int(time.time())
-
-        for i, item in enumerate(items):
-            item_dict = item.model_dump()
-            item_id = self._get_or_generate_item_id(item, item_dict)
-
-            # make each timestamp unique to maintain order
-            created_at = base_time + i
-
-            item_record = {
-                "id": item_id,
-                "conversation_id": conversation_id,
-                "created_at": created_at,
-                "item_data": item_dict,
-            }
-
-            # TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update
-            try:
-                await self.sql_store.insert(table="conversation_items", data=item_record)
-            except Exception:
-                # If insert fails due to ID conflict, update existing record
-                await self.sql_store.update(
-                    table="conversation_items",
-                    data={"created_at": created_at, "item_data": item_dict},
-                    where={"id": item_id},
-                )
-
-            created_items.append(item_dict)
-
-        logger.debug(f"Created {len(created_items)} items in conversation {conversation_id}")
-
-        # Convert created items (dicts) to proper ConversationItem types
-        adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
-        response_items: list[ConversationItem] = [adapter.validate_python(item_dict) for item_dict in created_items]
-
-        return ConversationItemList(
-            data=response_items,
-            first_id=created_items[0]["id"] if created_items else None,
-            last_id=created_items[-1]["id"] if created_items else None,
-            has_more=False,
-        )
-
-    async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
-        """Retrieve a conversation item."""
-        if not conversation_id:
-            raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
-        if not item_id:
-            raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}")
-
-        # Get item from conversation_items table
-        record = await self.sql_store.fetch_one(
-            table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
-        )
-
-        if record is None:
-            raise ValueError(f"Item {item_id} not found in conversation {conversation_id}")
-
-        adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
-        return adapter.validate_python(record["item_data"])
-
-    async def list(self, conversation_id: str, after=NOT_GIVEN, include=NOT_GIVEN, limit=NOT_GIVEN, order=NOT_GIVEN):
-        """List items in the conversation."""
-        if not conversation_id:
-            raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
-
-        # check if conversation exists
-        await self.get_conversation(conversation_id)
-
-        result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id})
-        records = result.data
-
-        if order != NOT_GIVEN and order == "asc":
-            records.sort(key=lambda x: x["created_at"])
-        else:
-            records.sort(key=lambda x: x["created_at"], reverse=True)
-
-        actual_limit = 20
-        if limit != NOT_GIVEN and isinstance(limit, int):
-            actual_limit = limit
-
-        records = records[:actual_limit]
-        items = [record["item_data"] for record in records]
-
-        adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
-        response_items: list[ConversationItem] = [adapter.validate_python(item) for item in items]
-
-        first_id = response_items[0].id if response_items else None
-        last_id = response_items[-1].id if response_items else None
-
-        return ConversationItemList(
-            data=response_items,
-            first_id=first_id,
-            last_id=last_id,
-            has_more=False,
-        )
-
-    async def openai_delete_conversation_item(
-        self, conversation_id: str, item_id: str
-    ) -> ConversationItemDeletedResource:
-        """Delete a conversation item."""
-        if not conversation_id:
-            raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
-        if not item_id:
-            raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}")
-
-        _ = await self._get_validated_conversation(conversation_id)
-
-        record = await self.sql_store.fetch_one(
-            table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
-        )
-
-        if record is None:
-            raise ValueError(f"Item {item_id} not found in conversation {conversation_id}")
-
-        await self.sql_store.delete(
-            table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
-        )
-
-        logger.debug(f"Deleted item {item_id} from conversation {conversation_id}")
-        return ConversationItemDeletedResource(id=item_id)
diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py
deleted file mode 100644
index 5f4775d87..000000000
--- a/llama_stack/core/datatypes.py
+++ /dev/null
@@ -1,621 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from enum import StrEnum
-from pathlib import Path
-from typing import Annotated, Any, Literal, Self
-from urllib.parse import urlparse
-
-from pydantic import BaseModel, Field, field_validator, model_validator
-
-from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset, DatasetInput
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Model, ModelInput
-from llama_stack.apis.resource import Resource
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
-from llama_stack.apis.shields import Shield, ShieldInput
-from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
-from llama_stack.core.access_control.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import (
-    KVStoreReference,
-    StorageBackendType,
-    StorageConfig,
-)
-from llama_stack.providers.datatypes import Api, ProviderSpec
-
-LLAMA_STACK_BUILD_CONFIG_VERSION = 2
-LLAMA_STACK_RUN_CONFIG_VERSION = 2
-
-
-RoutingKey = str | list[str]
-
-
-class RegistryEntrySource(StrEnum):
-    via_register_api = "via_register_api"
-    listed_from_provider = "listed_from_provider"
-
-
-class User(BaseModel):
-    principal: str
-    # further attributes that may be used for access control decisions
-    attributes: dict[str, list[str]] | None = None
-
-    def __init__(self, principal: str, attributes: dict[str, list[str]] | None):
-        super().__init__(principal=principal, attributes=attributes)
-
-
-class ResourceWithOwner(Resource):
-    """Extension of Resource that adds an optional owner, i.e. the user that created the
-    resource. This can be used to constrain access to the resource."""
-
-    owner: User | None = None
-    source: RegistryEntrySource = RegistryEntrySource.via_register_api
-
-
-# Use the extended Resource for all routable objects
-class ModelWithOwner(Model, ResourceWithOwner):
-    pass
-
-
-class ShieldWithOwner(Shield, ResourceWithOwner):
-    pass
-
-
-class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
-    pass
-
-
-class DatasetWithOwner(Dataset, ResourceWithOwner):
-    pass
-
-
-class ScoringFnWithOwner(ScoringFn, ResourceWithOwner):
-    pass
-
-
-class BenchmarkWithOwner(Benchmark, ResourceWithOwner):
-    pass
-
-
-class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
-    pass
-
-
-RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
-
-RoutableObjectWithProvider = Annotated[
-    ModelWithOwner
-    | ShieldWithOwner
-    | VectorStoreWithOwner
-    | DatasetWithOwner
-    | ScoringFnWithOwner
-    | BenchmarkWithOwner
-    | ToolGroupWithOwner,
-    Field(discriminator="type"),
-]
-
-RoutedProtocol = Inference | Safety | VectorIO | DatasetIO | Scoring | Eval | ToolRuntime
-
-
-# Example: /inference, /safety
-class AutoRoutedProviderSpec(ProviderSpec):
-    provider_type: str = "router"
-    config_class: str = ""
-
-    container_image: str | None = None
-    routing_table_api: Api
-    module: str
-    provider_data_validator: str | None = Field(
-        default=None,
-    )
-
-
-# Example: /models, /shields
-class RoutingTableProviderSpec(ProviderSpec):
-    provider_type: str = "routing_table"
-    config_class: str = ""
-    container_image: str | None = None
-
-    router_api: Api
-    module: str
-    pip_packages: list[str] = Field(default_factory=list)
-
-
-class Provider(BaseModel):
-    # provider_id of None means that the provider is not enabled - this happens
-    # when the provider is enabled via a conditional environment variable
-    provider_id: str | None
-    provider_type: str
-    config: dict[str, Any] = {}
-    module: str | None = Field(
-        default=None,
-        description="""
-        Fully-qualified name of the external provider module to import. The module is expected to have:
-
-        - `get_adapter_impl(config, deps)`: returns the adapter implementation
-
-        Example: `module: ramalama_stack`
-        """,
-    )
-
-
-class BuildProvider(BaseModel):
-    provider_type: str
-    module: str | None = Field(
-        default=None,
-        description="""
-        Fully-qualified name of the external provider module to import. The module is expected to have:
-
-        - `get_adapter_impl(config, deps)`: returns the adapter implementation
-
-        Example: `module: ramalama_stack`
-        """,
-    )
-
-
-class DistributionSpec(BaseModel):
-    description: str | None = Field(
-        default="",
-        description="Description of the distribution",
-    )
-    container_image: str | None = None
-    providers: dict[str, list[BuildProvider]] = Field(
-        default_factory=dict,
-        description="""
-        Provider Types for each of the APIs provided by this distribution. If you
-        select multiple providers, you should provide an appropriate 'routing_map'
-        in the runtime configuration to help route to the correct provider.
-        """,
-    )
-
-
-class TelemetryConfig(BaseModel):
-    """
-    Configuration for telemetry.
-
-    Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
-    for env variables to configure the OpenTelemetry SDK.
-
-    Example:
-    ```bash
-    OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
-    ```
-    """
-
-    enabled: bool = Field(default=False, description="enable or disable telemetry")
-
-
-class LoggingConfig(BaseModel):
-    category_levels: dict[str, str] = Field(
-        default_factory=dict,
-        description="""
-        Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
-    )
-
-
-class OAuth2JWKSConfig(BaseModel):
-    # The JWKS URI for collecting public keys
-    uri: str
-    token: str | None = Field(default=None, description="token to authorise access to jwks")
-    key_recheck_period: int = Field(default=3600, description="The period to recheck the JWKS URI for key updates")
-
-
-class OAuth2IntrospectionConfig(BaseModel):
-    url: str
-    client_id: str
-    client_secret: str
-    send_secret_in_body: bool = False
-
-
-class AuthProviderType(StrEnum):
-    """Supported authentication provider types."""
-
-    OAUTH2_TOKEN = "oauth2_token"
-    GITHUB_TOKEN = "github_token"
-    CUSTOM = "custom"
-    KUBERNETES = "kubernetes"
-
-
-class OAuth2TokenAuthConfig(BaseModel):
-    """Configuration for OAuth2 token authentication."""
-
-    type: Literal[AuthProviderType.OAUTH2_TOKEN] = AuthProviderType.OAUTH2_TOKEN
-    audience: str = Field(default="llama-stack")
-    verify_tls: bool = Field(default=True)
-    tls_cafile: Path | None = Field(default=None)
-    issuer: str | None = Field(default=None, description="The OIDC issuer URL.")
-    claims_mapping: dict[str, str] = Field(
-        default_factory=lambda: {
-            "sub": "roles",
-            "username": "roles",
-            "groups": "teams",
-            "team": "teams",
-            "project": "projects",
-            "tenant": "namespaces",
-            "namespace": "namespaces",
-        },
-    )
-    jwks: OAuth2JWKSConfig | None = Field(default=None, description="JWKS configuration")
-    introspection: OAuth2IntrospectionConfig | None = Field(
-        default=None, description="OAuth2 introspection configuration"
-    )
-
-    @classmethod
-    @field_validator("claims_mapping")
-    def validate_claims_mapping(cls, v):
-        for key, value in v.items():
-            if not value:
-                raise ValueError(f"claims_mapping value cannot be empty: {key}")
-        return v
-
-    @model_validator(mode="after")
-    def validate_mode(self) -> Self:
-        if not self.jwks and not self.introspection:
-            raise ValueError("One of jwks or introspection must be configured")
-        if self.jwks and self.introspection:
-            raise ValueError("At present only one of jwks or introspection should be configured")
-        return self
-
-
-class CustomAuthConfig(BaseModel):
-    """Configuration for custom authentication."""
-
-    type: Literal[AuthProviderType.CUSTOM] = AuthProviderType.CUSTOM
-    endpoint: str = Field(
-        ...,
-        description="Custom authentication endpoint URL",
-    )
-
-
-class GitHubTokenAuthConfig(BaseModel):
-    """Configuration for GitHub token authentication."""
-
-    type: Literal[AuthProviderType.GITHUB_TOKEN] = AuthProviderType.GITHUB_TOKEN
-    github_api_base_url: str = Field(
-        default="https://api.github.com",
-        description="Base URL for GitHub API (use https://api.github.com for public GitHub)",
-    )
-    claims_mapping: dict[str, str] = Field(
-        default_factory=lambda: {
-            "login": "roles",
-            "organizations": "teams",
-        },
-        description="Mapping from GitHub user fields to access attributes",
-    )
-
-
-class KubernetesAuthProviderConfig(BaseModel):
-    """Configuration for Kubernetes authentication provider."""
-
-    type: Literal[AuthProviderType.KUBERNETES] = AuthProviderType.KUBERNETES
-    api_server_url: str = Field(
-        default="https://kubernetes.default.svc",
- description="Kubernetes API server URL (e.g., https://api.cluster.domain:6443)", - ) - verify_tls: bool = Field(default=True, description="Whether to verify TLS certificates") - tls_cafile: Path | None = Field(default=None, description="Path to CA certificate file for TLS verification") - claims_mapping: dict[str, str] = Field( - default_factory=lambda: { - "username": "roles", - "groups": "roles", - }, - description="Mapping of Kubernetes user claims to access attributes", - ) - - @field_validator("api_server_url") - @classmethod - def validate_api_server_url(cls, v): - parsed = urlparse(v) - if not parsed.scheme or not parsed.netloc: - raise ValueError(f"api_server_url must be a valid URL with scheme and host: {v}") - if parsed.scheme not in ["http", "https"]: - raise ValueError(f"api_server_url scheme must be http or https: {v}") - return v - - @field_validator("claims_mapping") - @classmethod - def validate_claims_mapping(cls, v): - for key, value in v.items(): - if not value: - raise ValueError(f"claims_mapping value cannot be empty: {key}") - return v - - -AuthProviderConfig = Annotated[ - OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig | KubernetesAuthProviderConfig, - Field(discriminator="type"), -] - - -class AuthenticationConfig(BaseModel): - """Top-level authentication configuration.""" - - provider_config: AuthProviderConfig = Field( - ..., - description="Authentication provider configuration", - ) - access_policy: list[AccessRule] = Field( - default=[], - description="Rules for determining access to resources", - ) - - -class AuthenticationRequiredError(Exception): - pass - - -class QualifiedModel(BaseModel): - """A qualified model identifier, consisting of a provider ID and a model ID.""" - - provider_id: str - model_id: str - - -class VectorStoresConfig(BaseModel): - """Configuration for vector stores in the stack.""" - - default_provider_id: str | None = Field( - default=None, - description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.", - ) - default_embedding_model: QualifiedModel | None = Field( - default=None, - description="Default embedding model configuration for vector stores.", - ) - - -class QuotaPeriod(StrEnum): - DAY = "day" - - -class QuotaConfig(BaseModel): - kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") - anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") - authenticated_max_requests: int = Field( - default=1000, description="Max requests for authenticated clients per period" - ) - period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to set") - - -class CORSConfig(BaseModel): - allow_origins: list[str] = Field(default_factory=list) - allow_origin_regex: str | None = Field(default=None) - allow_methods: list[str] = Field(default=["OPTIONS"]) - allow_headers: list[str] = Field(default_factory=list) - allow_credentials: bool = Field(default=False) - expose_headers: list[str] = Field(default_factory=list) - max_age: int = Field(default=600, ge=0) - - @model_validator(mode="after") - def validate_credentials_config(self) -> Self: - if self.allow_credentials and (self.allow_origins == ["*"] or "*" in self.allow_origins): - raise ValueError("Cannot use wildcard origins with credentials enabled") - return self - - -def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | None: - if cors_config is False or cors_config is 
None: - return None - - if cors_config is True: - # dev mode: allow localhost on any port - return CORSConfig( - allow_origins=[], - allow_origin_regex=r"https?://localhost:\d+", - allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], - allow_headers=["Content-Type", "Authorization", "X-Requested-With"], - ) - - if isinstance(cors_config, CORSConfig): - return cors_config - - raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}") - - -class RegisteredResources(BaseModel): - """Registry of resources available in the distribution.""" - - models: list[ModelInput] = Field(default_factory=list) - shields: list[ShieldInput] = Field(default_factory=list) - vector_stores: list[VectorStoreInput] = Field(default_factory=list) - datasets: list[DatasetInput] = Field(default_factory=list) - scoring_fns: list[ScoringFnInput] = Field(default_factory=list) - benchmarks: list[BenchmarkInput] = Field(default_factory=list) - tool_groups: list[ToolGroupInput] = Field(default_factory=list) - - -class ServerConfig(BaseModel): - port: int = Field( - default=8321, - description="Port to listen on", - ge=1024, - le=65535, - ) - tls_certfile: str | None = Field( - default=None, - description="Path to TLS certificate file for HTTPS", - ) - tls_keyfile: str | None = Field( - default=None, - description="Path to TLS key file for HTTPS", - ) - tls_cafile: str | None = Field( - default=None, - description="Path to TLS CA file for HTTPS with mutual TLS authentication", - ) - auth: AuthenticationConfig | None = Field( - default=None, - description="Authentication configuration for the server", - ) - host: str | None = Field( - default=None, - description="The host the server should listen on", - ) - quota: QuotaConfig | None = Field( - default=None, - description="Per client quota request configuration", - ) - cors: bool | CORSConfig | None = Field( - default=None, - description="CORS configuration for cross-origin requests. Can be:\n" - "- true: Enable localhost CORS for development\n" - "- {allow_origins: [...], allow_methods: [...], ...}: Full configuration", - ) - - -class StackRunConfig(BaseModel): - version: int = LLAMA_STACK_RUN_CONFIG_VERSION - - image_name: str = Field( - ..., - description=""" -Reference to the distribution this package refers to. For unregistered (adhoc) packages, -this could be just a hash -""", - ) - container_image: str | None = Field( - default=None, - description="Reference to the container image if this package refers to a container", - ) - apis: list[str] = Field( - default_factory=list, - description=""" -The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""", - ) - - providers: dict[str, list[Provider]] = Field( - description=""" -One or more providers to use for each API. The same provider_type (e.g., meta-reference) -can be instantiated multiple times (with different configs) if necessary. 
-""", - ) - storage: StorageConfig = Field( - description="Catalog of named storage backends and references available to the stack", - ) - - registered_resources: RegisteredResources = Field( - default_factory=RegisteredResources, - description="Registry of resources available in the distribution", - ) - - logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging") - - telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry") - - server: ServerConfig = Field( - default_factory=ServerConfig, - description="Configuration for the HTTP(S) server", - ) - - external_providers_dir: Path | None = Field( - default=None, - description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.", - ) - - external_apis_dir: Path | None = Field( - default=None, - description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", - ) - - vector_stores: VectorStoresConfig | None = Field( - default=None, - description="Configuration for vector stores, including default embedding model", - ) - - @field_validator("external_providers_dir") - @classmethod - def validate_external_providers_dir(cls, v): - if v is None: - return None - if isinstance(v, str): - return Path(v) - return v - - @model_validator(mode="after") - def validate_server_stores(self) -> "StackRunConfig": - backend_map = self.storage.backends - stores = self.storage.stores - kv_backends = { - name - for name, cfg in backend_map.items() - if cfg.type - in { - StorageBackendType.KV_REDIS, - StorageBackendType.KV_SQLITE, - StorageBackendType.KV_POSTGRES, - StorageBackendType.KV_MONGODB, - } - } - sql_backends = { - name - for name, cfg in backend_map.items() - if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES} - } - - def _ensure_backend(reference, expected_set, store_name: str) -> None: - if reference is None: - return - backend_name = reference.backend - if backend_name not in backend_map: - raise ValueError( - f"{store_name} references unknown backend '{backend_name}'. " - f"Available backends: {sorted(backend_map)}" - ) - if backend_name not in expected_set: - raise ValueError( - f"{store_name} references backend '{backend_name}' of type " - f"'{backend_map[backend_name].type.value}', but a backend of type " - f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required." - ) - - _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata") - _ensure_backend(stores.inference, sql_backends, "storage.stores.inference") - _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations") - _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") - return self - - -class BuildConfig(BaseModel): - version: int = LLAMA_STACK_BUILD_CONFIG_VERSION - - distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") - image_type: str = Field( - default="venv", - description="Type of package to build (container | venv)", - ) - image_name: str | None = Field( - default=None, - description="Name of the distribution to build", - ) - external_providers_dir: Path | None = Field( - default=None, - description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. 
" - "pip_packages MUST contain the provider package name.", - ) - additional_pip_packages: list[str] = Field( - default_factory=list, - description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.", - ) - external_apis_dir: Path | None = Field( - default=None, - description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", - ) - - @field_validator("external_providers_dir") - @classmethod - def validate_external_providers_dir(cls, v): - if v is None: - return None - if isinstance(v, str): - return Path(v) - return v diff --git a/llama_stack/core/inspect.py b/llama_stack/core/inspect.py deleted file mode 100644 index 37dab4199..000000000 --- a/llama_stack/core/inspect.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from importlib.metadata import version - -from pydantic import BaseModel - -from llama_stack.apis.inspect import ( - HealthInfo, - Inspect, - ListRoutesResponse, - RouteInfo, - VersionInfo, -) -from llama_stack.core.datatypes import StackRunConfig -from llama_stack.core.external import load_external_apis -from llama_stack.core.server.routes import get_all_api_routes -from llama_stack.providers.datatypes import HealthStatus - - -class DistributionInspectConfig(BaseModel): - run_config: StackRunConfig - - -async def get_provider_impl(config, deps): - impl = DistributionInspectImpl(config, deps) - await impl.initialize() - return impl - - -class DistributionInspectImpl(Inspect): - def __init__(self, config: DistributionInspectConfig, deps): - self.config = config - self.deps = deps - - async def initialize(self) -> None: - pass - - async def list_routes(self) -> ListRoutesResponse: - run_config: StackRunConfig = self.config.run_config - - ret = [] - external_apis = load_external_apis(run_config) - all_endpoints = get_all_api_routes(external_apis) - for api, endpoints in all_endpoints.items(): - # Always include provider and inspect APIs, filter others based on run config - if api.value in ["providers", "inspect"]: - ret.extend( - [ - RouteInfo( - route=e.path, - method=next(iter([m for m in e.methods if m != "HEAD"])), - provider_types=[], # These APIs don't have "real" providers - they're internal to the stack - ) - for e, _ in endpoints - if e.methods is not None - ] - ) - else: - providers = run_config.providers.get(api.value, []) - if providers: # Only process if there are providers for this API - ret.extend( - [ - RouteInfo( - route=e.path, - method=next(iter([m for m in e.methods if m != "HEAD"])), - provider_types=[p.provider_type for p in providers], - ) - for e, _ in endpoints - if e.methods is not None - ] - ) - - return ListRoutesResponse(data=ret) - - async def health(self) -> HealthInfo: - return HealthInfo(status=HealthStatus.OK) - - async def version(self) -> VersionInfo: - return VersionInfo(version=version("llama-stack")) - - async def shutdown(self) -> None: - pass diff --git a/llama_stack/core/prompts/prompts.py b/llama_stack/core/prompts/prompts.py deleted file mode 100644 index 856397ca5..000000000 --- a/llama_stack/core/prompts/prompts.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-from typing import Any
-
-from pydantic import BaseModel
-
-from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
-from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
-
-
-class PromptServiceConfig(BaseModel):
-    """Configuration for the built-in prompt service.
-
-    :param run_config: Stack run configuration containing distribution info
-    """
-
-    run_config: StackRunConfig
-
-
-async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
-    """Get the prompt service implementation."""
-    impl = PromptServiceImpl(config, deps)
-    await impl.initialize()
-    return impl
-
-
-class PromptServiceImpl(Prompts):
-    """Built-in prompt service implementation using KVStore."""
-
-    def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
-        self.config = config
-        self.deps = deps
-        self.kvstore: KVStore
-
-    async def initialize(self) -> None:
-        # Use metadata store backend with prompts-specific namespace
-        metadata_ref = self.config.run_config.storage.stores.metadata
-        if not metadata_ref:
-            raise ValueError("storage.stores.metadata must be configured in run config")
-        prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
-        self.kvstore = await kvstore_impl(prompts_ref)
-
-    def _get_default_key(self, prompt_id: str) -> str:
-        """Get the KVStore key that stores the default version number."""
-        return f"prompts:v1:{prompt_id}:default"
-
-    async def _get_prompt_key(self, prompt_id: str, version: int | None = None) -> str:
-        """Get the KVStore key for prompt data, returning default version if applicable."""
-        if version:
-            return self._get_version_key(prompt_id, str(version))
-
-        default_key = self._get_default_key(prompt_id)
-        resolved_version = await self.kvstore.get(default_key)
-        if resolved_version is None:
-            raise ValueError(f"Prompt {prompt_id}:default not found")
-        return self._get_version_key(prompt_id, resolved_version)
-
-    def _get_version_key(self, prompt_id: str, version: str) -> str:
-        """Get the KVStore key for a specific prompt version."""
-        return f"prompts:v1:{prompt_id}:{version}"
-
-    def _get_list_key_prefix(self) -> str:
-        """Get the key prefix for listing prompts."""
-        return "prompts:v1:"
-
-    def _serialize_prompt(self, prompt: Prompt) -> str:
-        """Serialize a prompt to JSON string for storage."""
-        return json.dumps(
-            {
-                "prompt_id": prompt.prompt_id,
-                "prompt": prompt.prompt,
-                "version": prompt.version,
-                "variables": prompt.variables or [],
-                "is_default": prompt.is_default,
-            }
-        )
-
-    def _deserialize_prompt(self, data: str) -> Prompt:
-        """Deserialize a prompt from JSON string."""
-        obj = json.loads(data)
-        return Prompt(
-            prompt_id=obj["prompt_id"],
-            prompt=obj["prompt"],
-            version=obj["version"],
-            variables=obj.get("variables", []),
-            is_default=obj.get("is_default", False),
-        )
-
-    async def list_prompts(self) -> ListPromptsResponse:
-        """List all prompts (default versions only)."""
-        prefix = self._get_list_key_prefix()
-        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
-
-        prompts = []
-        for key in keys:
-            if key.endswith(":default"):
-                try:
-                    default_version = await self.kvstore.get(key)
-                    if default_version:
-                        prompt_id = key.replace(prefix, "").replace(":default", "")
-                        version_key = self._get_version_key(prompt_id, default_version)
-                        data = await self.kvstore.get(version_key)
-                        if data:
-                            prompt = self._deserialize_prompt(data)
-                            prompts.append(prompt)
-                except (json.JSONDecodeError, KeyError):
-                    continue
-
-        prompts.sort(key=lambda p: p.prompt_id or "", reverse=True)
-        return ListPromptsResponse(data=prompts)
-
-    async def get_prompt(self, prompt_id: str, version: int | None = None) -> Prompt:
-        """Get a prompt by its identifier and optional version."""
-        key = await self._get_prompt_key(prompt_id, version)
-        data = await self.kvstore.get(key)
-        if data is None:
-            raise ValueError(f"Prompt {prompt_id}:{version if version else 'default'} not found")
-        return self._deserialize_prompt(data)
-
-    async def create_prompt(
-        self,
-        prompt: str,
-        variables: list[str] | None = None,
-    ) -> Prompt:
-        """Create a new prompt."""
-        if variables is None:
-            variables = []
-
-        prompt_obj = Prompt(
-            prompt_id=Prompt.generate_prompt_id(),
-            prompt=prompt,
-            version=1,
-            variables=variables,
-        )
-
-        version_key = self._get_version_key(prompt_obj.prompt_id, str(prompt_obj.version))
-        data = self._serialize_prompt(prompt_obj)
-        await self.kvstore.set(version_key, data)
-
-        default_key = self._get_default_key(prompt_obj.prompt_id)
-        await self.kvstore.set(default_key, str(prompt_obj.version))
-
-        return prompt_obj
-
-    async def update_prompt(
-        self,
-        prompt_id: str,
-        prompt: str,
-        version: int,
-        variables: list[str] | None = None,
-        set_as_default: bool = True,
-    ) -> Prompt:
-        """Update an existing prompt (increments version)."""
-        if version < 1:
-            raise ValueError("Version must be >= 1")
-        if variables is None:
-            variables = []
-
-        prompt_versions = await self.list_prompt_versions(prompt_id)
-        latest_prompt = max(prompt_versions.data, key=lambda x: int(x.version))
-
-        if version and latest_prompt.version != version:
-            raise ValueError(
-                f"'{version}' is not the latest prompt version for prompt_id='{prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
-            )
-
-        current_version = latest_prompt.version if version is None else version
-        new_version = current_version + 1
-
-        updated_prompt = Prompt(prompt_id=prompt_id, prompt=prompt, version=new_version, variables=variables)
-
-        version_key = self._get_version_key(prompt_id, str(new_version))
-        data = self._serialize_prompt(updated_prompt)
-        await self.kvstore.set(version_key, data)
-
-        if set_as_default:
-            await self.set_default_version(prompt_id, new_version)
-
-        return updated_prompt
-
-    async def delete_prompt(self, prompt_id: str) -> None:
-        """Delete a prompt and all its versions."""
-        await self.get_prompt(prompt_id)
-
-        prefix = f"prompts:v1:{prompt_id}:"
-        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
-
-        for key in keys:
-            await self.kvstore.delete(key)
-
-    async def list_prompt_versions(self, prompt_id: str) -> ListPromptsResponse:
-        """List all versions of a specific prompt."""
-        prefix = f"prompts:v1:{prompt_id}:"
-        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
-
-        default_version = None
-        prompts = []
-
-        for key in keys:
-            data = await self.kvstore.get(key)
-            if key.endswith(":default"):
-                default_version = data
-            else:
-                if data:
-                    prompt_obj = self._deserialize_prompt(data)
-                    prompts.append(prompt_obj)
-
-        if not prompts:
-            raise ValueError(f"Prompt {prompt_id} not found")
-
-        for prompt in prompts:
-            prompt.is_default = str(prompt.version) == default_version
-
-        prompts.sort(key=lambda x: x.version)
-        return ListPromptsResponse(data=prompts)
-
-    async def set_default_version(self, prompt_id: str, version: int) -> Prompt:
-        """Set which version of a prompt should be the default. If not set, the default is the latest."""
-        version_key = self._get_version_key(prompt_id, str(version))
-        data = await self.kvstore.get(version_key)
-        if data is None:
-            raise ValueError(f"Prompt {prompt_id} version {version} not found")
-
-        default_key = self._get_default_key(prompt_id)
-        await self.kvstore.set(default_key, str(version))
-
-        return self._deserialize_prompt(data)
diff --git a/llama_stack/core/providers.py b/llama_stack/core/providers.py
deleted file mode 100644
index 7095ffd18..000000000
--- a/llama_stack/core/providers.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import asyncio
-from typing import Any
-
-from pydantic import BaseModel
-
-from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus
-
-from .datatypes import StackRunConfig
-from .utils.config import redact_sensitive_fields
-
-logger = get_logger(name=__name__, category="core")
-
-
-class ProviderImplConfig(BaseModel):
-    run_config: StackRunConfig
-
-
-async def get_provider_impl(config, deps):
-    impl = ProviderImpl(config, deps)
-    await impl.initialize()
-    return impl
-
-
-class ProviderImpl(Providers):
-    def __init__(self, config, deps):
-        self.config = config
-        self.deps = deps
-
-    async def initialize(self) -> None:
-        pass
-
-    async def shutdown(self) -> None:
-        logger.debug("ProviderImpl.shutdown")
-        pass
-
-    async def list_providers(self) -> ListProvidersResponse:
-        run_config = self.config.run_config
-        safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump()))
-        providers_health = await self.get_providers_health()
-        ret = []
-        for api, providers in safe_config.providers.items():
-            for p in providers:
-                # Skip providers that are not enabled
-                if p.provider_id is None:
-                    continue
-                ret.append(
-                    ProviderInfo(
-                        api=api,
-                        provider_id=p.provider_id,
-                        provider_type=p.provider_type,
-                        config=p.config,
-                        health=providers_health.get(api, {}).get(
-                            p.provider_id,
-                            HealthResponse(
-                                status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check"
-                            ),
-                        ),
-                    )
-                )
-
-        return ListProvidersResponse(data=ret)
-
-    async def inspect_provider(self, provider_id: str) -> ProviderInfo:
-        all_providers = await self.list_providers()
-        for p in all_providers.data:
-            if p.provider_id == provider_id:
-                return p
-
-        raise ValueError(f"Provider {provider_id} not found")
-
-    async def get_providers_health(self) -> dict[str, dict[str, HealthResponse]]:
-        """Get health status for all providers.
-
-        Returns:
-            Dict[str, Dict[str, HealthResponse]]: A dictionary mapping API names to provider health statuses.
-                Each API maps to a dictionary of provider IDs to their health responses.
-        """
-        providers_health: dict[str, dict[str, HealthResponse]] = {}
-
-        # The timeout has to be long enough to allow all the providers to be checked, especially in
-        # the case of the inference router health check since it checks all registered inference
-        # providers.
-        # The timeout must not be equal to the one set by health method for a given implementation,
-        # otherwise we will miss some providers.
-        timeout = 3.0
-
-        async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None:
-            # Skip special implementations (inspect/providers) that don't have provider specs
-            if not hasattr(impl, "__provider_spec__"):
-                return None
-            api_name = impl.__provider_spec__.api.name
-            if not hasattr(impl, "health"):
-                return (
-                    api_name,
-                    HealthResponse(
-                        status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check"
-                    ),
-                )
-
-            try:
-                health = await asyncio.wait_for(impl.health(), timeout=timeout)
-                return api_name, health
-            except TimeoutError:
-                return (
-                    api_name,
-                    HealthResponse(
-                        status=HealthStatus.ERROR, message=f"Health check timed out after {timeout} seconds"
-                    ),
-                )
-            except Exception as e:
-                return (
-                    api_name,
-                    HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"),
-                )
-
-        # Create tasks for all providers
-        tasks = [check_provider_health(impl) for impl in self.deps.values()]
-
-        # Wait for all health checks to complete
-        results = await asyncio.gather(*tasks)
-
-        # Organize results by API and provider ID
-        for result in results:
-            if result is None:  # Skip special implementations
-                continue
-            api_name, health_response = result
-            providers_health[api_name] = health_response
-
-        return providers_health
diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py
deleted file mode 100644
index 20c17e59d..000000000
--- a/llama_stack/core/routers/__init__.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any
-
-from llama_stack.core.datatypes import (
-    AccessRule,
-    RoutedProtocol,
-)
-from llama_stack.core.stack import StackRunConfig
-from llama_stack.core.store import DistributionRegistry
-from llama_stack.providers.datatypes import Api, RoutingTable
-from llama_stack.providers.utils.inference.inference_store import InferenceStore
-
-
-async def get_routing_table_impl(
-    api: Api,
-    impls_by_provider_id: dict[str, RoutedProtocol],
-    _deps,
-    dist_registry: DistributionRegistry,
-    policy: list[AccessRule],
-) -> Any:
-    from ..routing_tables.benchmarks import BenchmarksRoutingTable
-    from ..routing_tables.datasets import DatasetsRoutingTable
-    from ..routing_tables.models import ModelsRoutingTable
-    from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
-    from ..routing_tables.shields import ShieldsRoutingTable
-    from ..routing_tables.toolgroups import ToolGroupsRoutingTable
-    from ..routing_tables.vector_stores import VectorStoresRoutingTable
-
-    api_to_tables = {
-        "models": ModelsRoutingTable,
-        "shields": ShieldsRoutingTable,
-        "datasets": DatasetsRoutingTable,
-        "scoring_functions": ScoringFunctionsRoutingTable,
-        "benchmarks": BenchmarksRoutingTable,
-        "tool_groups": ToolGroupsRoutingTable,
-        "vector_stores": VectorStoresRoutingTable,
-    }
-
-    if api.value not in api_to_tables:
-        raise ValueError(f"API {api.value} not found in router map")
-
-    impl = api_to_tables[api.value](impls_by_provider_id, dist_registry, policy)
-    await impl.initialize()
-    return impl
-
-
-async def get_auto_router_impl(
-    api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackRunConfig, policy: list[AccessRule]
-) -> Any:
-    from .datasets import DatasetIORouter
-    from .eval_scoring import EvalRouter, ScoringRouter
-    from .inference import InferenceRouter
-    from .safety import SafetyRouter
-    from .tool_runtime import ToolRuntimeRouter
-    from .vector_io import VectorIORouter
-
-    api_to_routers = {
-        "vector_io": VectorIORouter,
-        "inference": InferenceRouter,
-        "safety": SafetyRouter,
-        "datasetio": DatasetIORouter,
-        "scoring": ScoringRouter,
-        "eval": EvalRouter,
-        "tool_runtime": ToolRuntimeRouter,
-    }
-    if api.value not in api_to_routers:
-        raise ValueError(f"API {api.value} not found in router map")
-
-    api_to_dep_impl = {}
-    if run_config.telemetry.enabled:
-        api_to_deps = {
-            "inference": {"telemetry": Api.telemetry},
-        }
-        for dep_name, dep_api in api_to_deps.get(api.value, {}).items():
-            if dep_api in deps:
-                api_to_dep_impl[dep_name] = deps[dep_api]
-
-    # TODO: move pass configs to routers instead
-    if api == Api.inference:
-        inference_ref = run_config.storage.stores.inference
-        if not inference_ref:
-            raise ValueError("storage.stores.inference must be configured in run config")
-
-        inference_store = InferenceStore(
-            reference=inference_ref,
-            policy=policy,
-        )
-        await inference_store.initialize()
-        api_to_dep_impl["store"] = inference_store
-
-    elif api == Api.vector_io:
-        api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
-
-    impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
-    await impl.initialize()
-    return impl
diff --git a/llama_stack/core/routers/datasets.py b/llama_stack/core/routers/datasets.py
deleted file mode 100644
index 2f1d5f78e..000000000
--- a/llama_stack/core/routers/datasets.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any
-
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import DatasetPurpose, DataSource
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import RoutingTable
-
-logger = get_logger(name=__name__, category="core::routers")
-
-
-class DatasetIORouter(DatasetIO):
-    def __init__(
-        self,
-        routing_table: RoutingTable,
-    ) -> None:
-        logger.debug("Initializing DatasetIORouter")
-        self.routing_table = routing_table
-
-    async def initialize(self) -> None:
-        logger.debug("DatasetIORouter.initialize")
-        pass
-
-    async def shutdown(self) -> None:
-        logger.debug("DatasetIORouter.shutdown")
-        pass
-
-    async def register_dataset(
-        self,
-        purpose: DatasetPurpose,
-        source: DataSource,
-        metadata: dict[str, Any] | None = None,
-        dataset_id: str | None = None,
-    ) -> None:
-        logger.debug(
-            f"DatasetIORouter.register_dataset: {purpose=} {source=} {metadata=} {dataset_id=}",
-        )
-        await self.routing_table.register_dataset(
-            purpose=purpose,
-            source=source,
-            metadata=metadata,
-            dataset_id=dataset_id,
-        )
-
-    async def iterrows(
-        self,
-        dataset_id: str,
-        start_index: int | None = None,
-        limit: int | None = None,
-    ) -> PaginatedResponse:
-        logger.debug(
-            f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}",
-        )
-        provider = await self.routing_table.get_provider_impl(dataset_id)
-        return await provider.iterrows(
-            dataset_id=dataset_id,
-            start_index=start_index,
-            limit=limit,
-        )
-
-    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
-        logger.debug(f"DatasetIORouter.append_rows: {dataset_id}, {len(rows)} rows")
-        provider = await self.routing_table.get_provider_impl(dataset_id)
-        return await provider.append_rows(
-            dataset_id=dataset_id,
-            rows=rows,
-        )
diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py
deleted file mode 100644
index b20ad44ca..000000000
--- a/llama_stack/core/routers/inference.py
+++ /dev/null
@@ -1,586 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import asyncio
-import time
-from collections.abc import AsyncGenerator, AsyncIterator
-from datetime import UTC, datetime
-from typing import Annotated, Any
-
-from fastapi import Body
-from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
-from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
-from pydantic import TypeAdapter
-
-from llama_stack.apis.common.content_types import (
-    InterleavedContent,
-)
-from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack.apis.inference import (
-    ChatCompletionResponse,
-    ChatCompletionResponseEventType,
-    ChatCompletionResponseStreamChunk,
-    CompletionMessage,
-    CompletionResponse,
-    CompletionResponseStreamChunk,
-    Inference,
-    ListOpenAIChatCompletionResponse,
-    Message,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAIChatCompletionToolCall,
-    OpenAIChatCompletionToolCallFunction,
-    OpenAIChoice,
-    OpenAIChoiceLogprobs,
-    OpenAICompletion,
-    OpenAICompletionRequestWithExtraBody,
-    OpenAICompletionWithInputMessages,
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-    OpenAIMessageParam,
-    Order,
-    StopReason,
-    ToolPromptFormat,
-)
-from llama_stack.apis.models import Model, ModelType
-from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
-from llama_stack.log import get_logger
-from llama_stack.models.llama.llama3.chat_format import ChatFormat
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
-from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span
-
-logger = get_logger(name=__name__, category="core::routers")
-
-
-class InferenceRouter(Inference):
-    """Routes to a provider based on the model"""
-
-    def __init__(
-        self,
-        routing_table: RoutingTable,
-        telemetry: Telemetry | None = None,
-        store: InferenceStore | None = None,
-    ) -> None:
-        logger.debug("Initializing InferenceRouter")
-        self.routing_table = routing_table
-        self.telemetry = telemetry
-        self.store = store
-        if self.telemetry:
-            self.tokenizer = Tokenizer.get_instance()
-            self.formatter = ChatFormat(self.tokenizer)
-
-    async def initialize(self) -> None:
-        logger.debug("InferenceRouter.initialize")
-
-    async def shutdown(self) -> None:
-        logger.debug("InferenceRouter.shutdown")
-        if self.store:
-            try:
-                await self.store.shutdown()
-            except Exception as e:
-                logger.warning(f"Error during InferenceStore shutdown: {e}")
-
-    async def register_model(
-        self,
-        model_id: str,
-        provider_model_id: str | None = None,
-        provider_id: str | None = None,
-        metadata: dict[str, Any] | None = None,
-        model_type: ModelType | None = None,
-    ) -> None:
-        logger.debug(
-            f"InferenceRouter.register_model: {model_id=} {provider_model_id=} {provider_id=} {metadata=} {model_type=}",
-        )
-        await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
-
-    def _construct_metrics(
-        self,
-        prompt_tokens: int,
-        completion_tokens: int,
-        total_tokens: int,
-        model: Model,
-    ) -> list[MetricEvent]:
-        """Constructs a list of MetricEvent objects containing token usage metrics.
-
-        Args:
-            prompt_tokens: Number of tokens in the prompt
-            completion_tokens: Number of tokens in the completion
-            total_tokens: Total number of tokens used
-            model: Model object containing model_id and provider_id
-
-        Returns:
-            List of MetricEvent objects with token usage metrics
-        """
-        span = get_current_span()
-        if span is None:
-            logger.warning("No span found for token usage metrics")
-            return []
-
-        metrics = [
-            ("prompt_tokens", prompt_tokens),
-            ("completion_tokens", completion_tokens),
-            ("total_tokens", total_tokens),
-        ]
-        metric_events = []
-        for metric_name, value in metrics:
-            metric_events.append(
-                MetricEvent(
-                    trace_id=span.trace_id,
-                    span_id=span.span_id,
-                    metric=metric_name,
-                    value=value,
-                    timestamp=datetime.now(UTC),
-                    unit="tokens",
-                    attributes={
-                        "model_id": model.model_id,
-                        "provider_id": model.provider_id,
-                    },
-                )
-            )
-        return metric_events
-
-    async def _compute_and_log_token_usage(
-        self,
-        prompt_tokens: int,
-        completion_tokens: int,
-        total_tokens: int,
-        model: Model,
-    ) -> list[MetricInResponse]:
-        metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
-        if self.telemetry:
-            for metric in metrics:
-                enqueue_event(metric)
-        return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
-
-    async def _count_tokens(
-        self,
-        messages: list[Message] | InterleavedContent,
-        tool_prompt_format: ToolPromptFormat | None = None,
-    ) -> int | None:
-        if not hasattr(self, "formatter") or self.formatter is None:
-            return None
-
-        if isinstance(messages, list):
-            encoded = self.formatter.encode_dialog_prompt(messages, tool_prompt_format)
-        else:
-            encoded = self.formatter.encode_content(messages)
-        return len(encoded.tokens) if encoded and encoded.tokens else 0
-
-    async def _get_model(self, model_id: str, expected_model_type: str) -> Model:
-        """takes a model id and gets model after ensuring that it is accessible and of the correct type"""
-        model = await self.routing_table.get_model(model_id)
-        if model is None:
-            raise ModelNotFoundError(model_id)
-        if model.model_type != expected_model_type:
-            raise ModelTypeError(model_id, model.model_type, expected_model_type)
-        return model
-
-    async def openai_completion(
-        self,
-        params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)],
-    ) -> OpenAICompletion:
-        logger.debug(
-            f"InferenceRouter.openai_completion: model={params.model}, stream={params.stream}, prompt={params.prompt}",
-        )
-        model_obj = await self._get_model(params.model, ModelType.llm)
-
-        # Update params with the resolved model identifier
-        params.model = model_obj.identifier
-
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
-        if params.stream:
-            return await provider.openai_completion(params)
-        # TODO: Metrics do NOT work with openai_completion stream=True due to the fact
-        # that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.
-
-        response = await provider.openai_completion(params)
-        if self.telemetry:
-            metrics = self._construct_metrics(
-                prompt_tokens=response.usage.prompt_tokens,
-                completion_tokens=response.usage.completion_tokens,
-                total_tokens=response.usage.total_tokens,
-                model=model_obj,
-            )
-            for metric in metrics:
-                enqueue_event(metric)
-
-            # these metrics will show up in the client response.
-            response.metrics = (
-                metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
-            )
-        return response
-
-    async def openai_chat_completion(
-        self,
-        params: Annotated[OpenAIChatCompletionRequestWithExtraBody, Body(...)],
-    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        logger.debug(
-            f"InferenceRouter.openai_chat_completion: model={params.model}, stream={params.stream}, messages={params.messages}",
-        )
-        model_obj = await self._get_model(params.model, ModelType.llm)
-
-        # Use the OpenAI client for a bit of extra input validation without
-        # exposing the OpenAI client itself as part of our API surface
-        if params.tool_choice:
-            TypeAdapter(OpenAIChatCompletionToolChoiceOptionParam).validate_python(params.tool_choice)
-            if params.tools is None:
-                raise ValueError("'tool_choice' is only allowed when 'tools' is also provided")
-        if params.tools:
-            for tool in params.tools:
-                TypeAdapter(OpenAIChatCompletionToolParam).validate_python(tool)
-
-        # Some providers make tool calls even when tool_choice is "none"
-        # so just clear them both out to avoid unexpected tool calls
-        if params.tool_choice == "none" and params.tools is not None:
-            params.tool_choice = None
-            params.tools = None
-
-        # Update params with the resolved model identifier
-        params.model = model_obj.identifier
-
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
-        if params.stream:
-            response_stream = await provider.openai_chat_completion(params)
-
-            # For streaming, the provider returns AsyncIterator[OpenAIChatCompletionChunk]
-            # We need to add metrics to each chunk and store the final completion
-            return self.stream_tokens_and_compute_metrics_openai_chat(
-                response=response_stream,
-                model=model_obj,
-                messages=params.messages,
-            )
-
-        response = await self._nonstream_openai_chat_completion(provider, params)
-
-        # Store the response with the ID that will be returned to the client
-        if self.store:
-            asyncio.create_task(self.store.store_chat_completion(response, params.messages))
-
-        if self.telemetry:
-            metrics = self._construct_metrics(
-                prompt_tokens=response.usage.prompt_tokens,
-                completion_tokens=response.usage.completion_tokens,
-                total_tokens=response.usage.total_tokens,
-                model=model_obj,
-            )
-            for metric in metrics:
-                enqueue_event(metric)
-            # these metrics will show up in the client response.
-            response.metrics = (
-                metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
-            )
-        return response
-
-    async def openai_embeddings(
-        self,
-        params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)],
-    ) -> OpenAIEmbeddingsResponse:
-        logger.debug(
-            f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}",
-        )
-        model_obj = await self._get_model(params.model, ModelType.embedding)
-
-        # Update model to use resolved identifier
-        params.model = model_obj.identifier
-
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
-        return await provider.openai_embeddings(params)
-
-    async def list_chat_completions(
-        self,
-        after: str | None = None,
-        limit: int | None = 20,
-        model: str | None = None,
-        order: Order | None = Order.desc,
-    ) -> ListOpenAIChatCompletionResponse:
-        if self.store:
-            return await self.store.list_chat_completions(after, limit, model, order)
-        raise NotImplementedError("List chat completions is not supported: inference store is not configured.")
-
-    async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
-        if self.store:
-            return await self.store.get_chat_completion(completion_id)
-        raise NotImplementedError("Get chat completion is not supported: inference store is not configured.")
-
-    async def _nonstream_openai_chat_completion(
-        self, provider: Inference, params: OpenAIChatCompletionRequestWithExtraBody
-    ) -> OpenAIChatCompletion:
-        response = await provider.openai_chat_completion(params)
-        for choice in response.choices:
-            # some providers return an empty list for no tool calls in non-streaming responses
-            # but the OpenAI API returns None. So, set tool_calls to None if it's empty
So, set tool_calls to None if it's empty - if choice.message and choice.message.tool_calls is not None and len(choice.message.tool_calls) == 0: - choice.message.tool_calls = None - return response - - async def health(self) -> dict[str, HealthResponse]: - health_statuses = {} - timeout = 1 # increasing the timeout to 1 second for health checks - for provider_id, impl in self.routing_table.impls_by_provider_id.items(): - try: - # check if the provider has a health method - if not hasattr(impl, "health"): - continue - health = await asyncio.wait_for(impl.health(), timeout=timeout) - health_statuses[provider_id] = health - except TimeoutError: - health_statuses[provider_id] = HealthResponse( - status=HealthStatus.ERROR, - message=f"Health check timed out after {timeout} seconds", - ) - except NotImplementedError: - health_statuses[provider_id] = HealthResponse(status=HealthStatus.NOT_IMPLEMENTED) - except Exception as e: - health_statuses[provider_id] = HealthResponse( - status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}" - ) - return health_statuses - - async def stream_tokens_and_compute_metrics( - self, - response, - prompt_tokens, - model, - tool_prompt_format: ToolPromptFormat | None = None, - ) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None] | AsyncGenerator[CompletionResponseStreamChunk, None]: - completion_text = "" - async for chunk in response: - complete = False - if hasattr(chunk, "event"): # only ChatCompletions have .event - if chunk.event.event_type == ChatCompletionResponseEventType.progress: - if chunk.event.delta.type == "text": - completion_text += chunk.event.delta.text - if chunk.event.event_type == ChatCompletionResponseEventType.complete: - complete = True - completion_tokens = await self._count_tokens( - [ - CompletionMessage( - content=completion_text, - stop_reason=StopReason.end_of_turn, - ) - ], - tool_prompt_format=tool_prompt_format, - ) - else: - if hasattr(chunk, "delta"): - completion_text += chunk.delta - if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry: - complete = True - completion_tokens = await self._count_tokens(completion_text) - # if we are done receiving tokens - if complete: - total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) - - # Create a separate span for streaming completion metrics - if self.telemetry: - # Log metrics in the new span context - completion_metrics = self._construct_metrics( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=total_tokens, - model=model, - ) - for metric in completion_metrics: - if metric.metric in [ - "completion_tokens", - "total_tokens", - ]: # Only log completion and total tokens - enqueue_event(metric) - - # Return metrics in response - async_metrics = [ - MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics - ] - chunk.metrics = async_metrics if chunk.metrics is None else chunk.metrics + async_metrics - else: - # Fallback if no telemetry - completion_metrics = self._construct_metrics( - prompt_tokens or 0, - completion_tokens or 0, - total_tokens, - model, - ) - async_metrics = [ - MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics - ] - chunk.metrics = async_metrics if chunk.metrics is None else chunk.metrics + async_metrics - yield chunk - - async def count_tokens_and_compute_metrics( - self, - response: ChatCompletionResponse | CompletionResponse, - prompt_tokens, - model, - tool_prompt_format: ToolPromptFormat | None = None, - ): 
- if isinstance(response, ChatCompletionResponse): - content = [response.completion_message] - else: - content = response.content - completion_tokens = await self._count_tokens(messages=content, tool_prompt_format=tool_prompt_format) - total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) - - # Create a separate span for completion metrics - if self.telemetry: - # Log metrics in the new span context - completion_metrics = self._construct_metrics( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=total_tokens, - model=model, - ) - for metric in completion_metrics: - if metric.metric in ["completion_tokens", "total_tokens"]: # Only log completion and total tokens - enqueue_event(metric) - - # Return metrics in response - return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics] - - # Fallback if no telemetry - metrics = self._construct_metrics( - prompt_tokens or 0, - completion_tokens or 0, - total_tokens, - model, - ) - return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics] - - async def stream_tokens_and_compute_metrics_openai_chat( - self, - response: AsyncIterator[OpenAIChatCompletionChunk], - model: Model, - messages: list[OpenAIMessageParam] | None = None, - ) -> AsyncIterator[OpenAIChatCompletionChunk]: - """Stream OpenAI chat completion chunks, compute metrics, and store the final completion.""" - id = None - created = None - choices_data: dict[int, dict[str, Any]] = {} - - try: - async for chunk in response: - # Skip None chunks - if chunk is None: - continue - - # Capture ID and created timestamp from first chunk - if id is None and chunk.id: - id = chunk.id - if created is None and chunk.created: - created = chunk.created - - # Accumulate choice data for final assembly - if chunk.choices: - for choice_delta in chunk.choices: - idx = choice_delta.index - if idx not in choices_data: - choices_data[idx] = { - "content_parts": [], - "tool_calls_builder": {}, - "finish_reason": "stop", - "logprobs_content_parts": [], - } - current_choice_data = choices_data[idx] - - if choice_delta.delta: - delta = choice_delta.delta - if delta.content: - current_choice_data["content_parts"].append(delta.content) - if delta.tool_calls: - for tool_call_delta in delta.tool_calls: - tc_idx = tool_call_delta.index - if tc_idx not in current_choice_data["tool_calls_builder"]: - current_choice_data["tool_calls_builder"][tc_idx] = { - "id": None, - "type": "function", - "function_name_parts": [], - "function_arguments_parts": [], - } - builder = current_choice_data["tool_calls_builder"][tc_idx] - if tool_call_delta.id: - builder["id"] = tool_call_delta.id - if tool_call_delta.type: - builder["type"] = tool_call_delta.type - if tool_call_delta.function: - if tool_call_delta.function.name: - builder["function_name_parts"].append(tool_call_delta.function.name) - if tool_call_delta.function.arguments: - builder["function_arguments_parts"].append( - tool_call_delta.function.arguments - ) - if choice_delta.finish_reason: - current_choice_data["finish_reason"] = choice_delta.finish_reason - if choice_delta.logprobs and choice_delta.logprobs.content: - current_choice_data["logprobs_content_parts"].extend(choice_delta.logprobs.content) - - # Compute metrics on final chunk - if chunk.choices and chunk.choices[0].finish_reason: - completion_text = "" - for choice_data in choices_data.values(): - completion_text += "".join(choice_data["content_parts"]) - - # Add metrics to the chunk - if 
self.telemetry and hasattr(chunk, "usage") and chunk.usage: - metrics = self._construct_metrics( - prompt_tokens=chunk.usage.prompt_tokens, - completion_tokens=chunk.usage.completion_tokens, - total_tokens=chunk.usage.total_tokens, - model=model, - ) - for metric in metrics: - enqueue_event(metric) - - yield chunk - finally: - # Store the final assembled completion - if id and self.store and messages: - assembled_choices: list[OpenAIChoice] = [] - for choice_idx, choice_data in choices_data.items(): - content_str = "".join(choice_data["content_parts"]) - assembled_tool_calls: list[OpenAIChatCompletionToolCall] = [] - if choice_data["tool_calls_builder"]: - for tc_build_data in choice_data["tool_calls_builder"].values(): - if tc_build_data["id"]: - func_name = "".join(tc_build_data["function_name_parts"]) - func_args = "".join(tc_build_data["function_arguments_parts"]) - assembled_tool_calls.append( - OpenAIChatCompletionToolCall( - id=tc_build_data["id"], - type=tc_build_data["type"], - function=OpenAIChatCompletionToolCallFunction( - name=func_name, arguments=func_args - ), - ) - ) - message = OpenAIAssistantMessageParam( - role="assistant", - content=content_str if content_str else None, - tool_calls=assembled_tool_calls if assembled_tool_calls else None, - ) - logprobs_content = choice_data["logprobs_content_parts"] - final_logprobs = OpenAIChoiceLogprobs(content=logprobs_content) if logprobs_content else None - - assembled_choices.append( - OpenAIChoice( - finish_reason=choice_data["finish_reason"], - index=choice_idx, - message=message, - logprobs=final_logprobs, - ) - ) - - final_response = OpenAIChatCompletion( - id=id, - choices=assembled_choices, - created=created or int(time.time()), - model=model.identifier, - object="chat.completion", - ) - logger.debug(f"InferenceRouter.completion_response: {final_response}") - asyncio.create_task(self.store.store_chat_completion(final_response, messages)) diff --git a/llama_stack/core/routers/safety.py b/llama_stack/core/routers/safety.py deleted file mode 100644 index 9ba3327f1..000000000 --- a/llama_stack/core/routers/safety.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
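The safety router deleted below resolves run_moderation calls by matching the moderation model against each registered shield's provider_resource_id and requiring exactly one hit. A minimal, dependency-free sketch of that lookup follows; the Shield dataclass and resolve_shield_id helper here are illustrative stand-ins, not the real llama_stack types.

from dataclasses import dataclass


@dataclass
class Shield:
    identifier: str
    provider_resource_id: str


def resolve_shield_id(shields: list[Shield], model: str) -> str:
    # Mirrors the matching rule in run_moderation: the moderation model must
    # map to exactly one registered shield's provider_resource_id.
    matches = [s.identifier for s in shields if s.provider_resource_id == model]
    if not matches:
        raise ValueError(f"No shield associated with provider_resource id {model}")
    if len(matches) > 1:
        raise ValueError(f"Multiple shields associated with provider_resource id {model}")
    return matches[0]


shields = [Shield("llama-guard", "meta-llama/Llama-Guard-3-8B")]
assert resolve_shield_id(shields, "meta-llama/Llama-Guard-3-8B") == "llama-guard"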
- -from typing import Any - -from llama_stack.apis.inference import Message -from llama_stack.apis.safety import RunShieldResponse, Safety -from llama_stack.apis.safety.safety import ModerationObject -from llama_stack.apis.shields import Shield -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable - -logger = get_logger(name=__name__, category="core::routers") - - -class SafetyRouter(Safety): - def __init__( - self, - routing_table: RoutingTable, - ) -> None: - logger.debug("Initializing SafetyRouter") - self.routing_table = routing_table - - async def initialize(self) -> None: - logger.debug("SafetyRouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("SafetyRouter.shutdown") - pass - - async def register_shield( - self, - shield_id: str, - provider_shield_id: str | None = None, - provider_id: str | None = None, - params: dict[str, Any] | None = None, - ) -> Shield: - logger.debug(f"SafetyRouter.register_shield: {shield_id}") - return await self.routing_table.register_shield(shield_id, provider_shield_id, provider_id, params) - - async def unregister_shield(self, identifier: str) -> None: - logger.debug(f"SafetyRouter.unregister_shield: {identifier}") - return await self.routing_table.unregister_shield(identifier) - - async def run_shield( - self, - shield_id: str, - messages: list[Message], - params: dict[str, Any] | None = None, - ) -> RunShieldResponse: - logger.debug(f"SafetyRouter.run_shield: {shield_id}") - provider = await self.routing_table.get_provider_impl(shield_id) - return await provider.run_shield( - shield_id=shield_id, - messages=messages, - params=params, - ) - - async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: - async def get_shield_id(model: str) -> str: - """Get the shield id whose provider_resource_id matches the given model.""" - list_shields_response = await self.routing_table.list_shields() - - matches = [s.identifier for s in list_shields_response.data if model == s.provider_resource_id] - - if not matches: - raise ValueError(f"No shield associated with provider_resource id {model}") - if len(matches) > 1: - raise ValueError(f"Multiple shields associated with provider_resource id {model}") - return matches[0] - - shield_id = await get_shield_id(model) - logger.debug(f"SafetyRouter.run_moderation: {shield_id}") - provider = await self.routing_table.get_provider_impl(shield_id) - - response = await provider.run_moderation( - input=input, - model=model, - ) - - return response diff --git a/llama_stack/core/routers/tool_runtime.py b/llama_stack/core/routers/tool_runtime.py deleted file mode 100644 index be4c13905..000000000 --- a/llama_stack/core/routers/tool_runtime.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
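The tool-runtime router deleted below exposes its nested RAG tool's methods on itself under dotted attribute names ("rag_tool.query", "rag_tool.insert"), kept in sync with the server's endpoint table so route lookup can fetch handlers with a plain getattr. A self-contained sketch of that aliasing trick, with hypothetical stand-in classes:

import asyncio


class RagToolImpl:
    # Stand-in for the nested RAG tool implementation.
    async def query(self, content: str) -> str:
        return f"results for {content!r}"


class Router:
    def __init__(self) -> None:
        self.rag_tool = RagToolImpl()
        # Bind each tool method under a dotted name; normal attribute syntax
        # cannot reach these, but getattr with the full string can.
        for method in ("query",):
            setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))


router = Router()
handler = getattr(router, "rag_tool.query")
print(asyncio.run(handler("deployment docs")))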
- -from typing import Any - -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, -) -from llama_stack.apis.tools import ( - ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, - ToolRuntime, -) -from llama_stack.log import get_logger - -from ..routing_tables.toolgroups import ToolGroupsRoutingTable - -logger = get_logger(name=__name__, category="core::routers") - - -class ToolRuntimeRouter(ToolRuntime): - class RagToolImpl(RAGToolRuntime): - def __init__( - self, - routing_table: ToolGroupsRoutingTable, - ) -> None: - logger.debug("Initializing ToolRuntimeRouter.RagToolImpl") - self.routing_table = routing_table - - async def query( - self, - content: InterleavedContent, - vector_store_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}") - provider = await self.routing_table.get_provider_impl("knowledge_search") - return await provider.query(content, vector_store_ids, query_config) - - async def insert( - self, - documents: list[RAGDocument], - vector_store_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - logger.debug( - f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" - ) - provider = await self.routing_table.get_provider_impl("insert_into_memory") - return await provider.insert(documents, vector_store_id, chunk_size_in_tokens) - - def __init__( - self, - routing_table: ToolGroupsRoutingTable, - ) -> None: - logger.debug("Initializing ToolRuntimeRouter") - self.routing_table = routing_table - - # HACK ALERT this should be in sync with "get_all_api_endpoints()" - self.rag_tool = self.RagToolImpl(routing_table) - for method in ("query", "insert"): - setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) - - async def initialize(self) -> None: - logger.debug("ToolRuntimeRouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("ToolRuntimeRouter.shutdown") - pass - - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> Any: - logger.debug(f"ToolRuntimeRouter.invoke_tool: {tool_name}") - provider = await self.routing_table.get_provider_impl(tool_name) - return await provider.invoke_tool( - tool_name=tool_name, - kwargs=kwargs, - ) - - async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None - ) -> ListToolDefsResponse: - logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}") - return await self.routing_table.list_tools(tool_group_id) diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py deleted file mode 100644 index 2b1701dc2..000000000 --- a/llama_stack/core/routers/vector_io.py +++ /dev/null @@ -1,442 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
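Within the vector-store router deleted below, openai_list_vector_stores implements OpenAI-style cursor pagination: sort stores by created_at, cut the list at the after/before cursors, cap it at limit, and report has_more plus the first and last ids of the page. A compact sketch of the same algorithm; Store and paginate are illustrative names, not part of the real module.

from dataclasses import dataclass


@dataclass
class Store:
    id: str
    created_at: int


def paginate(
    stores: list[Store],
    limit: int = 20,
    order: str = "desc",
    after: str | None = None,
    before: str | None = None,
) -> dict:
    # Sort newest-first for "desc", then apply the cursors before the limit.
    stores = sorted(stores, key=lambda s: s.created_at, reverse=(order == "desc"))
    if after:
        idx = next((i for i, s in enumerate(stores) if s.id == after), -1)
        if idx >= 0:
            stores = stores[idx + 1 :]
    if before:
        idx = next((i for i, s in enumerate(stores) if s.id == before), len(stores))
        stores = stores[:idx]
    page = stores[:limit]
    return {
        "data": page,
        "has_more": len(stores) > limit,
        "first_id": page[0].id if page else None,
        "last_id": page[-1].id if page else None,
    }


stores = [Store(f"vs_{i}", created_at=i) for i in range(5)]
page = paginate(stores, limit=2)
assert page["has_more"] and page["first_id"] == "vs_4" and page["last_id"] == "vs_3"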
- -import asyncio -import uuid -from typing import Annotated, Any - -from fastapi import Body - -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.models import ModelType -from llama_stack.apis.vector_io import ( - Chunk, - OpenAICreateVectorStoreFileBatchRequestWithExtraBody, - OpenAICreateVectorStoreRequestWithExtraBody, - QueryChunksResponse, - SearchRankingOptions, - VectorIO, - VectorStoreChunkingStrategy, - VectorStoreDeleteResponse, - VectorStoreFileBatchObject, - VectorStoreFileContentsResponse, - VectorStoreFileDeleteResponse, - VectorStoreFileObject, - VectorStoreFilesListInBatchResponse, - VectorStoreFileStatus, - VectorStoreListResponse, - VectorStoreObject, - VectorStoreSearchResponsePage, -) -from llama_stack.core.datatypes import VectorStoresConfig -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable - -logger = get_logger(name=__name__, category="core::routers") - - -class VectorIORouter(VectorIO): - """Routes to a provider based on the vector db identifier.""" - - def __init__( - self, - routing_table: RoutingTable, - vector_stores_config: VectorStoresConfig | None = None, - ) -> None: - logger.debug("Initializing VectorIORouter") - self.routing_table = routing_table - self.vector_stores_config = vector_stores_config - - async def initialize(self) -> None: - logger.debug("VectorIORouter.initialize") - pass - - async def shutdown(self) -> None: - logger.debug("VectorIORouter.shutdown") - pass - - async def _get_embedding_model_dimension(self, embedding_model_id: str) -> int: - """Get the embedding dimension for a specific embedding model.""" - all_models = await self.routing_table.get_all_with_type("model") - - for model in all_models: - if model.identifier == embedding_model_id and model.model_type == ModelType.embedding: - dimension = model.metadata.get("embedding_dimension") - if dimension is None: - raise ValueError(f"Embedding model '{embedding_model_id}' has no embedding_dimension in metadata") - return int(dimension) - - raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model") - - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: - doc_ids = [chunk.document_id for chunk in chunks[:3]] - logger.debug( - f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, " - f"ttl_seconds={ttl_seconds}, document_ids={doc_ids}{' and more...' 
if len(chunks) > 3 else ''}" - ) - provider = await self.routing_table.get_provider_impl(vector_db_id) - return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds) - - async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, - ) -> QueryChunksResponse: - logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}") - provider = await self.routing_table.get_provider_impl(vector_db_id) - return await provider.query_chunks(vector_db_id, query, params) - - # OpenAI Vector Stores API endpoints - async def openai_create_vector_store( - self, - params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], - ) -> VectorStoreObject: - # Extract llama-stack-specific parameters from extra_body - extra = params.model_extra or {} - embedding_model = extra.get("embedding_model") - embedding_dimension = extra.get("embedding_dimension") - provider_id = extra.get("provider_id") - - # Use default embedding model if not specified - if ( - embedding_model is None - and self.vector_stores_config - and self.vector_stores_config.default_embedding_model is not None - ): - # Construct the full model ID with provider prefix - embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id - model_id = self.vector_stores_config.default_embedding_model.model_id - embedding_model = f"{embedding_provider_id}/{model_id}" - - if embedding_model is not None and embedding_dimension is None: - embedding_dimension = await self._get_embedding_model_dimension(embedding_model) - - # Auto-select provider if not specified - if provider_id is None: - num_providers = len(self.routing_table.impls_by_provider_id) - if num_providers == 0: - raise ValueError("No vector_io providers available") - if num_providers > 1: - available_providers = list(self.routing_table.impls_by_provider_id.keys()) - # Use default configured provider - if self.vector_stores_config and self.vector_stores_config.default_provider_id: - default_provider = self.vector_stores_config.default_provider_id - if default_provider in available_providers: - provider_id = default_provider - logger.debug(f"Using configured default vector store provider: {provider_id}") - else: - raise ValueError( - f"Configured default vector store provider '{default_provider}' not found. " - f"Available providers: {available_providers}" - ) - else: - raise ValueError( - f"Multiple vector_io providers available. Please specify provider_id in extra_body. 
" - f"Available providers: {available_providers}" - ) - else: - provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] - - vector_store_id = f"vs_{uuid.uuid4()}" - registered_vector_store = await self.routing_table.register_vector_store( - vector_store_id=vector_store_id, - embedding_model=embedding_model, - embedding_dimension=embedding_dimension, - provider_id=provider_id, - provider_vector_store_id=vector_store_id, - vector_store_name=params.name, - ) - provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier) - - # Update model_extra with registered values so provider uses the already-registered vector_store - if params.model_extra is None: - params.model_extra = {} - params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id - params.model_extra["provider_id"] = registered_vector_store.provider_id - if embedding_model is not None: - params.model_extra["embedding_model"] = embedding_model - if embedding_dimension is not None: - params.model_extra["embedding_dimension"] = embedding_dimension - - return await provider.openai_create_vector_store(params) - - async def openai_list_vector_stores( - self, - limit: int | None = 20, - order: str | None = "desc", - after: str | None = None, - before: str | None = None, - ) -> VectorStoreListResponse: - logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}") - # Route to default provider for now - could aggregate from all providers in the future - # call retrieve on each vector dbs to get list of vector stores - vector_stores = await self.routing_table.get_all_with_type("vector_store") - all_stores = [] - for vector_store in vector_stores: - try: - provider = await self.routing_table.get_provider_impl(vector_store.identifier) - vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier) - all_stores.append(vector_store) - except Exception as e: - logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}") - continue - - # Sort by created_at - reverse_order = order == "desc" - all_stores.sort(key=lambda x: x.created_at, reverse=reverse_order) - - # Apply cursor-based pagination - if after: - after_index = next((i for i, store in enumerate(all_stores) if store.id == after), -1) - if after_index >= 0: - all_stores = all_stores[after_index + 1 :] - - if before: - before_index = next( - (i for i, store in enumerate(all_stores) if store.id == before), - len(all_stores), - ) - all_stores = all_stores[:before_index] - - # Apply limit - limited_stores = all_stores[:limit] - - # Determine pagination info - has_more = len(all_stores) > limit - first_id = limited_stores[0].id if limited_stores else None - last_id = limited_stores[-1].id if limited_stores else None - - return VectorStoreListResponse( - data=limited_stores, - has_more=has_more, - first_id=first_id, - last_id=last_id, - ) - - async def openai_retrieve_vector_store( - self, - vector_store_id: str, - ) -> VectorStoreObject: - logger.debug(f"VectorIORouter.openai_retrieve_vector_store: {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store(vector_store_id) - - async def openai_update_vector_store( - self, - vector_store_id: str, - name: str | None = None, - expires_after: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - ) -> VectorStoreObject: - logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}") - provider = await 
self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_update_vector_store( - vector_store_id=vector_store_id, - name=name, - expires_after=expires_after, - metadata=metadata, - ) - - async def openai_delete_vector_store( - self, - vector_store_id: str, - ) -> VectorStoreDeleteResponse: - logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") - return await self.routing_table.openai_delete_vector_store(vector_store_id) - - async def openai_search_vector_store( - self, - vector_store_id: str, - query: str | list[str], - filters: dict[str, Any] | None = None, - max_num_results: int | None = 10, - ranking_options: SearchRankingOptions | None = None, - rewrite_query: bool | None = False, - search_mode: str | None = "vector", - ) -> VectorStoreSearchResponsePage: - logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_search_vector_store( - vector_store_id=vector_store_id, - query=query, - filters=filters, - max_num_results=max_num_results, - ranking_options=ranking_options, - rewrite_query=rewrite_query, - search_mode=search_mode, - ) - - async def openai_attach_file_to_vector_store( - self, - vector_store_id: str, - file_id: str, - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, - ) -> VectorStoreFileObject: - logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_attach_file_to_vector_store( - vector_store_id=vector_store_id, - file_id=file_id, - attributes=attributes, - chunking_strategy=chunking_strategy, - ) - - async def openai_list_files_in_vector_store( - self, - vector_store_id: str, - limit: int | None = 20, - order: str | None = "desc", - after: str | None = None, - before: str | None = None, - filter: VectorStoreFileStatus | None = None, - ) -> list[VectorStoreFileObject]: - logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_list_files_in_vector_store( - vector_store_id=vector_store_id, - limit=limit, - order=order, - after=after, - before=before, - filter=filter, - ) - - async def openai_retrieve_vector_store_file( - self, - vector_store_id: str, - file_id: str, - ) -> VectorStoreFileObject: - logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file: {vector_store_id}, {file_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file( - vector_store_id=vector_store_id, - file_id=file_id, - ) - - async def openai_retrieve_vector_store_file_contents( - self, - vector_store_id: str, - file_id: str, - ) -> VectorStoreFileContentsResponse: - logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file_contents( - vector_store_id=vector_store_id, - file_id=file_id, - ) - - async def openai_update_vector_store_file( - self, - vector_store_id: str, - file_id: str, - attributes: dict[str, Any], - ) -> VectorStoreFileObject: - logger.debug(f"VectorIORouter.openai_update_vector_store_file: {vector_store_id}, {file_id}") - 
provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_update_vector_store_file( - vector_store_id=vector_store_id, - file_id=file_id, - attributes=attributes, - ) - - async def openai_delete_vector_store_file( - self, - vector_store_id: str, - file_id: str, - ) -> VectorStoreFileDeleteResponse: - logger.debug(f"VectorIORouter.openai_delete_vector_store_file: {vector_store_id}, {file_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_delete_vector_store_file( - vector_store_id=vector_store_id, - file_id=file_id, - ) - - async def health(self) -> dict[str, HealthResponse]: - health_statuses = {} - timeout = 1 # increasing the timeout to 1 second for health checks - for provider_id, impl in self.routing_table.impls_by_provider_id.items(): - try: - # check if the provider has a health method - if not hasattr(impl, "health"): - continue - health = await asyncio.wait_for(impl.health(), timeout=timeout) - health_statuses[provider_id] = health - except TimeoutError: - health_statuses[provider_id] = HealthResponse( - status=HealthStatus.ERROR, - message=f"Health check timed out after {timeout} seconds", - ) - except NotImplementedError: - health_statuses[provider_id] = HealthResponse(status=HealthStatus.NOT_IMPLEMENTED) - except Exception as e: - health_statuses[provider_id] = HealthResponse( - status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}" - ) - return health_statuses - - async def openai_create_vector_store_file_batch( - self, - vector_store_id: str, - params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], - ) -> VectorStoreFileBatchObject: - logger.debug( - f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files" - ) - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_create_vector_store_file_batch(vector_store_id, params) - - async def openai_retrieve_vector_store_file_batch( - self, - batch_id: str, - vector_store_id: str, - ) -> VectorStoreFileBatchObject: - logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_batch: {batch_id}, {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file_batch( - batch_id=batch_id, - vector_store_id=vector_store_id, - ) - - async def openai_list_files_in_vector_store_file_batch( - self, - batch_id: str, - vector_store_id: str, - after: str | None = None, - before: str | None = None, - filter: str | None = None, - limit: int | None = 20, - order: str | None = "desc", - ) -> VectorStoreFilesListInBatchResponse: - logger.debug(f"VectorIORouter.openai_list_files_in_vector_store_file_batch: {batch_id}, {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_list_files_in_vector_store_file_batch( - batch_id=batch_id, - vector_store_id=vector_store_id, - after=after, - before=before, - filter=filter, - limit=limit, - order=order, - ) - - async def openai_cancel_vector_store_file_batch( - self, - batch_id: str, - vector_store_id: str, - ) -> VectorStoreFileBatchObject: - logger.debug(f"VectorIORouter.openai_cancel_vector_store_file_batch: {batch_id}, {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_cancel_vector_store_file_batch( - batch_id=batch_id, - 
vector_store_id=vector_store_id, - ) diff --git a/llama_stack/core/routing_tables/benchmarks.py b/llama_stack/core/routing_tables/benchmarks.py deleted file mode 100644 index 8c87d395d..000000000 --- a/llama_stack/core/routing_tables/benchmarks.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse -from llama_stack.core.datatypes import ( - BenchmarkWithOwner, -) -from llama_stack.log import get_logger - -from .common import CommonRoutingTableImpl - -logger = get_logger(name=__name__, category="core::routing_tables") - - -class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): - async def list_benchmarks(self) -> ListBenchmarksResponse: - return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark")) - - async def get_benchmark(self, benchmark_id: str) -> Benchmark: - benchmark = await self.get_object_by_identifier("benchmark", benchmark_id) - if benchmark is None: - raise ValueError(f"Benchmark '{benchmark_id}' not found") - return benchmark - - async def register_benchmark( - self, - benchmark_id: str, - dataset_id: str, - scoring_functions: list[str], - metadata: dict[str, Any] | None = None, - provider_benchmark_id: str | None = None, - provider_id: str | None = None, - ) -> None: - if metadata is None: - metadata = {} - if provider_id is None: - if len(self.impls_by_provider_id) == 1: - provider_id = list(self.impls_by_provider_id.keys())[0] - else: - raise ValueError( - "No provider specified and multiple providers available. Please specify a provider_id." - ) - if provider_benchmark_id is None: - provider_benchmark_id = benchmark_id - benchmark = BenchmarkWithOwner( - identifier=benchmark_id, - dataset_id=dataset_id, - scoring_functions=scoring_functions, - metadata=metadata, - provider_id=provider_id, - provider_resource_id=provider_benchmark_id, - ) - await self.register_object(benchmark) - - async def unregister_benchmark(self, benchmark_id: str) -> None: - existing_benchmark = await self.get_benchmark(benchmark_id) - await self.unregister_object(existing_benchmark) diff --git a/llama_stack/core/routing_tables/datasets.py b/llama_stack/core/routing_tables/datasets.py deleted file mode 100644 index b129c9ec5..000000000 --- a/llama_stack/core/routing_tables/datasets.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
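register_dataset in the file deleted below infers a datasetio provider from the data source: an explicit provider_id in metadata wins, rows sources go to localfs, and URI sources go to huggingface only when the URI starts with that scheme. A sketch of the decision using plain dicts as stand-ins for the DataSource types:

def infer_provider(source: dict, metadata: dict | None = None) -> str:
    # Explicit metadata (e.g. passed through from nvidia datasetio) wins.
    if metadata and metadata.get("provider_id"):
        return metadata["provider_id"]
    if source["type"] == "rows":
        return "localfs"
    if source["type"] == "uri":
        return "huggingface" if source["uri"].startswith("huggingface") else "localfs"
    raise ValueError(f"Unknown data source type: {source['type']}")


assert infer_provider({"type": "uri", "uri": "huggingface://datasets/demo"}) == "huggingface"
assert infer_provider({"type": "rows", "rows": []}) == "localfs"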
- -import uuid -from typing import Any - -from llama_stack.apis.common.errors import DatasetNotFoundError -from llama_stack.apis.datasets import ( - Dataset, - DatasetPurpose, - Datasets, - DatasetType, - DataSource, - ListDatasetsResponse, - RowsDataSource, - URIDataSource, -) -from llama_stack.apis.resource import ResourceType -from llama_stack.core.datatypes import ( - DatasetWithOwner, -) -from llama_stack.log import get_logger - -from .common import CommonRoutingTableImpl - -logger = get_logger(name=__name__, category="core::routing_tables") - - -class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): - async def list_datasets(self) -> ListDatasetsResponse: - return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value)) - - async def get_dataset(self, dataset_id: str) -> Dataset: - dataset = await self.get_object_by_identifier("dataset", dataset_id) - if dataset is None: - raise DatasetNotFoundError(dataset_id) - return dataset - - async def register_dataset( - self, - purpose: DatasetPurpose, - source: DataSource, - metadata: dict[str, Any] | None = None, - dataset_id: str | None = None, - ) -> Dataset: - if isinstance(source, dict): - if source["type"] == "uri": - source = URIDataSource.parse_obj(source) - elif source["type"] == "rows": - source = RowsDataSource.parse_obj(source) - - if not dataset_id: - dataset_id = f"dataset-{str(uuid.uuid4())}" - - provider_dataset_id = dataset_id - - # infer provider from source - if metadata and metadata.get("provider_id"): - provider_id = metadata.get("provider_id") # pass through from nvidia datasetio - elif source.type == DatasetType.rows.value: - provider_id = "localfs" - elif source.type == DatasetType.uri.value: - # infer provider from uri - if source.uri.startswith("huggingface"): - provider_id = "huggingface" - else: - provider_id = "localfs" - else: - raise ValueError(f"Unknown data source type: {source.type}") - - if metadata is None: - metadata = {} - - dataset = DatasetWithOwner( - identifier=dataset_id, - provider_resource_id=provider_dataset_id, - provider_id=provider_id, - purpose=purpose, - source=source, - metadata=metadata, - ) - - await self.register_object(dataset) - return dataset - - async def unregister_dataset(self, dataset_id: str) -> None: - dataset = await self.get_dataset(dataset_id) - await self.unregister_object(dataset) diff --git a/llama_stack/core/routing_tables/models.py b/llama_stack/core/routing_tables/models.py deleted file mode 100644 index 7e43d7273..000000000 --- a/llama_stack/core/routing_tables/models.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
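The models routing table deleted below namespaces every model identifier as provider_id/provider_model_id, both in register_model and when reconciling provider-listed models during refresh, so identically named upstream models from different providers cannot collide. A tiny sketch of the convention (make_identifier is an illustrative helper, not a function in the real module):

def make_identifier(provider_id: str, provider_model_id: str) -> str:
    # Namespacing by provider keeps "llama3.2:3b" from ollama distinct from
    # the same upstream name served by another provider.
    return f"{provider_id}/{provider_model_id}"


assert make_identifier("ollama", "llama3.2:3b") == "ollama/llama3.2:3b"
assert make_identifier("vllm", "llama3.2:3b") != make_identifier("ollama", "llama3.2:3b")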
- -import time -from typing import Any - -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel -from llama_stack.core.datatypes import ( - ModelWithOwner, - RegistryEntrySource, -) -from llama_stack.log import get_logger - -from .common import CommonRoutingTableImpl, lookup_model - -logger = get_logger(name=__name__, category="core::routing_tables") - - -class ModelsRoutingTable(CommonRoutingTableImpl, Models): - listed_providers: set[str] = set() - - async def refresh(self) -> None: - for provider_id, provider in self.impls_by_provider_id.items(): - refresh = await provider.should_refresh_models() - refresh = refresh or provider_id not in self.listed_providers - if not refresh: - continue - - try: - models = await provider.list_models() - except Exception as e: - logger.warning(f"Model refresh failed for provider {provider_id}: {e}") - continue - - self.listed_providers.add(provider_id) - if models is None: - continue - - await self.update_registered_models(provider_id, models) - - async def list_models(self) -> ListModelsResponse: - return ListModelsResponse(data=await self.get_all_with_type("model")) - - async def openai_list_models(self) -> OpenAIListModelsResponse: - models = await self.get_all_with_type("model") - openai_models = [ - OpenAIModel( - id=model.identifier, - object="model", - created=int(time.time()), - owned_by="llama_stack", - ) - for model in models - ] - return OpenAIListModelsResponse(data=openai_models) - - async def get_model(self, model_id: str) -> Model: - return await lookup_model(self, model_id) - - async def get_provider_impl(self, model_id: str) -> Any: - model = await lookup_model(self, model_id) - if model.provider_id not in self.impls_by_provider_id: - raise ValueError(f"Provider {model.provider_id} not found in the routing table") - return self.impls_by_provider_id[model.provider_id] - - async def has_model(self, model_id: str) -> bool: - """ - Check if a model exists in the routing table. - - :param model_id: The model identifier to check - :return: True if the model exists, False otherwise - """ - try: - await lookup_model(self, model_id) - return True - except ModelNotFoundError: - return False - - async def register_model( - self, - model_id: str, - provider_model_id: str | None = None, - provider_id: str | None = None, - metadata: dict[str, Any] | None = None, - model_type: ModelType | None = None, - ) -> Model: - if provider_id is None: - # If provider_id not specified, use the only provider if it supports this model - if len(self.impls_by_provider_id) == 1: - provider_id = list(self.impls_by_provider_id.keys())[0] - else: - raise ValueError( - f"Please specify a provider_id for model {model_id} since multiple providers are available: {self.impls_by_provider_id.keys()}.\n\n" - "Use the provider_id as a prefix to disambiguate, e.g. 'provider_id/model_id'." 
- ) - - provider_model_id = provider_model_id or model_id - metadata = metadata or {} - model_type = model_type or ModelType.llm - if "embedding_dimension" not in metadata and model_type == ModelType.embedding: - raise ValueError("Embedding model must have an embedding dimension in its metadata") - - identifier = f"{provider_id}/{provider_model_id}" - model = ModelWithOwner( - identifier=identifier, - provider_resource_id=provider_model_id, - provider_id=provider_id, - metadata=metadata, - model_type=model_type, - source=RegistryEntrySource.via_register_api, - ) - registered_model = await self.register_object(model) - return registered_model - - async def unregister_model(self, model_id: str) -> None: - existing_model = await self.get_model(model_id) - if existing_model is None: - raise ModelNotFoundError(model_id) - await self.unregister_object(existing_model) - - async def update_registered_models( - self, - provider_id: str, - models: list[Model], - ) -> None: - existing_models = await self.get_all_with_type("model") - - # we may have an alias for the model registered by the user (or during initialization - # from run.yaml) that we need to keep track of - model_ids = {} - for model in existing_models: - if model.provider_id != provider_id: - continue - if model.source == RegistryEntrySource.via_register_api: - model_ids[model.provider_resource_id] = model.identifier - continue - - logger.debug(f"unregistering model {model.identifier}") - await self.unregister_object(model) - - for model in models: - if model.provider_resource_id in model_ids: - # avoid overwriting a non-provider-registered model entry - continue - - if model.identifier == model.provider_resource_id: - model.identifier = f"{provider_id}/{model.provider_resource_id}" - - logger.debug(f"registering model {model.identifier} ({model.provider_resource_id})") - await self.register_object( - ModelWithOwner( - identifier=model.identifier, - provider_resource_id=model.provider_resource_id, - provider_id=provider_id, - metadata=model.metadata, - model_type=model.model_type, - source=RegistryEntrySource.listed_from_provider, - ) - ) diff --git a/llama_stack/core/routing_tables/scoring_functions.py b/llama_stack/core/routing_tables/scoring_functions.py deleted file mode 100644 index 520f07014..000000000 --- a/llama_stack/core/routing_tables/scoring_functions.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
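benchmarks.py above and the scoring-function and shield tables below all repeat one defaulting rule: an omitted provider_id is filled in only when exactly one provider is registered, otherwise registration fails loudly. A sketch of that rule extracted into a helper (hypothetical; the deleted code inlines it each time):

def default_provider_id(impls_by_provider_id: dict[str, object]) -> str:
    # Safe to default only when the choice is unambiguous.
    if len(impls_by_provider_id) == 1:
        return next(iter(impls_by_provider_id))
    raise ValueError("No provider specified and multiple providers available. Please specify a provider_id.")


assert default_provider_id({"basic": object()}) == "basic"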
- -from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.scoring_functions import ( - ListScoringFunctionsResponse, - ScoringFn, - ScoringFnParams, - ScoringFunctions, -) -from llama_stack.core.datatypes import ( - ScoringFnWithOwner, -) -from llama_stack.log import get_logger - -from .common import CommonRoutingTableImpl - -logger = get_logger(name=__name__, category="core::routing_tables") - - -class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): - async def list_scoring_functions(self) -> ListScoringFunctionsResponse: - return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value)) - - async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: - scoring_fn = await self.get_object_by_identifier("scoring_function", scoring_fn_id) - if scoring_fn is None: - raise ValueError(f"Scoring function '{scoring_fn_id}' not found") - return scoring_fn - - async def register_scoring_function( - self, - scoring_fn_id: str, - description: str, - return_type: ParamType, - provider_scoring_fn_id: str | None = None, - provider_id: str | None = None, - params: ScoringFnParams | None = None, - ) -> None: - if provider_scoring_fn_id is None: - provider_scoring_fn_id = scoring_fn_id - if provider_id is None: - if len(self.impls_by_provider_id) == 1: - provider_id = list(self.impls_by_provider_id.keys())[0] - else: - raise ValueError( - "No provider specified and multiple providers available. Please specify a provider_id." - ) - scoring_fn = ScoringFnWithOwner( - identifier=scoring_fn_id, - description=description, - return_type=return_type, - provider_resource_id=provider_scoring_fn_id, - provider_id=provider_id, - params=params, - ) - scoring_fn.provider_id = provider_id - await self.register_object(scoring_fn) - - async def unregister_scoring_function(self, scoring_fn_id: str) -> None: - existing_scoring_fn = await self.get_scoring_function(scoring_fn_id) - await self.unregister_object(existing_scoring_fn) diff --git a/llama_stack/core/routing_tables/shields.py b/llama_stack/core/routing_tables/shields.py deleted file mode 100644 index b1918d20a..000000000 --- a/llama_stack/core/routing_tables/shields.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any - -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields -from llama_stack.core.datatypes import ( - ShieldWithOwner, -) -from llama_stack.log import get_logger - -from .common import CommonRoutingTableImpl - -logger = get_logger(name=__name__, category="core::routing_tables") - - -class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): - async def list_shields(self) -> ListShieldsResponse: - return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value)) - - async def get_shield(self, identifier: str) -> Shield: - shield = await self.get_object_by_identifier("shield", identifier) - if shield is None: - raise ValueError(f"Shield '{identifier}' not found") - return shield - - async def register_shield( - self, - shield_id: str, - provider_shield_id: str | None = None, - provider_id: str | None = None, - params: dict[str, Any] | None = None, - ) -> Shield: - if provider_shield_id is None: - provider_shield_id = shield_id - if provider_id is None: - # If provider_id not specified, use the only provider if it supports this shield type - if len(self.impls_by_provider_id) == 1: - provider_id = list(self.impls_by_provider_id.keys())[0] - else: - raise ValueError( - "No provider specified and multiple providers available. Please specify a provider_id." - ) - if params is None: - params = {} - shield = ShieldWithOwner( - identifier=shield_id, - provider_resource_id=provider_shield_id, - provider_id=provider_id, - params=params, - ) - await self.register_object(shield) - return shield - - async def unregister_shield(self, identifier: str) -> None: - existing_shield = await self.get_shield(identifier) - await self.unregister_object(existing_shield) diff --git a/llama_stack/core/routing_tables/vector_stores.py b/llama_stack/core/routing_tables/vector_stores.py deleted file mode 100644 index c6c80a01e..000000000 --- a/llama_stack/core/routing_tables/vector_stores.py +++ /dev/null @@ -1,292 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError -from llama_stack.apis.models import ModelType -from llama_stack.apis.resource import ResourceType - -# Removed VectorStores import to avoid exposing public API -from llama_stack.apis.vector_io.vector_io import ( - SearchRankingOptions, - VectorStoreChunkingStrategy, - VectorStoreDeleteResponse, - VectorStoreFileContentsResponse, - VectorStoreFileDeleteResponse, - VectorStoreFileObject, - VectorStoreFileStatus, - VectorStoreObject, - VectorStoreSearchResponsePage, -) -from llama_stack.core.datatypes import ( - VectorStoreWithOwner, -) -from llama_stack.log import get_logger - -from .common import CommonRoutingTableImpl, lookup_model - -logger = get_logger(name=__name__, category="core::routing_tables") - - -class VectorStoresRoutingTable(CommonRoutingTableImpl): - """Internal routing table for vector_store operations. - - Does not inherit from VectorStores to avoid exposing public API endpoints. - Only provides internal routing functionality for VectorIORouter. 
- """ - - # Internal methods only - no public API exposure - - async def register_vector_store( - self, - vector_store_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - provider_vector_store_id: str | None = None, - vector_store_name: str | None = None, - ) -> Any: - if provider_id is None: - if len(self.impls_by_provider_id) > 0: - provider_id = list(self.impls_by_provider_id.keys())[0] - if len(self.impls_by_provider_id) > 1: - logger.warning( - f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}." - ) - else: - raise ValueError("No provider available. Please configure a vector_io provider.") - model = await lookup_model(self, embedding_model) - if model is None: - raise ModelNotFoundError(embedding_model) - if model.model_type != ModelType.embedding: - raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) - - vector_store = VectorStoreWithOwner( - identifier=vector_store_id, - type=ResourceType.vector_store.value, - provider_id=provider_id, - provider_resource_id=provider_vector_store_id, - embedding_model=embedding_model, - embedding_dimension=embedding_dimension, - vector_store_name=vector_store_name, - ) - await self.register_object(vector_store) - return vector_store - - async def openai_retrieve_vector_store( - self, - vector_store_id: str, - ) -> VectorStoreObject: - await self.assert_action_allowed("read", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store(vector_store_id) - - async def openai_update_vector_store( - self, - vector_store_id: str, - name: str | None = None, - expires_after: dict[str, Any] | None = None, - metadata: dict[str, Any] | None = None, - ) -> VectorStoreObject: - await self.assert_action_allowed("update", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_update_vector_store( - vector_store_id=vector_store_id, - name=name, - expires_after=expires_after, - metadata=metadata, - ) - - async def openai_delete_vector_store( - self, - vector_store_id: str, - ) -> VectorStoreDeleteResponse: - await self.assert_action_allowed("delete", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - result = await provider.openai_delete_vector_store(vector_store_id) - await self.unregister_vector_store(vector_store_id) - return result - - async def unregister_vector_store(self, vector_store_id: str) -> None: - """Remove the vector store from the routing table registry.""" - try: - vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id) - if vector_store_obj: - await self.unregister_object(vector_store_obj) - except Exception as e: - # Log the error but don't fail the operation - logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}") - - async def openai_search_vector_store( - self, - vector_store_id: str, - query: str | list[str], - filters: dict[str, Any] | None = None, - max_num_results: int | None = 10, - ranking_options: SearchRankingOptions | None = None, - rewrite_query: bool | None = False, - search_mode: str | None = "vector", - ) -> VectorStoreSearchResponsePage: - await self.assert_action_allowed("read", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_search_vector_store( - 
vector_store_id=vector_store_id, - query=query, - filters=filters, - max_num_results=max_num_results, - ranking_options=ranking_options, - rewrite_query=rewrite_query, - search_mode=search_mode, - ) - - async def openai_attach_file_to_vector_store( - self, - vector_store_id: str, - file_id: str, - attributes: dict[str, Any] | None = None, - chunking_strategy: VectorStoreChunkingStrategy | None = None, - ) -> VectorStoreFileObject: - await self.assert_action_allowed("update", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_attach_file_to_vector_store( - vector_store_id=vector_store_id, - file_id=file_id, - attributes=attributes, - chunking_strategy=chunking_strategy, - ) - - async def openai_list_files_in_vector_store( - self, - vector_store_id: str, - limit: int | None = 20, - order: str | None = "desc", - after: str | None = None, - before: str | None = None, - filter: VectorStoreFileStatus | None = None, - ) -> list[VectorStoreFileObject]: - await self.assert_action_allowed("read", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_list_files_in_vector_store( - vector_store_id=vector_store_id, - limit=limit, - order=order, - after=after, - before=before, - filter=filter, - ) - - async def openai_retrieve_vector_store_file( - self, - vector_store_id: str, - file_id: str, - ) -> VectorStoreFileObject: - await self.assert_action_allowed("read", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file( - vector_store_id=vector_store_id, - file_id=file_id, - ) - - async def openai_retrieve_vector_store_file_contents( - self, - vector_store_id: str, - file_id: str, - ) -> VectorStoreFileContentsResponse: - await self.assert_action_allowed("read", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file_contents( - vector_store_id=vector_store_id, - file_id=file_id, - ) - - async def openai_update_vector_store_file( - self, - vector_store_id: str, - file_id: str, - attributes: dict[str, Any], - ) -> VectorStoreFileObject: - await self.assert_action_allowed("update", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_update_vector_store_file( - vector_store_id=vector_store_id, - file_id=file_id, - attributes=attributes, - ) - - async def openai_delete_vector_store_file( - self, - vector_store_id: str, - file_id: str, - ) -> VectorStoreFileDeleteResponse: - await self.assert_action_allowed("delete", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_delete_vector_store_file( - vector_store_id=vector_store_id, - file_id=file_id, - ) - - async def openai_create_vector_store_file_batch( - self, - vector_store_id: str, - file_ids: list[str], - attributes: dict[str, Any] | None = None, - chunking_strategy: Any | None = None, - ): - await self.assert_action_allowed("update", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_create_vector_store_file_batch( - vector_store_id=vector_store_id, - file_ids=file_ids, - attributes=attributes, - chunking_strategy=chunking_strategy, - ) - - async def openai_retrieve_vector_store_file_batch( - self, - batch_id: str, - 
vector_store_id: str, - ): - await self.assert_action_allowed("read", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file_batch( - batch_id=batch_id, - vector_store_id=vector_store_id, - ) - - async def openai_list_files_in_vector_store_file_batch( - self, - batch_id: str, - vector_store_id: str, - after: str | None = None, - before: str | None = None, - filter: str | None = None, - limit: int | None = 20, - order: str | None = "desc", - ): - await self.assert_action_allowed("read", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_list_files_in_vector_store_file_batch( - batch_id=batch_id, - vector_store_id=vector_store_id, - after=after, - before=before, - filter=filter, - limit=limit, - order=order, - ) - - async def openai_cancel_vector_store_file_batch( - self, - batch_id: str, - vector_store_id: str, - ): - await self.assert_action_allowed("update", "vector_store", vector_store_id) - provider = await self.get_provider_impl(vector_store_id) - return await provider.openai_cancel_vector_store_file_batch( - batch_id=batch_id, - vector_store_id=vector_store_id, - ) diff --git a/llama_stack/core/server/tracing.py b/llama_stack/core/server/tracing.py deleted file mode 100644 index 4c6df5b42..000000000 --- a/llama_stack/core/server/tracing.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -from aiohttp import hdrs - -from llama_stack.core.external import ExternalApiSpec -from llama_stack.core.server.routes import find_matching_route, initialize_route_impls -from llama_stack.log import get_logger -from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace - -logger = get_logger(name=__name__, category="core::server") - - -class TracingMiddleware: - def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]): - self.app = app - self.impls = impls - self.external_apis = external_apis - # FastAPI built-in paths that should bypass custom routing - self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static") - - async def __call__(self, scope, receive, send): - if scope.get("type") == "lifespan": - return await self.app(scope, receive, send) - - path = scope.get("path", "") - - # Check if the path is a FastAPI built-in path - if path.startswith(self.fastapi_paths): - # Pass through to FastAPI's built-in handlers - logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}") - return await self.app(scope, receive, send) - - if not hasattr(self, "route_impls"): - self.route_impls = initialize_route_impls(self.impls, self.external_apis) - - try: - _, _, route_path, webmethod = find_matching_route( - scope.get("method", hdrs.METH_GET), path, self.route_impls - ) - except ValueError: - # If no matching endpoint is found, pass through to FastAPI - logger.debug(f"No matching route found for path: {path}, falling back to FastAPI") - return await self.app(scope, receive, send) - - # Log deprecation warning if route is deprecated - if getattr(webmethod, "deprecated", False): - logger.warning( - f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - " - f"This route is deprecated and may be removed in a future version. " - f"Please check the docs for the supported version." 
- ) - - trace_attributes = {"__location__": "server", "raw_path": path} - - # Extract W3C trace context headers and store as trace attributes - headers = dict(scope.get("headers", [])) - traceparent = headers.get(b"traceparent", b"").decode() - if traceparent: - trace_attributes["traceparent"] = traceparent - tracestate = headers.get(b"tracestate", b"").decode() - if tracestate: - trace_attributes["tracestate"] = tracestate - - trace_path = webmethod.descriptive_name or route_path - trace_context = await start_trace(trace_path, trace_attributes) - - async def send_with_trace_id(message): - if message["type"] == "http.response.start": - headers = message.get("headers", []) - headers.append([b"x-trace-id", str(trace_context.trace_id).encode()]) - message["headers"] = headers - await send(message) - - try: - return await self.app(scope, receive, send_with_trace_id) - finally: - await end_trace() diff --git a/llama_stack/core/storage/datatypes.py b/llama_stack/core/storage/datatypes.py deleted file mode 100644 index 9df170e10..000000000 --- a/llama_stack/core/storage/datatypes.py +++ /dev/null @@ -1,283 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import re -from abc import abstractmethod -from enum import StrEnum -from pathlib import Path -from typing import Annotated, Literal - -from pydantic import BaseModel, Field, field_validator - - -class StorageBackendType(StrEnum): - KV_REDIS = "kv_redis" - KV_SQLITE = "kv_sqlite" - KV_POSTGRES = "kv_postgres" - KV_MONGODB = "kv_mongodb" - SQL_SQLITE = "sql_sqlite" - SQL_POSTGRES = "sql_postgres" - - -class CommonConfig(BaseModel): - namespace: str | None = Field( - default=None, - description="All keys will be prefixed with this namespace", - ) - - -class RedisKVStoreConfig(CommonConfig): - type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS - host: str = "localhost" - port: int = 6379 - - @property - def url(self) -> str: - return f"redis://{self.host}:{self.port}" - - @classmethod - def pip_packages(cls) -> list[str]: - return ["redis"] - - @classmethod - def sample_run_config(cls): - return { - "type": StorageBackendType.KV_REDIS.value, - "host": "${env.REDIS_HOST:=localhost}", - "port": "${env.REDIS_PORT:=6379}", - } - - -class SqliteKVStoreConfig(CommonConfig): - type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE - db_path: str = Field( - description="File path for the sqlite database", - ) - - @classmethod - def pip_packages(cls) -> list[str]: - return ["aiosqlite"] - - @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): - return { - "type": StorageBackendType.KV_SQLITE.value, - "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - } - - -class PostgresKVStoreConfig(CommonConfig): - type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES - host: str = "localhost" - port: int | str = 5432 - db: str = "llamastack" - user: str - password: str | None = None - ssl_mode: str | None = None - ca_cert_path: str | None = None - table_name: str = "llamastack_kvstore" - - @classmethod - def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): - return { - "type": StorageBackendType.KV_POSTGRES.value, - "host": "${env.POSTGRES_HOST:=localhost}", - "port": "${env.POSTGRES_PORT:=5432}", - "db": "${env.POSTGRES_DB:=llamastack}", - "user": 
"${env.POSTGRES_USER:=llamastack}", - "password": "${env.POSTGRES_PASSWORD:=llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", - } - - @classmethod - @field_validator("table_name") - def validate_table_name(cls, v: str) -> str: - # PostgreSQL identifiers rules: - # - Must start with a letter or underscore - # - Can contain letters, numbers, and underscores - # - Maximum length is 63 bytes - pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$" - if not re.match(pattern, v): - raise ValueError( - "Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores" - ) - if len(v) > 63: - raise ValueError("Table name must be less than 63 characters") - return v - - @classmethod - def pip_packages(cls) -> list[str]: - return ["psycopg2-binary"] - - -class MongoDBKVStoreConfig(CommonConfig): - type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB - host: str = "localhost" - port: int = 27017 - db: str = "llamastack" - user: str | None = None - password: str | None = None - collection_name: str = "llamastack_kvstore" - - @classmethod - def pip_packages(cls) -> list[str]: - return ["pymongo"] - - @classmethod - def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): - return { - "type": StorageBackendType.KV_MONGODB.value, - "host": "${env.MONGODB_HOST:=localhost}", - "port": "${env.MONGODB_PORT:=5432}", - "db": "${env.MONGODB_DB}", - "user": "${env.MONGODB_USER}", - "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", - } - - -class SqlAlchemySqlStoreConfig(BaseModel): - @property - @abstractmethod - def engine_str(self) -> str: ... - - # TODO: move this when we have a better way to specify dependencies with internal APIs - @classmethod - def pip_packages(cls) -> list[str]: - return ["sqlalchemy[asyncio]"] - - -class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): - type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE - db_path: str = Field( - description="Database path, e.g. 
~/.llama/distributions/ollama/sqlstore.db", - ) - - @property - def engine_str(self) -> str: - return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix() - - @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): - return { - "type": StorageBackendType.SQL_SQLITE.value, - "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - } - - @classmethod - def pip_packages(cls) -> list[str]: - return super().pip_packages() + ["aiosqlite"] - - -class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): - type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES - host: str = "localhost" - port: int | str = 5432 - db: str = "llamastack" - user: str - password: str | None = None - - @property - def engine_str(self) -> str: - return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" - - @classmethod - def pip_packages(cls) -> list[str]: - return super().pip_packages() + ["asyncpg"] - - @classmethod - def sample_run_config(cls, **kwargs): - return { - "type": StorageBackendType.SQL_POSTGRES.value, - "host": "${env.POSTGRES_HOST:=localhost}", - "port": "${env.POSTGRES_PORT:=5432}", - "db": "${env.POSTGRES_DB:=llamastack}", - "user": "${env.POSTGRES_USER:=llamastack}", - "password": "${env.POSTGRES_PASSWORD:=llamastack}", - } - - -# reference = (backend_name, table_name) -class SqlStoreReference(BaseModel): - """A reference to a 'SQL-like' persistent store. A table name must be provided.""" - - table_name: str = Field( - description="Name of the table to use for the SqlStore", - ) - - backend: str = Field( - description="Name of backend from storage.backends", - ) - - -# reference = (backend_name, namespace) -class KVStoreReference(BaseModel): - """A reference to a 'key-value' persistent store. 
A namespace must be provided."""
-
-    namespace: str = Field(
-        description="Key prefix for KVStore backends",
-    )
-
-    backend: str = Field(
-        description="Name of backend from storage.backends",
-    )
-
-
-StorageBackendConfig = Annotated[
-    RedisKVStoreConfig
-    | SqliteKVStoreConfig
-    | PostgresKVStoreConfig
-    | MongoDBKVStoreConfig
-    | SqliteSqlStoreConfig
-    | PostgresSqlStoreConfig,
-    Field(discriminator="type"),
-]
-
-
-class InferenceStoreReference(SqlStoreReference):
-    """Inference store configuration with queue tuning."""
-
-    max_write_queue_size: int = Field(
-        default=10000,
-        description="Max queued writes for inference store",
-    )
-    num_writers: int = Field(
-        default=4,
-        description="Number of concurrent background writers",
-    )
-
-
-class ResponsesStoreReference(InferenceStoreReference):
-    """Responses store configuration with queue tuning."""
-
-
-class ServerStoresConfig(BaseModel):
-    metadata: KVStoreReference | None = Field(
-        default=None,
-        description="Metadata store configuration (uses KV backend)",
-    )
-    inference: InferenceStoreReference | None = Field(
-        default=None,
-        description="Inference store configuration (uses SQL backend)",
-    )
-    conversations: SqlStoreReference | None = Field(
-        default=None,
-        description="Conversations store configuration (uses SQL backend)",
-    )
-    responses: ResponsesStoreReference | None = Field(
-        default=None,
-        description="Responses store configuration (uses SQL backend)",
-    )
-
-
-class StorageConfig(BaseModel):
-    backends: dict[str, StorageBackendConfig] = Field(
-        description="Named backend configurations (e.g., 'default', 'cache')",
-    )
-    stores: ServerStoresConfig = Field(
-        default_factory=lambda: ServerStoresConfig(),
-        description="Named references to storage backends used by the stack core",
-    )
diff --git a/llama_stack/core/ui/Containerfile b/llama_stack/core/ui/Containerfile
deleted file mode 100644
index 0126d1867..000000000
--- a/llama_stack/core/ui/Containerfile
+++ /dev/null
@@ -1,11 +0,0 @@
-# More info on playground configuration can be found here:
-# https://llama-stack.readthedocs.io/en/latest/playground
-
-FROM python:3.12-slim
-WORKDIR /app
-COPY . /app/
-RUN /usr/local/bin/python -m pip install --upgrade pip && \
-    /usr/local/bin/pip3 install -r requirements.txt
-EXPOSE 8501
-
-ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
diff --git a/llama_stack/core/ui/README.md b/llama_stack/core/ui/README.md
deleted file mode 100644
index 37f1501c9..000000000
--- a/llama_stack/core/ui/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# (Experimental) Llama Stack UI
-
-## Docker Setup
-
-:warning: This is a work in progress.
-
-## Developer Setup
-
-1. Start up the Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.html).
-
-```
-llama stack list-deps together | xargs -L1 uv pip install
-
-llama stack run together
-```
-
-2. (Optional) Register datasets and eval tasks as resources if you want to run pre-configured evaluation flows (e.g. the Evaluations (Generation + Scoring) page).
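
The same registrations can be scripted with the Python client; a hedged sketch, where the method and keyword names are assumptions mirroring the CLI flags shown just below:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Assumed to mirror `llama-stack-client datasets register`; the exact keyword
# names (e.g. dataset_schema vs. schema) may vary across client versions.
client.datasets.register(
    dataset_id="mmlu",
    provider_id="huggingface",
    url={"uri": "https://huggingface.co/datasets/llamastack/evals"},
    metadata={"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"},
    dataset_schema={
        "input_query": {"type": "string"},
        "expected_answer": {"type": "string"},
        "chat_completion_input": {"type": "string"},
    },
)
# benchmarks.register is assumed to work analogously to the CLI command below.
```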
-
-```bash
-llama-stack-client datasets register \
---dataset-id "mmlu" \
---provider-id "huggingface" \
---url "https://huggingface.co/datasets/llamastack/evals" \
---metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
---schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string"}, "chat_completion_input": {"type": "string"}}'
-```
-
-```bash
-llama-stack-client benchmarks register \
---eval-task-id meta-reference-mmlu \
---provider-id meta-reference \
---dataset-id mmlu \
---scoring-functions basic::regex_parser_multiple_choice_answer
-```
-
-3. Start the Streamlit UI
-
-```bash
-uv run --with ".[ui]" streamlit run llama_stack/core/ui/app.py
-```
-
-## Environment Variables
-
-| Environment Variable | Description                      | Default Value         |
-|----------------------|----------------------------------|-----------------------|
-| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 |
-| FIREWORKS_API_KEY    | API key for Fireworks provider   | (empty string)        |
-| TOGETHER_API_KEY     | API key for Together provider    | (empty string)        |
-| SAMBANOVA_API_KEY    | API key for SambaNova provider   | (empty string)        |
-| OPENAI_API_KEY       | API key for OpenAI provider      | (empty string)        |
diff --git a/llama_stack/core/ui/app.py b/llama_stack/core/ui/app.py
deleted file mode 100644
index 441f65d20..000000000
--- a/llama_stack/core/ui/app.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import streamlit as st
-
-
-def main():
-    # Evaluation pages
-    application_evaluation_page = st.Page(
-        "page/evaluations/app_eval.py",
-        title="Evaluations (Scoring)",
-        icon="📊",
-        default=False,
-    )
-    native_evaluation_page = st.Page(
-        "page/evaluations/native_eval.py",
-        title="Evaluations (Generation + Scoring)",
-        icon="📊",
-        default=False,
-    )
-
-    # Playground pages
-    chat_page = st.Page("page/playground/chat.py", title="Chat", icon="💬", default=True)
-    rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False)
-    tool_page = st.Page("page/playground/tools.py", title="Tools", icon="🛠", default=False)
-
-    # Distribution pages
-    resources_page = st.Page("page/distribution/resources.py", title="Resources", icon="🔍", default=False)
-    provider_page = st.Page(
-        "page/distribution/providers.py",
-        title="API Providers",
-        icon="🔍",
-        default=False,
-    )
-
-    pg = st.navigation(
-        {
-            "Playground": [
-                chat_page,
-                rag_page,
-                tool_page,
-                application_evaluation_page,
-                native_evaluation_page,
-            ],
-            "Inspect": [provider_page, resources_page],
-        },
-        expanded=False,
-    )
-    pg.run()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/llama_stack/core/ui/modules/api.py b/llama_stack/core/ui/modules/api.py
deleted file mode 100644
index 9db87b280..000000000
--- a/llama_stack/core/ui/modules/api.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-
-from llama_stack_client import LlamaStackClient
-
-
-class LlamaStackApi:
-    def __init__(self):
-        self.client = LlamaStackClient(
-            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
-            provider_data={
-                "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
-                "together_api_key": os.environ.get("TOGETHER_API_KEY", ""),
-                "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""),
-                "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
-                "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""),
-            },
-        )
-
-    def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
-        """Run scoring on a single row"""
-        if not scoring_params:
-            scoring_params = dict.fromkeys(scoring_function_ids)
-        return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)
-
-
-llama_stack_api = LlamaStackApi()
diff --git a/llama_stack/core/ui/modules/utils.py b/llama_stack/core/ui/modules/utils.py
deleted file mode 100644
index 67cce98fa..000000000
--- a/llama_stack/core/ui/modules/utils.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import base64
-import os
-
-import pandas as pd
-import streamlit as st
-
-
-def process_dataset(file):
-    # Returns a DataFrame on success, or None after reporting an error,
-    # so callers can uniformly check `if df is None`.
-    if file is None:
-        st.error("No file uploaded")
-        return None
-
-    try:
-        # Determine file type and read accordingly
-        file_ext = os.path.splitext(file.name)[1].lower()
-        if file_ext == ".csv":
-            df = pd.read_csv(file)
-        elif file_ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(file)
-        else:
-            st.error("Unsupported file format. Please upload a CSV or Excel file.")
-            return None
-
-        return df
-
-    except Exception as e:
-        st.error(f"Error processing file: {str(e)}")
-        return None
-
-
-def data_url_from_file(file) -> str:
-    file_content = file.getvalue()
-    base64_content = base64.b64encode(file_content).decode("utf-8")
-    mime_type = file.type
-
-    data_url = f"data:{mime_type};base64,{base64_content}"
-
-    return data_url
diff --git a/llama_stack/core/ui/page/distribution/datasets.py b/llama_stack/core/ui/page/distribution/datasets.py
deleted file mode 100644
index aab0901ac..000000000
--- a/llama_stack/core/ui/page/distribution/datasets.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def datasets():
-    st.header("Datasets")
-
-    datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()}
-    if len(datasets_info) > 0:
-        selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
-        st.json(datasets_info[selected_dataset], expanded=True)
diff --git a/llama_stack/core/ui/page/distribution/eval_tasks.py b/llama_stack/core/ui/page/distribution/eval_tasks.py
deleted file mode 100644
index 1a0ce502b..000000000
--- a/llama_stack/core/ui/page/distribution/eval_tasks.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
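
As a usage sketch for the `run_scoring` helper in `modules/api.py` above (the row keys and the scoring-function id are illustrative placeholders and must exist on your server):

```python
from llama_stack.core.ui.modules.api import llama_stack_api

row = {
    "input_query": "What is the capital of France?",
    "expected_answer": "Paris",
    "generated_answer": "Paris",
}
# With scoring_params=None the helper falls back to dict.fromkeys(...),
# so each selected function runs with its default parameters.
response = llama_stack_api.run_scoring(
    row,
    scoring_function_ids=["basic::equality"],  # placeholder id
    scoring_params=None,
)
print(response.results["basic::equality"].score_rows[0])
```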
- -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def benchmarks(): - # Benchmarks Section - st.header("Benchmarks") - - benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()} - - if len(benchmarks_info) > 0: - selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect") - st.json(benchmarks_info[selected_benchmark], expanded=True) diff --git a/llama_stack/core/ui/page/distribution/models.py b/llama_stack/core/ui/page/distribution/models.py deleted file mode 100644 index f84508746..000000000 --- a/llama_stack/core/ui/page/distribution/models.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def models(): - # Models Section - st.header("Models") - models_info = {m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()} - - selected_model = st.selectbox("Select a model", list(models_info.keys())) - st.json(models_info[selected_model]) diff --git a/llama_stack/core/ui/page/distribution/providers.py b/llama_stack/core/ui/page/distribution/providers.py deleted file mode 100644 index 3ec6026d1..000000000 --- a/llama_stack/core/ui/page/distribution/providers.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def providers(): - st.header("🔍 API Providers") - apis_providers_lst = llama_stack_api.client.providers.list() - api_to_providers = {} - for api_provider in apis_providers_lst: - if api_provider.api in api_to_providers: - api_to_providers[api_provider.api].append(api_provider) - else: - api_to_providers[api_provider.api] = [api_provider] - - for api in api_to_providers.keys(): - st.markdown(f"###### {api}") - st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500) - - -providers() diff --git a/llama_stack/core/ui/page/distribution/resources.py b/llama_stack/core/ui/page/distribution/resources.py deleted file mode 100644 index 6e7122ceb..000000000 --- a/llama_stack/core/ui/page/distribution/resources.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
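
Minor aside: the API-to-provider grouping in `providers()` above is the classic bucket pattern; a `collections.defaultdict` sketch of the same logic (behavior unchanged):

```python
from collections import defaultdict

# Bucket each provider under its API, exactly as the loop above does.
api_to_providers: dict[str, list] = defaultdict(list)
for api_provider in apis_providers_lst:
    api_to_providers[api_provider.api].append(api_provider)
```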
- -from streamlit_option_menu import option_menu - -from llama_stack.core.ui.page.distribution.datasets import datasets -from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks -from llama_stack.core.ui.page.distribution.models import models -from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions -from llama_stack.core.ui.page.distribution.shields import shields - - -def resources_page(): - options = [ - "Models", - "Shields", - "Scoring Functions", - "Datasets", - "Benchmarks", - ] - icons = ["magic", "shield", "file-bar-graph", "database", "list-task"] - selected_resource = option_menu( - None, - options, - icons=icons, - orientation="horizontal", - styles={ - "nav-link": { - "font-size": "12px", - }, - }, - ) - if selected_resource == "Benchmarks": - benchmarks() - elif selected_resource == "Datasets": - datasets() - elif selected_resource == "Models": - models() - elif selected_resource == "Scoring Functions": - scoring_functions() - elif selected_resource == "Shields": - shields() - - -resources_page() diff --git a/llama_stack/core/ui/page/distribution/scoring_functions.py b/llama_stack/core/ui/page/distribution/scoring_functions.py deleted file mode 100644 index 2a5196fa9..000000000 --- a/llama_stack/core/ui/page/distribution/scoring_functions.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def scoring_functions(): - st.header("Scoring Functions") - - scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()} - - selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys())) - st.json(scoring_functions_info[selected_scoring_function], expanded=True) diff --git a/llama_stack/core/ui/page/distribution/shields.py b/llama_stack/core/ui/page/distribution/shields.py deleted file mode 100644 index ecce2f12b..000000000 --- a/llama_stack/core/ui/page/distribution/shields.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def shields(): - # Shields Section - st.header("Shields") - - shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()} - - selected_shield = st.selectbox("Select a shield", list(shields_info.keys())) - st.json(shields_info[selected_shield]) diff --git a/llama_stack/core/ui/page/evaluations/app_eval.py b/llama_stack/core/ui/page/evaluations/app_eval.py deleted file mode 100644 index 07e6349c9..000000000 --- a/llama_stack/core/ui/page/evaluations/app_eval.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
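
The evaluation page that follows collects per-function parameters as JSON. For a judge-based scoring function the resulting payload might look roughly like this (a hedged sketch; field names follow `LLMAsJudgeScoringFnParams`, while the function id and values are placeholders):

```python
# Hypothetical payload for one selected scoring function; the page stores
# one such dict per function id in `scoring_params`.
scoring_params = {
    "llm-as-judge::base": {  # placeholder scoring-function id
        "type": "llm_as_judge",
        "judge_model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder
        "prompt_template": "Rate the generated answer against the expected answer. End with 'Score: <n>'.",
        "judge_score_regexes": [r"Score:\s*(\d+)"],
    }
}
```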
- -import json - -import pandas as pd -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api -from llama_stack.core.ui.modules.utils import process_dataset - - -def application_evaluation_page(): - st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙") - st.title("📊 Evaluations (Scoring)") - - # File uploader - uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"]) - - if uploaded_file is None: - st.error("No file uploaded") - return - - # Process uploaded file - df = process_dataset(uploaded_file) - if df is None: - st.error("Error processing file") - return - - # Display dataset information - st.success("Dataset loaded successfully!") - - # Display dataframe preview - st.subheader("Dataset Preview") - st.dataframe(df) - - # Select Scoring Functions to Run Evaluation On - st.subheader("Select Scoring Functions") - scoring_functions = llama_stack_api.client.scoring_functions.list() - scoring_functions = {sf.identifier: sf for sf in scoring_functions} - scoring_functions_names = list(scoring_functions.keys()) - selected_scoring_functions = st.multiselect( - "Choose one or more scoring functions", - options=scoring_functions_names, - help="Choose one or more scoring functions.", - ) - - available_models = llama_stack_api.client.models.list() - available_models = [m.identifier for m in available_models] - - scoring_params = {} - if selected_scoring_functions: - st.write("Selected:") - for scoring_fn_id in selected_scoring_functions: - scoring_fn = scoring_functions[scoring_fn_id] - st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}") - new_params = None - if scoring_fn.params: - new_params = {} - for param_name, param_value in scoring_fn.params.to_dict().items(): - if param_name == "type": - new_params[param_name] = param_value - continue - - if param_name == "judge_model": - value = st.selectbox( - f"Select **{param_name}** for {scoring_fn_id}", - options=available_models, - index=0, - key=f"{scoring_fn_id}_{param_name}", - ) - new_params[param_name] = value - else: - value = st.text_area( - f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format", - value=json.dumps(param_value, indent=2), - height=80, - ) - try: - new_params[param_name] = json.loads(value) - except json.JSONDecodeError: - st.error(f"Invalid JSON for **{param_name}** in {scoring_fn_id}") - - st.json(new_params) - scoring_params[scoring_fn_id] = new_params - - # Add run evaluation button & slider - total_rows = len(df) - num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows) - - if st.button("Run Evaluation"): - progress_text = "Running evaluation..." 
- progress_bar = st.progress(0, text=progress_text) - rows = df.to_dict(orient="records") - if num_rows < total_rows: - rows = rows[:num_rows] - - # Create separate containers for progress text and results - progress_text_container = st.empty() - results_container = st.empty() - output_res = {} - for i, r in enumerate(rows): - # Update progress - progress = i / len(rows) - progress_bar.progress(progress, text=progress_text) - - # Run evaluation for current row - score_res = llama_stack_api.run_scoring( - r, - scoring_function_ids=selected_scoring_functions, - scoring_params=scoring_params, - ) - - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for fn_id in selected_scoring_functions: - if fn_id not in output_res: - output_res[fn_id] = [] - output_res[fn_id].append(score_res.results[fn_id].score_rows[0]) - - # Display current row results using separate containers - progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})") - results_container.json( - score_res.to_json(), - expanded=2, - ) - - progress_bar.progress(1.0, text="Evaluation complete!") - - # Display results in dataframe - if output_res: - output_df = pd.DataFrame(output_res) - st.subheader("Evaluation Results") - st.dataframe(output_df) - - -application_evaluation_page() diff --git a/llama_stack/core/ui/page/evaluations/native_eval.py b/llama_stack/core/ui/page/evaluations/native_eval.py deleted file mode 100644 index 2bef63b2f..000000000 --- a/llama_stack/core/ui/page/evaluations/native_eval.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json - -import pandas as pd -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - - -def select_benchmark_1(): - # Select Benchmarks - st.subheader("1. Choose An Eval Task") - benchmarks = llama_stack_api.client.benchmarks.list() - benchmarks = {et.identifier: et for et in benchmarks} - benchmarks_names = list(benchmarks.keys()) - selected_benchmark = st.selectbox( - "Choose an eval task.", - options=benchmarks_names, - help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.", - ) - with st.expander("View Eval Task"): - st.json(benchmarks[selected_benchmark], expanded=True) - - st.session_state["selected_benchmark"] = selected_benchmark - st.session_state["benchmarks"] = benchmarks - if st.button("Confirm", key="confirm_1"): - st.session_state["selected_benchmark_1_next"] = True - - -def define_eval_candidate_2(): - if not st.session_state.get("selected_benchmark_1_next", None): - return - - st.subheader("2. Define Eval Candidate") - st.info( - """ - Define the configurations for the evaluation candidate model or agent used for generation. - Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig. 
- """ - ) - with st.expander("Define Eval Candidate", expanded=True): - # Define Eval Candidate - candidate_type = st.radio("Candidate Type", ["model", "agent"]) - - available_models = llama_stack_api.client.models.list() - available_models = [model.identifier for model in available_models] - selected_model = st.selectbox( - "Choose a model", - available_models, - index=0, - ) - - # Sampling Parameters - st.markdown("##### Sampling Parameters") - temperature = st.slider( - "Temperature", - min_value=0.0, - max_value=1.0, - value=0.0, - step=0.1, - help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable", - ) - top_p = st.slider( - "Top P", - min_value=0.0, - max_value=1.0, - value=0.95, - step=0.1, - ) - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=1, - help="The maximum number of tokens to generate", - ) - repetition_penalty = st.slider( - "Repetition Penalty", - min_value=1.0, - max_value=2.0, - value=1.0, - step=0.1, - help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.", - ) - if candidate_type == "model": - if temperature > 0.0: - strategy = { - "type": "top_p", - "temperature": temperature, - "top_p": top_p, - } - else: - strategy = {"type": "greedy"} - - eval_candidate = { - "type": "model", - "model": selected_model, - "sampling_params": { - "strategy": strategy, - "max_tokens": max_tokens, - "repetition_penalty": repetition_penalty, - }, - } - elif candidate_type == "agent": - system_prompt = st.text_area( - "System Prompt", - value="You are a helpful AI assistant.", - help="Initial instructions given to the AI to set its behavior and context", - ) - tools_json = st.text_area( - "Tools Configuration (JSON)", - value=json.dumps( - [ - { - "type": "brave_search", - "engine": "brave", - "api_key": "ENTER_BRAVE_API_KEY_HERE", - } - ] - ), - help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.", - height=200, - ) - try: - tools = json.loads(tools_json) - except json.JSONDecodeError: - st.error("Invalid JSON format for tools configuration") - tools = [] - eval_candidate = { - "type": "agent", - "config": { - "model": selected_model, - "instructions": system_prompt, - "tools": tools, - "tool_choice": "auto", - "tool_prompt_format": "json", - "input_shields": [], - "output_shields": [], - "enable_session_persistence": False, - }, - } - st.session_state["eval_candidate"] = eval_candidate - - if st.button("Confirm", key="confirm_2"): - st.session_state["selected_eval_candidate_2_next"] = True - - -def run_evaluation_3(): - if not st.session_state.get("selected_eval_candidate_2_next", None): - return - - st.subheader("3. Run Evaluation") - # Add info box to explain configurations being used - st.info( - """ - Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button. 
- """ - ) - selected_benchmark = st.session_state["selected_benchmark"] - benchmarks = st.session_state["benchmarks"] - eval_candidate = st.session_state["eval_candidate"] - - dataset_id = benchmarks[selected_benchmark].dataset_id - rows = llama_stack_api.client.datasets.iterrows( - dataset_id=dataset_id, - ) - total_rows = len(rows.data) - # Add number of examples control - num_rows = st.number_input( - "Number of Examples to Evaluate", - min_value=1, - max_value=total_rows, - value=5, - help="Number of examples from the dataset to evaluate. ", - ) - - benchmark_config = { - "type": "benchmark", - "eval_candidate": eval_candidate, - "scoring_params": {}, - } - - with st.expander("View Evaluation Task", expanded=True): - st.json(benchmarks[selected_benchmark], expanded=True) - with st.expander("View Evaluation Task Configuration", expanded=True): - st.json(benchmark_config, expanded=True) - - # Add run button and handle evaluation - if st.button("Run Evaluation"): - progress_text = "Running evaluation..." - progress_bar = st.progress(0, text=progress_text) - rows = rows.data - if num_rows < total_rows: - rows = rows[:num_rows] - - # Create separate containers for progress text and results - progress_text_container = st.empty() - results_container = st.empty() - output_res = {} - for i, r in enumerate(rows): - # Update progress - progress = i / len(rows) - progress_bar.progress(progress, text=progress_text) - # Run evaluation for current row - eval_res = llama_stack_api.client.eval.evaluate_rows( - benchmark_id=selected_benchmark, - input_rows=[r], - scoring_functions=benchmarks[selected_benchmark].scoring_functions, - benchmark_config=benchmark_config, - ) - - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for k in eval_res.generations[0].keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(eval_res.generations[0][k]) - - for scoring_fn in benchmarks[selected_benchmark].scoring_functions: - if scoring_fn not in output_res: - output_res[scoring_fn] = [] - output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0]) - - progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})") - results_container.json(eval_res, expanded=2) - - progress_bar.progress(1.0, text="Evaluation complete!") - # Display results in dataframe - if output_res: - output_df = pd.DataFrame(output_res) - st.subheader("Evaluation Results") - st.dataframe(output_df) - - -def native_evaluation_page(): - st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙") - st.title("📊 Evaluations (Generation + Scoring)") - - select_benchmark_1() - define_eval_candidate_2() - run_evaluation_3() - - -native_evaluation_page() diff --git a/llama_stack/core/ui/page/playground/chat.py b/llama_stack/core/ui/page/playground/chat.py deleted file mode 100644 index d391d0fb7..000000000 --- a/llama_stack/core/ui/page/playground/chat.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import streamlit as st - -from llama_stack.core.ui.modules.api import llama_stack_api - -# Sidebar configurations -with st.sidebar: - st.header("Configuration") - available_models = llama_stack_api.client.models.list() - available_models = [model.identifier for model in available_models if model.model_type == "llm"] - selected_model = st.selectbox( - "Choose a model", - available_models, - index=0, - ) - - temperature = st.slider( - "Temperature", - min_value=0.0, - max_value=1.0, - value=0.0, - step=0.1, - help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable", - ) - - top_p = st.slider( - "Top P", - min_value=0.0, - max_value=1.0, - value=0.95, - step=0.1, - ) - - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=1, - help="The maximum number of tokens to generate", - ) - - repetition_penalty = st.slider( - "Repetition Penalty", - min_value=1.0, - max_value=2.0, - value=1.0, - step=0.1, - help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.", - ) - - stream = st.checkbox("Stream", value=True) - system_prompt = st.text_area( - "System Prompt", - value="You are a helpful AI assistant.", - help="Initial instructions given to the AI to set its behavior and context", - ) - - # Add clear chat button to sidebar - if st.button("Clear Chat", use_container_width=True): - st.session_state.messages = [] - st.rerun() - - -# Main chat interface -st.title("🦙 Chat") - - -# Initialize chat history -if "messages" not in st.session_state: - st.session_state.messages = [] - -# Display chat messages -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - -# Chat input -if prompt := st.chat_input("Example: What is Llama Stack?"): - # Add user message to chat history - st.session_state.messages.append({"role": "user", "content": prompt}) - - # Display user message - with st.chat_message("user"): - st.markdown(prompt) - - # Display assistant response - with st.chat_message("assistant"): - message_placeholder = st.empty() - full_response = "" - - if temperature > 0.0: - strategy = { - "type": "top_p", - "temperature": temperature, - "top_p": top_p, - } - else: - strategy = {"type": "greedy"} - - response = llama_stack_api.client.inference.chat_completion( - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": prompt}, - ], - model_id=selected_model, - stream=stream, - sampling_params={ - "strategy": strategy, - "max_tokens": max_tokens, - "repetition_penalty": repetition_penalty, - }, - ) - - if stream: - for chunk in response: - if chunk.event.event_type == "progress": - full_response += chunk.event.delta.text - message_placeholder.markdown(full_response + "▌") - message_placeholder.markdown(full_response) - else: - full_response = response.completion_message.content - message_placeholder.markdown(full_response) - - st.session_state.messages.append({"role": "assistant", "content": full_response}) diff --git a/llama_stack/core/ui/page/playground/tools.py b/llama_stack/core/ui/page/playground/tools.py deleted file mode 100644 index 16fd464ee..000000000 --- a/llama_stack/core/ui/page/playground/tools.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
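
Outside Streamlit, the streaming pattern used by the chat page above reduces to this sketch (the model id is a placeholder; `llama_stack_api` is the client wrapper from `modules/api.py`):

```python
from llama_stack.core.ui.modules.api import llama_stack_api

response = llama_stack_api.client.inference.chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What is Llama Stack?"},
    ],
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    stream=True,
    sampling_params={"strategy": {"type": "greedy"}, "max_tokens": 128},
)
# Each streamed chunk carries a progress event with a text delta.
for chunk in response:
    if chunk.event.event_type == "progress":
        print(chunk.event.delta.text, end="", flush=True)
```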
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import enum -import json -import uuid - -import streamlit as st -from llama_stack_client import Agent -from llama_stack_client.lib.agents.react.agent import ReActAgent -from llama_stack_client.lib.agents.react.tool_parser import ReActOutput - -from llama_stack.core.ui.modules.api import llama_stack_api - - -class AgentType(enum.Enum): - REGULAR = "Regular" - REACT = "ReAct" - - -def tool_chat_page(): - st.title("🛠 Tools") - - client = llama_stack_api.client - models = client.models.list() - model_list = [model.identifier for model in models if model.api_model_type == "llm"] - - tool_groups = client.toolgroups.list() - tool_groups_list = [tool_group.identifier for tool_group in tool_groups] - mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] - builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] - selected_vector_stores = [] - - def reset_agent(): - st.session_state.clear() - st.cache_resource.clear() - - with st.sidebar: - st.title("Configuration") - st.subheader("Model") - model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed") - - st.subheader("Available ToolGroups") - - toolgroup_selection = st.pills( - label="Built-in tools", - options=builtin_tools_list, - selection_mode="multi", - on_change=reset_agent, - format_func=lambda tool: "".join(tool.split("::")[1:]), - help="List of built-in tools from your llama stack server.", - ) - - if "builtin::rag" in toolgroup_selection: - vector_stores = llama_stack_api.client.vector_stores.list() or [] - if not vector_stores: - st.info("No vector databases available for selection.") - vector_stores = [vector_store.identifier for vector_store in vector_stores] - selected_vector_stores = st.multiselect( - label="Select Document Collections to use in RAG queries", - options=vector_stores, - on_change=reset_agent, - ) - - mcp_selection = st.pills( - label="MCP Servers", - options=mcp_tools_list, - selection_mode="multi", - on_change=reset_agent, - format_func=lambda tool: "".join(tool.split("::")[1:]), - help="List of MCP servers registered to your llama stack server.", - ) - - toolgroup_selection.extend(mcp_selection) - - grouped_tools = {} - total_tools = 0 - - for toolgroup_id in toolgroup_selection: - tools = client.tools.list(toolgroup_id=toolgroup_id) - grouped_tools[toolgroup_id] = [tool.name for tool in tools] - total_tools += len(tools) - - st.markdown(f"Active Tools: 🛠 {total_tools}") - - for group_id, tools in grouped_tools.items(): - with st.expander(f"🔧 Tools from `{group_id}`"): - for idx, tool in enumerate(tools, start=1): - st.markdown(f"{idx}. 
`{tool.split(':')[-1]}`") - - st.subheader("Agent Configurations") - st.subheader("Agent Type") - agent_type = st.radio( - label="Select Agent Type", - options=["Regular", "ReAct"], - on_change=reset_agent, - ) - - if agent_type == "ReAct": - agent_type = AgentType.REACT - else: - agent_type = AgentType.REGULAR - - max_tokens = st.slider( - "Max Tokens", - min_value=0, - max_value=4096, - value=512, - step=64, - help="The maximum number of tokens to generate", - on_change=reset_agent, - ) - - for i, tool_name in enumerate(toolgroup_selection): - if tool_name == "builtin::rag": - tool_dict = dict( - name="builtin::rag", - args={ - "vector_store_ids": list(selected_vector_stores), - }, - ) - toolgroup_selection[i] = tool_dict - - @st.cache_resource - def create_agent(): - if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT: - return ReActAgent( - client=client, - model=model, - tools=toolgroup_selection, - response_format={ - "type": "json_schema", - "json_schema": ReActOutput.model_json_schema(), - }, - sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, - ) - else: - return Agent( - client, - model=model, - instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.", - tools=toolgroup_selection, - sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, - ) - - st.session_state.agent_type = agent_type - - agent = create_agent() - - if "agent_session_id" not in st.session_state: - st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}") - - session_id = st.session_state["agent_session_id"] - - if "messages" not in st.session_state: - st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}] - - for msg in st.session_state.messages: - with st.chat_message(msg["role"]): - st.markdown(msg["content"]) - - if prompt := st.chat_input(placeholder=""): - with st.chat_message("user"): - st.markdown(prompt) - - st.session_state.messages.append({"role": "user", "content": prompt}) - - turn_response = agent.create_turn( - session_id=session_id, - messages=[{"role": "user", "content": prompt}], - stream=True, - ) - - def response_generator(turn_response): - if st.session_state.get("agent_type") == AgentType.REACT: - return _handle_react_response(turn_response) - else: - return _handle_regular_response(turn_response) - - def _handle_react_response(turn_response): - current_step_content = "" - final_answer = None - tool_results = [] - - for response in turn_response: - if not hasattr(response.event, "payload"): - yield ( - "\n\n🚨 :red[_Llama Stack server Error:_]\n" - "The response received is missing an expected `payload` attribute.\n" - "This could indicate a malformed response or an internal issue within the server.\n\n" - f"Error details: {response}" - ) - return - - payload = response.event.payload - - if payload.event_type == "step_progress" and hasattr(payload.delta, "text"): - current_step_content += payload.delta.text - continue - - if payload.event_type == "step_complete": - step_details = payload.step_details - - if step_details.step_type == "inference": - yield from _process_inference_step(current_step_content, tool_results, final_answer) - current_step_content = "" - elif step_details.step_type == "tool_execution": - tool_results = _process_tool_execution(step_details, tool_results) - current_step_content = "" - else: - current_step_content = "" - - if not final_answer and tool_results: - 
yield from _format_tool_results_summary(tool_results) - - def _process_inference_step(current_step_content, tool_results, final_answer): - try: - react_output_data = json.loads(current_step_content) - thought = react_output_data.get("thought") - action = react_output_data.get("action") - answer = react_output_data.get("answer") - - if answer and answer != "null" and answer is not None: - final_answer = answer - - if thought: - with st.expander("🤔 Thinking...", expanded=False): - st.markdown(f":grey[__{thought}__]") - - if action and isinstance(action, dict): - tool_name = action.get("tool_name") - tool_params = action.get("tool_params") - with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False): - st.json(tool_params) - - if answer and answer != "null" and answer is not None: - yield f"\n\n✅ **Final Answer:**\n{answer}" - - except json.JSONDecodeError: - yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```" - except Exception as e: - yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```" - - return final_answer - - def _process_tool_execution(step_details, tool_results): - try: - if hasattr(step_details, "tool_responses") and step_details.tool_responses: - for tool_response in step_details.tool_responses: - tool_name = tool_response.tool_name - content = tool_response.content - tool_results.append((tool_name, content)) - with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False): - try: - parsed_content = json.loads(content) - st.json(parsed_content) - except json.JSONDecodeError: - st.code(content, language=None) - else: - with st.expander("⚙️ Observation", expanded=False): - st.markdown(":grey[_Tool execution step completed, but no response data found._]") - except Exception as e: - with st.expander("⚙️ Error in Tool Execution", expanded=False): - st.markdown(f":red[_Error processing tool execution: {str(e)}_]") - - return tool_results - - def _format_tool_results_summary(tool_results): - yield "\n\n**Here's what I found:**\n" - for tool_name, content in tool_results: - try: - parsed_content = json.loads(content) - - if tool_name == "web_search" and "top_k" in parsed_content: - yield from _format_web_search_results(parsed_content) - elif "results" in parsed_content and isinstance(parsed_content["results"], list): - yield from _format_results_list(parsed_content["results"]) - elif isinstance(parsed_content, dict) and len(parsed_content) > 0: - yield from _format_dict_results(parsed_content) - elif isinstance(parsed_content, list) and len(parsed_content) > 0: - yield from _format_list_results(parsed_content) - except json.JSONDecodeError: - yield f"\n**{tool_name}** was used but returned complex data. 
Check the observation for details.\n" - except (TypeError, AttributeError, KeyError, IndexError) as e: - print(f"Error processing {tool_name} result: {type(e).__name__}: {e}") - - def _format_web_search_results(parsed_content): - for i, result in enumerate(parsed_content["top_k"], 1): - if i <= 3: - title = result.get("title", "Untitled") - url = result.get("url", "") - content_text = result.get("content", "").strip() - yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n" - - def _format_results_list(results): - for i, result in enumerate(results, 1): - if i <= 3: - if isinstance(result, dict): - name = result.get("name", result.get("title", "Result " + str(i))) - description = result.get("description", result.get("content", result.get("summary", ""))) - yield f"\n- **{name}**\n {description}\n" - else: - yield f"\n- {result}\n" - - def _format_dict_results(parsed_content): - yield "\n```\n" - for key, value in list(parsed_content.items())[:5]: - if isinstance(value, str) and len(value) < 100: - yield f"{key}: {value}\n" - else: - yield f"{key}: [Complex data]\n" - yield "```\n" - - def _format_list_results(parsed_content): - yield "\n" - for _, item in enumerate(parsed_content[:3], 1): - if isinstance(item, str): - yield f"- {item}\n" - elif isinstance(item, dict) and "text" in item: - yield f"- {item['text']}\n" - elif isinstance(item, dict) and len(item) > 0: - first_value = next(iter(item.values())) - if isinstance(first_value, str) and len(first_value) < 100: - yield f"- {first_value}\n" - - def _handle_regular_response(turn_response): - for response in turn_response: - if hasattr(response.event, "payload"): - print(response.event.payload) - if response.event.payload.event_type == "step_progress": - if hasattr(response.event.payload.delta, "text"): - yield response.event.payload.delta.text - if response.event.payload.event_type == "step_complete": - if response.event.payload.step_details.step_type == "tool_execution": - if response.event.payload.step_details.tool_calls: - tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name) - yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n' - else: - yield "No tool_calls present in step_details" - else: - yield f"Error occurred in the Llama Stack Cluster: {response}" - - with st.chat_message("assistant"): - response_content = st.write_stream(response_generator(turn_response)) - - st.session_state.messages.append({"role": "assistant", "content": response_content}) - - -tool_chat_page() diff --git a/llama_stack/core/ui/requirements.txt b/llama_stack/core/ui/requirements.txt deleted file mode 100644 index 53a1e7bf3..000000000 --- a/llama_stack/core/ui/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -llama-stack>=0.2.1 -llama-stack-client>=0.2.1 -pandas -streamlit -streamlit-option-menu diff --git a/llama_stack/core/utils/context.py b/llama_stack/core/utils/context.py deleted file mode 100644 index 24b249890..000000000 --- a/llama_stack/core/utils/context.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from collections.abc import AsyncGenerator -from contextvars import ContextVar - - -def preserve_contexts_async_generator[T]( - gen: AsyncGenerator[T, None], context_vars: list[ContextVar] -) -> AsyncGenerator[T, None]: - """ - Wraps an async generator to preserve context variables across iterations. 
- This is needed because we start a new asyncio event loop for each streaming request, - and we need to preserve the context across the event loop boundary. - """ - # Capture initial context values - initial_context_values = {context_var.name: context_var.get() for context_var in context_vars} - - async def wrapper() -> AsyncGenerator[T, None]: - while True: - try: - # Restore context values before any await - for context_var in context_vars: - context_var.set(initial_context_values[context_var.name]) - - item = await gen.__anext__() - - # Update our tracked values with any changes made during this iteration - for context_var in context_vars: - initial_context_values[context_var.name] = context_var.get() - - yield item - - except StopAsyncIteration: - break - - return wrapper() diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml deleted file mode 100644 index c01e415a9..000000000 --- a/llama_stack/distributions/ci-tests/build.yaml +++ /dev/null @@ -1,59 +0,0 @@ -version: 2 -distribution_spec: - description: CI tests for Llama Stack - providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::torchtune-cpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml deleted file mode 100644 index ecf9eed3b..000000000 --- a/llama_stack/distributions/ci-tests/run.yaml +++ /dev/null @@ -1,276 +0,0 @@ -version: 2 -image_name: ci-tests -apis: -- agents -- batches -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_type: remote::cerebras - config: - base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY:=} - - provider_id: ${env.OLLAMA_URL:+ollama} - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: 
${env.VLLM_URL:+vllm} - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.TGI_URL:+tgi} - provider_type: remote::tgi - config: - url: ${env.TGI_URL:=} - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:=} - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:=} - - provider_id: bedrock - provider_type: remote::bedrock - - provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} - - provider_id: anthropic - provider_type: remote::anthropic - config: - api_key: ${env.ANTHROPIC_API_KEY:=} - - provider_id: gemini - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY:=} - - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} - provider_type: remote::vertexai - config: - project: ${env.VERTEX_AI_PROJECT:=} - location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:=} - - provider_id: sambanova - provider_type: remote::sambanova - config: - url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:=} - - provider_id: ${env.AZURE_API_KEY:+azure} - provider_type: remote::azure - config: - api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} - api_version: ${env.AZURE_API_VERSION:=} - api_type: ${env.AZURE_API_TYPE:=} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db - persistence: - namespace: vector_io::sqlite_vec - backend: kv_default - - provider_id: ${env.MILVUS_URL:+milvus} - provider_type: inline::milvus - config: - db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db - persistence: - namespace: vector_io::milvus - backend: kv_default - - provider_id: ${env.CHROMADB_URL:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - - provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} - persistence: - namespace: vector_io::pgvector - backend: kv_default - - provider_id: ${env.QDRANT_URL:+qdrant} - provider_type: remote::qdrant - config: - api_key: ${env.QDRANT_API_KEY:=} - persistence: - namespace: vector_io::qdrant_remote - backend: kv_default - - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} - provider_type: remote::weaviate - config: - weaviate_api_key: null - weaviate_cluster_url: 
${env.WEAVIATE_CLUSTER_URL:=localhost:8080} - persistence: - namespace: vector_io::weaviate - backend: kv_default - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} - metadata_store: - table_name: files_metadata - backend: sql_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - provider_id: code-scanner - provider_type: inline::code-scanner - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - post_training: - - provider_id: torchtune-cpu - provider_type: inline::torchtune-cpu - config: - checkpoint_format: meta - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - batches: - - provider_id: reference - provider_type: inline::reference - config: - kvstore: - namespace: batches - backend: kv_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: [] - shields: - - shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} - - shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true -vector_stores: - default_provider_id: faiss - default_embedding_model: - provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/dell/doc_template.md b/llama_stack/distributions/dell/doc_template.md deleted file mode 100644 index 
4e28673e8..000000000
--- a/llama_stack/distributions/dell/doc_template.md
+++ /dev/null
@@ -1,178 +0,0 @@
----
-orphan: true
----
-
-# Dell Distribution of Llama Stack
-
-```{toctree}
-:maxdepth: 2
-:hidden:
-
-self
-```
-
-The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
-
-{{ providers_table }}
-
-You can use this distribution if you have GPUs and want to run an independent TGI or Dell Enterprise Hub container for running inference.
-
-{% if run_config_env_vars %}
-### Environment Variables
-
-The following environment variables can be configured:
-
-{% for var, (default_value, description) in run_config_env_vars.items() %}
-- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
-{% endfor %}
-{% endif %}
-
-
-## Setting up the inference server using Dell Enterprise Hub's custom TGI container
-
-NOTE: This is a placeholder to run inference with TGI. It will be updated to use [Dell Enterprise Hub's containers](https://dell.huggingface.co/authenticated/models) once verified.
-
-```bash
-export INFERENCE_PORT=8181
-export DEH_URL=http://0.0.0.0:$INFERENCE_PORT
-export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
-export CHROMADB_HOST=localhost
-export CHROMADB_PORT=6601
-export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT
-export CUDA_VISIBLE_DEVICES=0
-export LLAMA_STACK_PORT=8321
-
-docker run --rm -it \
-  --pull always \
-  --network host \
-  -v $HOME/.cache/huggingface:/data \
-  -e HF_TOKEN=$HF_TOKEN \
-  -p $INFERENCE_PORT:$INFERENCE_PORT \
-  --gpus $CUDA_VISIBLE_DEVICES \
-  ghcr.io/huggingface/text-generation-inference \
-  --dtype bfloat16 \
-  --usage-stats off \
-  --sharded false \
-  --cuda-memory-fraction 0.7 \
-  --model-id $INFERENCE_MODEL \
-  --port $INFERENCE_PORT --hostname 0.0.0.0
-```
-
-If you are using Llama Stack Safety / Shield APIs, you will also need to run a second TGI instance with a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a script like:
-
-```bash
-export SAFETY_INFERENCE_PORT=8282
-export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT
-export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
-export CUDA_VISIBLE_DEVICES=1
-
-docker run --rm -it \
-  --pull always \
-  --network host \
-  -v $HOME/.cache/huggingface:/data \
-  -e HF_TOKEN=$HF_TOKEN \
-  -p $SAFETY_INFERENCE_PORT:$SAFETY_INFERENCE_PORT \
-  --gpus $CUDA_VISIBLE_DEVICES \
-  ghcr.io/huggingface/text-generation-inference \
-  --dtype bfloat16 \
-  --usage-stats off \
-  --sharded false \
-  --cuda-memory-fraction 0.7 \
-  --model-id $SAFETY_MODEL \
-  --hostname 0.0.0.0 \
-  --port $SAFETY_INFERENCE_PORT
-```
-
-## The Dell distribution relies on ChromaDB for vector database usage
-
-You can easily start a ChromaDB container using Docker or Podman:
-
-```bash
-# This is where the indices are persisted
-mkdir -p $HOME/chromadb
-
-podman run --rm -it \
-  --network host \
-  --name chromadb \
-  -v $HOME/chromadb:/chroma/chroma \
-  -e IS_PERSISTENT=TRUE \
-  chromadb/chroma:latest \
-  --port $CHROMADB_PORT \
-  --host $CHROMADB_HOST
-```
-
-## Running Llama Stack
-
-Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (building from source) or Docker, which has a pre-built image.
-
-### Via Docker
-
-This method allows you to get started quickly without having to build the distribution code.
-
-```bash
-# NOTE: mount the llama-stack directory only if testing local changes; otherwise it is not needed.
-# Use localhost/distribution-dell:dev as the image if building / testing locally.
-docker run -it \
-  --pull always \
-  --network host \
-  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v $HOME/.llama:/root/.llama \
-  -v $HOME/git/llama-stack:/app/llama-stack-source \
-  -e INFERENCE_MODEL=$INFERENCE_MODEL \
-  -e DEH_URL=$DEH_URL \
-  -e CHROMA_URL=$CHROMA_URL \
-  llamastack/distribution-{{ name }} \
-  --port $LLAMA_STACK_PORT
-```
-
-If you are using Llama Stack Safety / Shield APIs, use:
-
-```bash
-# You need a local checkout of llama-stack to run this, get it using
-# git clone https://github.com/meta-llama/llama-stack.git
-cd /path/to/llama-stack
-
-export SAFETY_INFERENCE_PORT=8282
-export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT
-export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
-
-docker run \
-  -it \
-  --pull always \
-  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v $HOME/.llama:/root/.llama \
-  -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
-  -e INFERENCE_MODEL=$INFERENCE_MODEL \
-  -e DEH_URL=$DEH_URL \
-  -e SAFETY_MODEL=$SAFETY_MODEL \
-  -e DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  -e CHROMA_URL=$CHROMA_URL \
-  llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT
-```
-
-### Via Conda
-
-Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
-
-```bash
-llama stack list-deps {{ name }} | xargs -L1 pip install
-INFERENCE_MODEL=$INFERENCE_MODEL \
-DEH_URL=$DEH_URL \
-CHROMA_URL=$CHROMA_URL \
-llama stack run {{ name }} \
-  --port $LLAMA_STACK_PORT
-```
-
-If you are using Llama Stack Safety / Shield APIs, use:
-
-```bash
-INFERENCE_MODEL=$INFERENCE_MODEL \
-DEH_URL=$DEH_URL \
-SAFETY_MODEL=$SAFETY_MODEL \
-DEH_SAFETY_URL=$DEH_SAFETY_URL \
-CHROMA_URL=$CHROMA_URL \
-llama stack run ./run-with-safety.yaml \
-  --port $LLAMA_STACK_PORT
-```
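Before launching the stack against the Dell setup above, it can save a debugging cycle to confirm that both TGI containers are actually serving. A minimal sketch, assuming the `DEH_URL`/`DEH_SAFETY_URL` values exported in the deleted doc and that the containers expose TGI's standard `/health` probe:

```python
import os
import urllib.request

# Endpoints as exported in the setup steps; adjust if you changed the ports.
endpoints = {
    "inference": os.environ.get("DEH_URL", "http://0.0.0.0:8181"),
    "safety": os.environ.get("DEH_SAFETY_URL", "http://0.0.0.0:8282"),
}

for name, base in endpoints.items():
    try:
        # TGI answers 200 on /health once the model is loaded.
        with urllib.request.urlopen(f"{base}/health", timeout=5) as resp:
            print(f"{name}: ok ({resp.status})")
    except Exception as exc:  # reachability check only, not production code
        print(f"{name}: unreachable ({exc})")
```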
diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml deleted file mode 100644 index 2563f2f4b..000000000 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ /dev/null @@ -1,141 +0,0 @@ -version: 2 -image_name: dell -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: tgi0 - provider_type: remote::tgi - config: - url: ${env.DEH_URL} - - provider_id: tgi1 - provider_type: remote::tgi - config: - url: ${env.DEH_SAFETY_URL} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: chromadb - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: 
inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi0 - model_type: llm - - metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: tgi1 - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: - - shield_id: ${env.SAFETY_MODEL} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: brave-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml deleted file mode 100644 index 7bada394f..000000000 --- a/llama_stack/distributions/dell/run.yaml +++ /dev/null @@ -1,132 +0,0 @@ -version: 2 -image_name: dell -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: tgi0 - provider_type: remote::tgi - config: - url: ${env.DEH_URL} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: chromadb - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - 
provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi0 - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: [] - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: brave-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true
diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/llama_stack/distributions/meta-reference-gpu/doc_template.md
deleted file mode 100644
index ec4452d81..000000000
--- a/llama_stack/distributions/meta-reference-gpu/doc_template.md
+++ /dev/null
@@ -1,89 +0,0 @@
----
-orphan: true
----
-# Meta Reference GPU Distribution
-
-```{toctree}
-:maxdepth: 2
-:hidden:
-
-self
-```
-
-The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
-
-{{ providers_table }}
-
-Note that you need access to NVIDIA GPUs to run this distribution. It is not compatible with CPU-only machines or machines with AMD GPUs.
-
-{% if run_config_env_vars %}
-### Environment Variables
-
-The following environment variables can be configured:
-
-{% for var, (default_value, description) in run_config_env_vars.items() %}
-- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
-{% endfor %}
-{% endif %}
-
-
-## Prerequisite: Downloading Models
-
-Please check that you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) to download the models using the Hugging Face CLI.
-
-## Running the Distribution
-
-You can do this via venv or Docker, which has a pre-built image.
-
-### Via Docker
-
-This method allows you to get started quickly without having to build the distribution code.
-
-```bash
-LLAMA_STACK_PORT=8321
-docker run \
-  -it \
-  --pull always \
-  --gpus all \
-  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
-  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  llamastack/distribution-{{ name }} \
-  --port $LLAMA_STACK_PORT
-```
-
-If you are using Llama Stack Safety / Shield APIs, use:
-
-```bash
-docker run \
-  -it \
-  --pull always \
-  --gpus all \
-  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
-  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
-  llamastack/distribution-{{ name }} \
-  --port $LLAMA_STACK_PORT
-```
-
-### Via venv
-
-Make sure you have the Llama Stack CLI available.
-
-```bash
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-llama stack run distributions/{{ name }}/run.yaml \
-  --port 8321
-```
-
-If you are using Llama Stack Safety / Shield APIs, use:
-
-```bash
-INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
-llama stack run distributions/{{ name }}/run-with-safety.yaml \
-  --port 8321
-```
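Since the meta-reference distribution hard-requires CUDA devices, a quick preflight check avoids a confusing failure at model-load time. A minimal sketch, assuming PyTorch is installed in the same environment (it is pulled in by this distribution's dependencies):

```python
import torch

# The meta-reference provider loads checkpoints directly onto CUDA devices,
# so refuse to continue when none are visible.
if not torch.cuda.is_available():
    raise SystemExit("No CUDA device visible; this distribution requires NVIDIA GPUs.")

for idx in range(torch.cuda.device_count()):
    props = torch.cuda.get_device_properties(idx)
    print(f"cuda:{idx} {props.name} ({props.total_memory / 1e9:.1f} GB)")
```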
diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml deleted file mode 100644 index 01b5db4f9..000000000 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ /dev/null @@ -1,154 +0,0 @@ -version: 2 -image_name: meta-reference-gpu -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: meta-reference-inference - provider_type: inline::meta-reference - config: - model: ${env.INFERENCE_MODEL} - checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null} - quantization: - type: ${env.QUANTIZATION_TYPE:=bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} - max_batch_size: ${env.MAX_BATCH_SIZE:=1} - max_seq_len: ${env.MAX_SEQ_LEN:=4096} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - - provider_id: meta-reference-safety - provider_type: inline::meta-reference - config: - model: ${env.SAFETY_MODEL} - checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null} - quantization: - type: ${env.QUANTIZATION_TYPE:=bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} - max_batch_size: ${env.MAX_BATCH_SIZE:=1} - max_seq_len: ${env.MAX_SEQ_LEN:=4096} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: 
inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: meta-reference-inference - model_type: llm - - metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: meta-reference-safety - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: - - shield_id: ${env.SAFETY_MODEL} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml deleted file mode 100644 index 87c33dde0..000000000 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ /dev/null @@ -1,139 +0,0 @@ -version: 2 -image_name: meta-reference-gpu -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: meta-reference-inference - provider_type: inline::meta-reference - config: - model: ${env.INFERENCE_MODEL} - checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null} - quantization: - type: ${env.QUANTIZATION_TYPE:=bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} - max_batch_size: ${env.MAX_BATCH_SIZE:=1} - max_seq_len: ${env.MAX_SEQ_LEN:=4096} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: 
inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: meta-reference-inference - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: [] - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true
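The run configurations above and below lean heavily on two substitution forms: `${env.VAR:=default}` (use `VAR`, falling back to a default, possibly empty) and `${env.VAR:+value}` (expand to `value` only when `VAR` is set, which is how optional providers are switched on). A minimal Python sketch of those semantics, as an illustration only and not the stack's actual resolver:

```python
import os
import re

# Illustrative re-implementation of the two operators used in the YAML files:
#   ${env.NAME:=default} -> value of NAME, or "default" when unset
#   ${env.NAME:+value}   -> "value" when NAME is set, "" otherwise
_PATTERN = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")

def resolve(text: str) -> str:
    def _sub(match: re.Match) -> str:
        name, op, operand = match.groups()
        value = os.environ.get(name)
        if op == "=":
            return value if value is not None else operand
        return operand if value else ""  # op == "+"
    return _PATTERN.sub(_sub, text)

# With VLLM_URL unset, the provider_id collapses to an empty string and the
# provider is treated as disabled; with it set, both lines become concrete.
print(resolve("provider_id: ${env.VLLM_URL:+vllm}"))
print(resolve("url: ${env.VLLM_URL:=}"))
```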
diff --git a/llama_stack/distributions/nvidia/doc_template.md b/llama_stack/distributions/nvidia/doc_template.md
deleted file mode 100644
index 40f39e4f3..000000000
--- a/llama_stack/distributions/nvidia/doc_template.md
+++ /dev/null
@@ -1,141 +0,0 @@
----
-orphan: true
----
-# NVIDIA Distribution
-
-The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
-
-{{ providers_table }}
-
-{% if run_config_env_vars %}
-### Environment Variables
-
-The following environment variables can be configured:
-
-{% for var, (default_value, description) in run_config_env_vars.items() %}
-- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
-{% endfor %}
-{% endif %}
-
-{% if default_models %}
-### Models
-
-The following models are available by default:
-
-{% for model in default_models %}
-- `{{ model.model_id }} {{ model.doc_string }}`
-{% endfor %}
-{% endif %}
-
-
-## Prerequisites
-### NVIDIA API Keys
-
-Make sure you have access to an NVIDIA API key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable.
-
-### Deploy NeMo Microservices Platform
-The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please refer to the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform.
-
-## Supported Services
-Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints.
-
-### Inference: NVIDIA NIM
-NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs:
- 1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (requires an API key)
- 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure.
-
-The deployed platform includes the NIM Proxy microservice, which is the service that provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment.
-
-### Datasetio API: NeMo Data Store
-The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint.
-
-See the [NVIDIA Datasetio docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage.
-
-### Eval API: NeMo Evaluator
-The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint.
-
-See the [NVIDIA Eval docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage.
-
-### Post-Training API: NeMo Customizer
-The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint.
-
-See the [NVIDIA Post-Training docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage.
-
-### Safety API: NeMo Guardrails
-The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint.
-
-See the [NVIDIA Safety docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/safety/nvidia/README.md) for supported features and example usage.
-
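Because the Data Store speaks the Hugging Face Hub protocol (see the Datasetio section above), uploading a dataset can be sketched with the standard `huggingface_hub` client pointed at the Data Store endpoint. The endpoint path, repo id, and file name here are illustrative assumptions; substitute your own deployment's values:

```python
from huggingface_hub import HfApi

# Assumed HF-compatible endpoint of a local NeMo Data Store deployment.
api = HfApi(endpoint="http://datastore.test/v1/hf", token="not-used")

api.create_repo(repo_id="default/my-eval-set", repo_type="dataset", exist_ok=True)
api.upload_file(
    path_or_fileobj="my_eval_set.jsonl",
    path_in_repo="my_eval_set.jsonl",
    repo_id="default/my-eval-set",
    repo_type="dataset",
)
```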
-## Deploying models
-In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`.
-
-Note: For improved inference speeds, you need to use NIM with the `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart.
-
-```sh
-# URL to NeMo NIM Proxy service
-export NEMO_URL="http://nemo.test"
-
-curl --location "$NEMO_URL/v1/deployment/model-deployments" \
-  -H 'accept: application/json' \
-  -H 'Content-Type: application/json' \
-  -d '{
-    "name": "llama-3.2-1b-instruct",
-    "namespace": "meta",
-    "config": {
-      "model": "meta/llama-3.2-1b-instruct",
-      "nim_deployment": {
-        "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct",
-        "image_tag": "1.8.3",
-        "pvc_size": "25Gi",
-        "gpu": 1,
-        "additional_envs": {
-          "NIM_GUIDED_DECODING_BACKEND": "fast_outlines"
-        }
-      }
-    }
-  }'
-```
-This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference.
-
-You can also remove a deployed NIM to free up GPU resources, if needed.
-
-```sh
-export NEMO_URL="http://nemo.test"
-
-curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct"
-```
-
-## Running Llama Stack with NVIDIA
-
-You can do this via venv (building from source) or Docker, which has a pre-built image.
-
-### Via Docker
-
-This method allows you to get started quickly without having to build the distribution code.
-
-```bash
-LLAMA_STACK_PORT=8321
-docker run \
-  -it \
-  --pull always \
-  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
-  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
-  llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT
-```
-
-### Via venv
-
-If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
-
-```bash
-INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
-llama stack list-deps nvidia | xargs -L1 uv pip install
-NVIDIA_API_KEY=$NVIDIA_API_KEY \
-INFERENCE_MODEL=$INFERENCE_MODEL \
-llama stack run ./run.yaml \
-  --port 8321
-```
-
-## Example Notebooks
-For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, you can refer to the example notebooks in [docs/notebooks/nvidia](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks/nvidia).
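To script the "wait until the NIM is live" step from the deployment section above, one can poll the deployment resource. This sketch assumes the `model-deployments` endpoint also answers `GET` for a single deployment and reports a `status` field; check the NeMo deployment API reference for the exact schema:

```python
import json
import time
import urllib.request

NEMO_URL = "http://nemo.test"  # same endpoint as the curl examples above
DEPLOYMENT = "meta/llama-3.2-1b-instruct"  # namespace/name used at creation time

while True:
    # Assumption: the deployment resource supports GET and exposes its state.
    url = f"{NEMO_URL}/v1/deployment/model-deployments/{DEPLOYMENT}"
    with urllib.request.urlopen(url) as resp:
        deployment = json.load(resp)
    print("deployment status:", deployment.get("status"))
    if deployment.get("status") == "ready":
        break
    time.sleep(30)  # NIM bring-up typically takes several minutes
```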
diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml deleted file mode 100644 index c23d0f9cb..000000000 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ /dev/null @@ -1,137 +0,0 @@ -version: 2 -image_name: nvidia -apis: -- agents -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: nvidia - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: nvidia - provider_type: remote::nvidia - config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} - post_training: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:=} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} - datasetio: - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:=} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} - scoring: - - provider_id: basic - provider_type: inline::basic - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} - metadata_store: - table_name: files_metadata - backend: sql_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: nvidia - model_type: llm - - metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: nvidia - model_type: llm - shields: - - shield_id: ${env.SAFETY_MODEL} - provider_id: nvidia - vector_dbs: 
[] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml deleted file mode 100644 index 81e744d53..000000000 --- a/llama_stack/distributions/nvidia/run.yaml +++ /dev/null @@ -1,116 +0,0 @@ -version: 2 -image_name: nvidia -apis: -- agents -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: nvidia - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: nvidia - provider_type: remote::nvidia - config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} - post_training: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:=} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} - datasetio: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:=} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} - scoring: - - provider_id: basic - provider_type: inline::basic - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} - metadata_store: - table_name: files_metadata - backend: sql_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: [] - shields: [] - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml deleted file mode 100644 index 4fd0e199b..000000000 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ /dev/null @@ -1,252 +0,0 @@ 
-version: 2 -image_name: open-benchmark -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} - - provider_id: anthropic - provider_type: remote::anthropic - config: - api_key: ${env.ANTHROPIC_API_KEY:=} - - provider_id: gemini - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY:=} - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:=} - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:=} - vector_io: - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db - persistence: - namespace: vector_io::sqlite_vec - backend: kv_default - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} - persistence: - namespace: vector_io::pgvector - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - 
table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - metadata: {} - model_id: gpt-4o - provider_id: openai - provider_model_id: gpt-4o - model_type: llm - - metadata: {} - model_id: claude-3-5-sonnet-latest - provider_id: anthropic - provider_model_id: claude-3-5-sonnet-latest - model_type: llm - - metadata: {} - model_id: gemini/gemini-1.5-flash - provider_id: gemini - provider_model_id: gemini/gemini-1.5-flash - model_type: llm - - metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: groq - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm - - metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm - shields: - - shield_id: meta-llama/Llama-Guard-3-8B - vector_dbs: [] - datasets: - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/simpleqa?split=train - metadata: {} - dataset_id: simpleqa - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all - metadata: {} - dataset_id: mmlu_cot - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main - metadata: {} - dataset_id: gpqa_cot - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/math_500?split=test - metadata: {} - dataset_id: math_500 - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/IfEval?split=train - metadata: {} - dataset_id: ifeval - - purpose: eval/messages-answer - source: - type: uri - uri: huggingface://datasets/llamastack/docvqa?split=val - metadata: {} - dataset_id: docvqa - scoring_fns: [] - benchmarks: - - dataset_id: simpleqa - scoring_functions: - - llm-as-judge::405b-simpleqa - metadata: {} - benchmark_id: meta-reference-simpleqa - - dataset_id: mmlu_cot - scoring_functions: - - basic::regex_parser_multiple_choice_answer - metadata: {} - benchmark_id: meta-reference-mmlu-cot - - dataset_id: gpqa_cot - scoring_functions: - - basic::regex_parser_multiple_choice_answer - metadata: {} - benchmark_id: meta-reference-gpqa-cot - - dataset_id: math_500 - scoring_functions: - - basic::regex_parser_math_response - metadata: {} - benchmark_id: meta-reference-math-500 - - dataset_id: ifeval - scoring_functions: - - basic::ifeval - metadata: {} - benchmark_id: meta-reference-ifeval - - dataset_id: docvqa - scoring_functions: - - basic::docvqa - metadata: {} - benchmark_id: meta-reference-docvqa - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/postgres-demo/__init__.py b/llama_stack/distributions/postgres-demo/__init__.py deleted file mode 100644 index 81473cb73..000000000 --- a/llama_stack/distributions/postgres-demo/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-
-from .postgres_demo import get_distribution_template  # noqa: F401
diff --git a/llama_stack/distributions/postgres-demo/build.yaml b/llama_stack/distributions/postgres-demo/build.yaml
deleted file mode 100644
index 063dc3999..000000000
--- a/llama_stack/distributions/postgres-demo/build.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-version: 2
-distribution_spec:
-  description: Quick start template for running Llama Stack with several popular providers
-  providers:
-    inference:
-    - provider_type: remote::vllm
-    - provider_type: inline::sentence-transformers
-    vector_io:
-    - provider_type: remote::chromadb
-    safety:
-    - provider_type: inline::llama-guard
-    agents:
-    - provider_type: inline::meta-reference
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-    - provider_type: remote::model-context-protocol
-image_type: venv
-additional_pip_packages:
-- asyncpg
-- psycopg2-binary
-- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/postgres-demo/postgres_demo.py b/llama_stack/distributions/postgres-demo/postgres_demo.py
deleted file mode 100644
index 876370ef3..000000000
--- a/llama_stack/distributions/postgres-demo/postgres_demo.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-
-from llama_stack.apis.models import ModelType
-from llama_stack.core.datatypes import (
-    BuildProvider,
-    ModelInput,
-    Provider,
-    ShieldInput,
-    ToolGroupInput,
-)
-from llama_stack.distributions.template import (
-    DistributionTemplate,
-    RunConfigSettings,
-)
-from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
-from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
-from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
-from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
-
-
-def get_distribution_template() -> DistributionTemplate:
-    inference_providers = [
-        Provider(
-            provider_id="vllm-inference",
-            provider_type="remote::vllm",
-            config=VLLMInferenceAdapterConfig.sample_run_config(
-                url="${env.VLLM_URL:=http://localhost:8000/v1}",
-            ),
-        ),
-    ]
-    providers = {
-        "inference": [
-            BuildProvider(provider_type="remote::vllm"),
-            BuildProvider(provider_type="inline::sentence-transformers"),
-        ],
-        "vector_io": [BuildProvider(provider_type="remote::chromadb")],
-        "safety": [BuildProvider(provider_type="inline::llama-guard")],
-        "agents": [BuildProvider(provider_type="inline::meta-reference")],
-        "tool_runtime": [
-            BuildProvider(provider_type="remote::brave-search"),
-            BuildProvider(provider_type="remote::tavily-search"),
-            BuildProvider(provider_type="inline::rag-runtime"),
-            BuildProvider(provider_type="remote::model-context-protocol"),
-        ],
-    }
-    name = "postgres-demo"
-
-    vector_io_providers = [
-        Provider(
-            provider_id="${env.ENABLE_CHROMADB:+chromadb}",
-            provider_type="remote::chromadb",
-            config=ChromaVectorIOConfig.sample_run_config(
-                f"~/.llama/distributions/{name}",
-                url="${env.CHROMADB_URL:=}",
-            ),
-        ),
-    ]
-    default_tool_groups = [
-        ToolGroupInput(
-            toolgroup_id="builtin::websearch",
-            provider_id="tavily-search",
-        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::rag",
-            provider_id="rag-runtime",
-        ),
-    ]
-
-    default_models = [
-        ModelInput(
-            model_id="${env.INFERENCE_MODEL}",
-            provider_id="vllm-inference",
-        )
-    ]
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-    embedding_model = ModelInput(
-        model_id="nomic-embed-text-v1.5",
-        provider_id=embedding_provider.provider_id,
-        model_type=ModelType.embedding,
-        metadata={
-            "embedding_dimension": 768,
-        },
-    )
-    return DistributionTemplate(
-        name=name,
-        distro_type="self_hosted",
-        description="Quick start template for running Llama Stack with several popular providers",
-        container_image=None,
-        template_path=None,
-        providers=providers,
-        available_models_by_provider={},
-        run_configs={
-            "run.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": inference_providers + [embedding_provider],
-                    "vector_io": vector_io_providers,
-                },
-                default_models=default_models + [embedding_model],
-                default_tool_groups=default_tool_groups,
-                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
-                storage_backends={
-                    "kv_default": PostgresKVStoreConfig.sample_run_config(
-                        table_name="llamastack_kvstore",
-                    ),
-                    "sql_default": PostgresSqlStoreConfig.sample_run_config(),
-                },
-            ),
-        },
-        run_config_env_vars={
-            "LLAMA_STACK_PORT": (
-                "8321",
-                "Port for the Llama Stack distribution server",
-            ),
-        },
-    )
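The run.yaml below stores all KV and SQL state in Postgres, so the `POSTGRES_*` variables must point at a reachable database before the server starts. A minimal connectivity check, assuming the same defaults as the config (and `psycopg2-binary`, which the build file above already lists):

```python
import os

import psycopg2

# Defaults mirror the ${env.POSTGRES_*:=...} fallbacks in the run configuration below.
conn = psycopg2.connect(
    host=os.environ.get("POSTGRES_HOST", "localhost"),
    port=int(os.environ.get("POSTGRES_PORT", "5432")),
    dbname=os.environ.get("POSTGRES_DB", "llamastack"),
    user=os.environ.get("POSTGRES_USER", "llamastack"),
    password=os.environ.get("POSTGRES_PASSWORD", "llamastack"),
)
with conn, conn.cursor() as cur:
    cur.execute("SELECT version()")
    print(cur.fetchone()[0])
conn.close()
```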
diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml deleted file mode 100644 index 0d7ecff48..000000000 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ /dev/null @@ -1,115 +0,0 @@ -version: 2 -image_name: postgres-demo -apis: -- agents -- inference -- safety -- tool_runtime -- vector_io -providers: - inference: - - provider_id: vllm-inference - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -storage: - backends: - kv_default: - type: kv_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} - sql_default: - type: sql_postgres - 
host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm - - metadata: - embedding_dimension: 768 - model_id: nomic-embed-text-v1.5 - provider_id: sentence-transformers - model_type: embedding - shields: - - shield_id: meta-llama/Llama-Guard-3-8B - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml deleted file mode 100644 index b2e2a0c85..000000000 --- a/llama_stack/distributions/starter-gpu/build.yaml +++ /dev/null @@ -1,60 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers. - This distribution is intended for GPU-enabled environments. - providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::huggingface-gpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml deleted file mode 100644 index 92483c78e..000000000 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ /dev/null @@ -1,279 +0,0 @@ -version: 2 -image_name: starter-gpu -apis: -- agents -- batches -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- 
tool_runtime -- vector_io -providers: - inference: - - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_type: remote::cerebras - config: - base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY:=} - - provider_id: ${env.OLLAMA_URL:+ollama} - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: ${env.VLLM_URL:+vllm} - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.TGI_URL:+tgi} - provider_type: remote::tgi - config: - url: ${env.TGI_URL:=} - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:=} - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:=} - - provider_id: bedrock - provider_type: remote::bedrock - - provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} - - provider_id: anthropic - provider_type: remote::anthropic - config: - api_key: ${env.ANTHROPIC_API_KEY:=} - - provider_id: gemini - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY:=} - - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} - provider_type: remote::vertexai - config: - project: ${env.VERTEX_AI_PROJECT:=} - location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:=} - - provider_id: sambanova - provider_type: remote::sambanova - config: - url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:=} - - provider_id: ${env.AZURE_API_KEY:+azure} - provider_type: remote::azure - config: - api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} - api_version: ${env.AZURE_API_VERSION:=} - api_type: ${env.AZURE_API_TYPE:=} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db - persistence: - namespace: vector_io::sqlite_vec - backend: kv_default - - provider_id: ${env.MILVUS_URL:+milvus} - provider_type: inline::milvus - config: - db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db - persistence: - namespace: vector_io::milvus - backend: kv_default - - provider_id: ${env.CHROMADB_URL:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - - provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} - persistence: - namespace: vector_io::pgvector - 
backend: kv_default - - provider_id: ${env.QDRANT_URL:+qdrant} - provider_type: remote::qdrant - config: - api_key: ${env.QDRANT_API_KEY:=} - persistence: - namespace: vector_io::qdrant_remote - backend: kv_default - - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} - provider_type: remote::weaviate - config: - weaviate_api_key: null - weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} - persistence: - namespace: vector_io::weaviate - backend: kv_default - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} - metadata_store: - table_name: files_metadata - backend: sql_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - provider_id: code-scanner - provider_type: inline::code-scanner - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - post_training: - - provider_id: huggingface-gpu - provider_type: inline::huggingface-gpu - config: - checkpoint_format: huggingface - distributed_backend: null - device: cpu - dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - batches: - - provider_id: reference - provider_type: inline::reference - config: - kvstore: - namespace: batches - backend: kv_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: [] - shields: - - shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} - - shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - 
tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true -vector_stores: - default_provider_id: faiss - default_embedding_model: - provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml deleted file mode 100644 index baa80ef3e..000000000 --- a/llama_stack/distributions/starter/build.yaml +++ /dev/null @@ -1,60 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers. - This distribution is intended for CPU-only environments. - providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::torchtune-cpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml deleted file mode 100644 index 3b9d8f890..000000000 --- a/llama_stack/distributions/starter/run.yaml +++ /dev/null @@ -1,276 +0,0 @@ -version: 2 -image_name: starter -apis: -- agents -- batches -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_type: remote::cerebras - config: - base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY:=} - - provider_id: ${env.OLLAMA_URL:+ollama} - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - - provider_id: ${env.VLLM_URL:+vllm} - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.TGI_URL:+tgi} - provider_type: remote::tgi - config: - url: ${env.TGI_URL:=} - - provider_id: fireworks - provider_type: 
remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:=} - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:=} - - provider_id: bedrock - provider_type: remote::bedrock - - provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:=} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} - - provider_id: anthropic - provider_type: remote::anthropic - config: - api_key: ${env.ANTHROPIC_API_KEY:=} - - provider_id: gemini - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY:=} - - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} - provider_type: remote::vertexai - config: - project: ${env.VERTEX_AI_PROJECT:=} - location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:=} - - provider_id: sambanova - provider_type: remote::sambanova - config: - url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:=} - - provider_id: ${env.AZURE_API_KEY:+azure} - provider_type: remote::azure - config: - api_key: ${env.AZURE_API_KEY:=} - api_base: ${env.AZURE_API_BASE:=} - api_version: ${env.AZURE_API_VERSION:=} - api_type: ${env.AZURE_API_TYPE:=} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db - persistence: - namespace: vector_io::sqlite_vec - backend: kv_default - - provider_id: ${env.MILVUS_URL:+milvus} - provider_type: inline::milvus - config: - db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db - persistence: - namespace: vector_io::milvus - backend: kv_default - - provider_id: ${env.CHROMADB_URL:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - persistence: - namespace: vector_io::chroma_remote - backend: kv_default - - provider_id: ${env.PGVECTOR_DB:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} - persistence: - namespace: vector_io::pgvector - backend: kv_default - - provider_id: ${env.QDRANT_URL:+qdrant} - provider_type: remote::qdrant - config: - api_key: ${env.QDRANT_API_KEY:=} - persistence: - namespace: vector_io::qdrant_remote - backend: kv_default - - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} - provider_type: remote::weaviate - config: - weaviate_api_key: null - weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} - persistence: - namespace: vector_io::weaviate - backend: kv_default - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} - metadata_store: - table_name: files_metadata - backend: sql_default - safety: - - provider_id: 
llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - provider_id: code-scanner - provider_type: inline::code-scanner - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - post_training: - - provider_id: torchtune-cpu - provider_type: inline::torchtune-cpu - config: - checkpoint_format: meta - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - batches: - - provider_id: reference - provider_type: inline::reference - config: - kvstore: - namespace: batches - backend: kv_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: [] - shields: - - shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} - - shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true -vector_stores: - default_provider_id: faiss - default_embedding_model: - provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py deleted file mode 100644 index c8c7101a6..000000000 --- a/llama_stack/distributions/starter/starter.py +++ /dev/null @@ -1,327 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
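The deleted run.yaml files above lean on two env-substitution forms: `${env.VAR:+id}` yields `id` only when `VAR` is set (used to toggle optional providers such as `${env.VLLM_URL:+vllm}`), while `${env.VAR:=default}` yields the variable's value, or `default` when unset. The following is a minimal sketch of those semantics for illustration only; the stack's actual config loader implements this itself (with stricter error handling), and nothing below is its real API.

```python
import os
import re

# Hypothetical re-implementation of the ${env.VAR:+value} / ${env.VAR:=default}
# substitution convention used in the run.yaml files above; illustrative only.
_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>[A-Za-z0-9_]+):(?P<op>[+=])(?P<value>[^}]*)\}")


def substitute_env(text: str) -> str:
    def repl(match: re.Match) -> str:
        name, op, value = match.group("name", "op", "value")
        current = os.environ.get(name)
        if op == "+":
            # ':+' -> expand to the literal value only when the variable is set
            return value if current else ""
        # ':=' -> the variable's value, falling back to the default
        return current if current else value

    return _ENV_PATTERN.sub(repl, text)


# With VLLM_URL unset, the provider_id collapses to an empty string, which the
# stack appears to treat as "provider disabled":
print(substitute_env("provider_id: ${env.VLLM_URL:+vllm}"))
print(substitute_env("url: ${env.OLLAMA_URL:=http://localhost:11434}"))
```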
- - -from typing import Any - -from llama_stack.core.datatypes import ( - BuildProvider, - Provider, - ProviderSpec, - QualifiedModel, - ShieldInput, - ToolGroupInput, - VectorStoresConfig, -) -from llama_stack.core.utils.dynamic import instantiate_class_type -from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings -from llama_stack.providers.datatypes import RemoteProviderSpec -from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig -from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( - SQLiteVectorIOConfig, -) -from llama_stack.providers.registry.inference import available_providers -from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig -from llama_stack.providers.remote.vector_io.pgvector.config import ( - PGVectorVectorIOConfig, -) -from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig -from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig -from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig - - -def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]: - """Get configuration for a provider using its adapter's config class.""" - config_class = instantiate_class_type(provider_spec.config_class) - - if hasattr(config_class, "sample_run_config"): - config: dict[str, Any] = config_class.sample_run_config() - return config - return {} - - -ENABLED_INFERENCE_PROVIDERS = [ - "ollama", - "vllm", - "tgi", - "fireworks", - "together", - "gemini", - "vertexai", - "groq", - "sambanova", - "anthropic", - "openai", - "cerebras", - "nvidia", - "bedrock", - "azure", -] - -INFERENCE_PROVIDER_IDS = { - "ollama": "${env.OLLAMA_URL:+ollama}", - "vllm": "${env.VLLM_URL:+vllm}", - "tgi": "${env.TGI_URL:+tgi}", - "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}", - "nvidia": "${env.NVIDIA_API_KEY:+nvidia}", - "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}", - "azure": "${env.AZURE_API_KEY:+azure}", -} - - -def get_remote_inference_providers() -> list[Provider]: - # Filter out inline providers and some others - the starter distro only exposes remote providers - remote_providers = [ - provider - for provider in available_providers() - if isinstance(provider, RemoteProviderSpec) and provider.adapter_type in ENABLED_INFERENCE_PROVIDERS - ] - - inference_providers = [] - for provider_spec in remote_providers: - provider_type = provider_spec.adapter_type - - if provider_type in INFERENCE_PROVIDER_IDS: - provider_id = INFERENCE_PROVIDER_IDS[provider_type] - else: - provider_id = provider_type.replace("-", "_").replace("::", "_") - config = _get_config_for_provider(provider_spec) - - inference_providers.append( - Provider( - provider_id=provider_id, - provider_type=f"remote::{provider_type}", - config=config, - ) - ) - return inference_providers - - -def get_distribution_template(name: str = "starter") -> DistributionTemplate: - remote_inference_providers = get_remote_inference_providers() - - providers = { - "inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers] - + [BuildProvider(provider_type="inline::sentence-transformers")], - "vector_io": [ - 
BuildProvider(provider_type="inline::faiss"), - BuildProvider(provider_type="inline::sqlite-vec"), - BuildProvider(provider_type="inline::milvus"), - BuildProvider(provider_type="remote::chromadb"), - BuildProvider(provider_type="remote::pgvector"), - BuildProvider(provider_type="remote::qdrant"), - BuildProvider(provider_type="remote::weaviate"), - ], - "files": [BuildProvider(provider_type="inline::localfs")], - "safety": [ - BuildProvider(provider_type="inline::llama-guard"), - BuildProvider(provider_type="inline::code-scanner"), - ], - "agents": [BuildProvider(provider_type="inline::meta-reference")], - "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")], - "eval": [BuildProvider(provider_type="inline::meta-reference")], - "datasetio": [ - BuildProvider(provider_type="remote::huggingface"), - BuildProvider(provider_type="inline::localfs"), - ], - "scoring": [ - BuildProvider(provider_type="inline::basic"), - BuildProvider(provider_type="inline::llm-as-judge"), - BuildProvider(provider_type="inline::braintrust"), - ], - "tool_runtime": [ - BuildProvider(provider_type="remote::brave-search"), - BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), - BuildProvider(provider_type="remote::model-context-protocol"), - ], - "batches": [ - BuildProvider(provider_type="inline::reference"), - ], - } - files_provider = Provider( - provider_id="meta-reference-files", - provider_type="inline::localfs", - config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - default_shields = [ - # if the - ShieldInput( - shield_id="llama-guard", - provider_id="${env.SAFETY_MODEL:+llama-guard}", - provider_shield_id="${env.SAFETY_MODEL:=}", - ), - ShieldInput( - shield_id="code-scanner", - provider_id="${env.CODE_SCANNER_MODEL:+code-scanner}", - provider_shield_id="${env.CODE_SCANNER_MODEL:=}", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Quick start template for running Llama Stack with several popular providers. 
This distribution is intended for CPU-only environments.", - container_image=None, - template_path=None, - providers=providers, - additional_pip_packages=PostgresSqlStoreConfig.pip_packages(), - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": remote_inference_providers + [embedding_provider], - "vector_io": [ - Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="sqlite-vec", - provider_type="inline::sqlite-vec", - config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="${env.MILVUS_URL:+milvus}", - provider_type="inline::milvus", - config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="${env.CHROMADB_URL:+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}/", - url="${env.CHROMADB_URL:=}", - ), - ), - Provider( - provider_id="${env.PGVECTOR_DB:+pgvector}", - provider_type="remote::pgvector", - config=PGVectorVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - db="${env.PGVECTOR_DB:=}", - user="${env.PGVECTOR_USER:=}", - password="${env.PGVECTOR_PASSWORD:=}", - ), - ), - Provider( - provider_id="${env.QDRANT_URL:+qdrant}", - provider_type="remote::qdrant", - config=QdrantVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - url="${env.QDRANT_URL:=}", - ), - ), - Provider( - provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", - provider_type="remote::weaviate", - config=WeaviateVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", - ), - ), - ], - "files": [files_provider], - }, - default_models=[], - default_tool_groups=default_tool_groups, - default_shields=default_shields, - vector_stores_config=VectorStoresConfig( - default_provider_id="faiss", - default_embedding_model=QualifiedModel( - provider_id="sentence-transformers", - model_id="nomic-ai/nomic-embed-text-v1.5", - ), - ), - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "FIREWORKS_API_KEY": ( - "", - "Fireworks API Key", - ), - "OPENAI_API_KEY": ( - "", - "OpenAI API Key", - ), - "GROQ_API_KEY": ( - "", - "Groq API Key", - ), - "ANTHROPIC_API_KEY": ( - "", - "Anthropic API Key", - ), - "GEMINI_API_KEY": ( - "", - "Gemini API Key", - ), - "VERTEX_AI_PROJECT": ( - "", - "Google Cloud Project ID for Vertex AI", - ), - "VERTEX_AI_LOCATION": ( - "us-central1", - "Google Cloud Location for Vertex AI", - ), - "SAMBANOVA_API_KEY": ( - "", - "SambaNova API Key", - ), - "VLLM_URL": ( - "http://localhost:8000/v1", - "vLLM URL", - ), - "VLLM_INFERENCE_MODEL": ( - "", - "Optional vLLM Inference Model to register on startup", - ), - "OLLAMA_URL": ( - "http://localhost:11434", - "Ollama URL", - ), - "AZURE_API_KEY": ( - "", - "Azure API Key", - ), - "AZURE_API_BASE": ( - "", - "Azure API Base", - ), - "AZURE_API_VERSION": ( - "", - "Azure API Version", - ), - "AZURE_API_TYPE": ( - "azure", - "Azure API Type", - ), - }, - ) diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml deleted file mode 100644 index ca3c8402d..000000000 --- a/llama_stack/distributions/watsonx/run.yaml +++ /dev/null @@ -1,133 +0,0 @@ -version: 2 -image_name: watsonx -apis: -- agents -- datasetio -- 
eval -- files -- inference -- safety -- scoring -- tool_runtime -- vector_io -providers: - inference: - - provider_id: watsonx - provider_type: remote::watsonx - config: - url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} - api_key: ${env.WATSONX_API_KEY:=} - project_id: ${env.WATSONX_PROJECT_ID:=} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - namespace: eval - backend: kv_default - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - namespace: datasetio::huggingface - backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - namespace: datasetio::localfs - backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:=} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files} - metadata_store: - table_name: files_metadata - backend: sql_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db - sql_default: - type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: [] - shields: [] - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true diff --git a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py deleted file mode 100644 index 11a5993e9..000000000 --- a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +++ /dev/null @@ -1,319 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
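Before the prompt-template code that follows, one note on the run.yaml files deleted above: once env substitution has been applied, providers whose `provider_id` resolved to an empty string are effectively disabled. A rough way to see which inference providers would be active for the current environment, assuming PyYAML and the hypothetical `substitute_env` helper from the earlier sketch (the real stack validates configs through its pydantic models instead):

```python
import yaml  # PyYAML, an assumed dependency for this sketch

# Load a run.yaml (path is illustrative) after env substitution and list the
# inference providers whose provider_id did not collapse to an empty string.
with open("run.yaml") as f:
    run_config = yaml.safe_load(substitute_env(f.read()))

active = [p["provider_id"] for p in run_config["providers"]["inference"] if p.get("provider_id")]
print("active inference providers:", active)
```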
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# top-level folder for each specific model found within the models/ directory at
-# the top-level of this source tree.
-
-import textwrap
-from datetime import datetime
-from typing import Any
-
-from llama_stack.apis.inference import (
-    BuiltinTool,
-    ToolDefinition,
-)
-
-from .base import PromptTemplate, PromptTemplateGeneratorBase
-
-
-class SystemDefaultGenerator(PromptTemplateGeneratorBase):
-    def gen(self, *args, **kwargs) -> PromptTemplate:
-        template_str = textwrap.dedent(
-            """
-            Cutting Knowledge Date: December 2023
-            Today Date: {{ today }}
-            """
-        )
-        return PromptTemplate(
-            template_str.lstrip("\n"),
-            {
-                "today": datetime.now().strftime("%d %B %Y")  # noqa: DTZ005 - we don't care about timezones here since we are displaying the date
-            },
-        )
-
-    def data_examples(self) -> list[Any]:
-        return [None]
-
-
-class BuiltinToolGenerator(PromptTemplateGeneratorBase):
-    def _tool_breakdown(self, tools: list[ToolDefinition]):
-        builtin_tools, custom_tools = [], []
-        for dfn in tools:
-            if isinstance(dfn.tool_name, BuiltinTool):
-                builtin_tools.append(dfn)
-            else:
-                custom_tools.append(dfn)
-
-        return builtin_tools, custom_tools
-
-    def gen(self, tools: list[ToolDefinition]) -> PromptTemplate:
-        builtin_tools, custom_tools = self._tool_breakdown(tools)
-        template_str = textwrap.dedent(
-            """
-            {% if builtin_tools or custom_tools -%}
-            Environment: ipython
-            {% endif -%}
-            {% set builtin_tools = builtin_tools | reject('equalto', 'code_interpreter') | list -%}
-            {% if builtin_tools -%}
-            Tools: {{ builtin_tools | join(", ") | trim -}}
-            {% endif %}
-            """
-        )
-        return PromptTemplate(
-            template_str.lstrip("\n"),
-            {
-                "builtin_tools": [t.tool_name.value for t in builtin_tools],
-                "custom_tools": custom_tools,
-            },
-        )
-
-    def data_examples(self) -> list[list[ToolDefinition]]:
-        return [
-            # builtin tools
-            [
-                ToolDefinition(tool_name=BuiltinTool.code_interpreter),
-                ToolDefinition(tool_name=BuiltinTool.brave_search),
-                ToolDefinition(tool_name=BuiltinTool.wolfram_alpha),
-            ],
-            # only code interpreter
-            [
-                ToolDefinition(tool_name=BuiltinTool.code_interpreter),
-            ],
-        ]
-
-
-class JsonCustomToolGenerator(PromptTemplateGeneratorBase):
-    def gen(self, custom_tools: list[ToolDefinition]) -> PromptTemplate:
-        template_str = textwrap.dedent(
-            """
-            Answer the user's question by making use of the following functions if needed.
-            If none of the functions can be used, please say so.
-            Here is a list of functions in JSON format:
-            {% for t in custom_tools -%}
-            {# manually setting up JSON because jinja sorts keys in unexpected ways -#}
-            {%- set tname = t.tool_name -%}
-            {%- set tdesc = t.description -%}
-            {%- set tprops = t.input_schema.get('properties', {}) -%}
-            {%- set required_params = t.input_schema.get('required', []) -%}
-            {
-                "type": "function",
-                "function": {
-                    "name": "{{tname}}",
-                    "description": "{{tdesc}}",
-                    "parameters": {
-                        "type": "object",
-                        "properties": [
-                            {%- for name, param in tprops.items() %}
-                            {
-                                "{{name}}": {
-                                    "type": "object",
-                                    "description": "{{param.get('description', '')}}"
-                                }
-                            }{% if not loop.last %},{% endif %}
-                            {%- endfor %}
-                        ],
-                        "required": {{ required_params | tojson }}
-                    }
-                }
-            }
-            {% endfor %}
-            Return function calls in JSON format.
-            """
-        )
-
-        return PromptTemplate(
-            template_str.lstrip("\n"),
-            {"custom_tools": [t.model_dump() for t in custom_tools]},
-        )
-
-    def data_examples(self) -> list[list[ToolDefinition]]:
-        return [
-            [
-                ToolDefinition(
-                    tool_name="trending_songs",
-                    description="Returns the trending songs on a Music site",
-                    input_schema={
-                        "type": "object",
-                        "properties": {
-                            "n": {
-                                "type": "int",
-                                "description": "The number of songs to return",
-                            },
-                            "genre": {
-                                "type": "str",
-                                "description": "The genre of the songs to return",
-                            },
-                        },
-                        "required": ["n"],
-                    },
-                ),
-            ]
-        ]
-
-
-class FunctionTagCustomToolGenerator(PromptTemplateGeneratorBase):
-    def gen(self, custom_tools: list[ToolDefinition]) -> PromptTemplate:
-        template_str = textwrap.dedent(
-            """
-            You have access to the following functions:
-
-            {% for t in custom_tools %}
-            {#- manually setting up JSON because jinja sorts keys in unexpected ways -#}
-            {%- set tname = t.tool_name -%}
-            {%- set tdesc = t.description -%}
-            {%- set tprops = t.input_schema.get('properties', {}) -%}
-            {%- set modified_params = {} -%}
-            {%- for key, value in tprops.items() -%}
-            {%- set param_copy = value.copy() -%}
-            {%- if 'default' in param_copy -%}
-            {%- set _ = param_copy.pop('default', None) -%}
-            {%- endif -%}
-            {%- set _ = modified_params.update({key: param_copy}) -%}
-            {%- endfor -%}
-            {%- set tparams = modified_params | tojson -%}
-            Use the function '{{ tname }}' to '{{ tdesc }}':
-            {"name": "{{tname}}", "description": "{{tdesc}}", "parameters": {{tparams}}}
-
-            {% endfor -%}
-            Think very carefully before calling functions.
-            If you choose to call a function ONLY reply in the following format with no prefix or suffix:
-
-            <function=example_function_name>{"example_name": "example_value"}</function>
-
-            Reminder:
-            - If looking for real time information use relevant functions before falling back to brave_search
-            - Function calls MUST follow the specified format, start with <function= and end with </function>
-            - Required parameters MUST be specified
-            - Only call one function at a time
-            - Put the entire function call reply on one line
-            """
-        )
-        return PromptTemplate(
-            template_str.lstrip("\n"),
-            {"custom_tools": [t.model_dump() for t in custom_tools]},
-        )
-
-    def data_examples(self) -> list[list[ToolDefinition]]:
-        return [
-            [
-                ToolDefinition(
-                    tool_name="trending_songs",
-                    description="Returns the trending songs on a Music site",
-                    input_schema={
-                        "type": "object",
-                        "properties": {
-                            "n": {
-                                "type": "int",
-                                "description": "The number of songs to return",
-                            },
-                            "genre": {
-                                "type": "str",
-                                "description": "The genre of the songs to return",
-                            },
-                        },
-                        "required": ["n"],
-                    },
-                ),
-            ]
-        ]
-
-
-class PythonListCustomToolGenerator(PromptTemplateGeneratorBase):  # noqa: N801
-    DEFAULT_PROMPT = textwrap.dedent(
-        """
-        You are a helpful assistant. You have access to functions, but you should only use them if they are required.
-        You are an expert in composing functions. You are given a question and a set of possible functions.
-        Based on the question, you may or may not need to make one function/tool call to achieve the purpose.
-
-        If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
-        If you decide to invoke a function, you SHOULD NOT include any other text in the response besides the function call in the above format.
-        For a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.
- - - {{ function_description }} - """.strip("\n") - ) - - def gen(self, custom_tools: list[ToolDefinition], system_prompt: str | None = None) -> PromptTemplate: - system_prompt = system_prompt or self.DEFAULT_PROMPT - return PromptTemplate( - system_prompt, - {"function_description": self._gen_function_description(custom_tools)}, - ) - - def _gen_function_description(self, custom_tools: list[ToolDefinition]) -> str: - template_str = textwrap.dedent( - """ - Here is a list of functions in JSON format that you can invoke. - - [ - {% for t in tools -%} - {# manually setting up JSON because jinja sorts keys in unexpected ways -#} - {%- set tname = t.tool_name -%} - {%- set tdesc = t.description -%} - {%- set tprops = (t.input_schema or {}).get('properties', {}) -%} - {%- set required_params = (t.input_schema or {}).get('required', []) -%} - { - "name": "{{tname}}", - "description": "{{tdesc}}", - "parameters": { - "type": "dict", - "required": {{ required_params | tojson }}, - "properties": { - {%- for name, param in tprops.items() %} - "{{name}}": { - "type": "{{param.get('type', 'string')}}", - "description": "{{param.get('description', '')}}"{% if param.get('default') %}, - "default": "{{param.get('default')}}"{% endif %} - }{% if not loop.last %},{% endif %} - {%- endfor %} - } - } - }{% if not loop.last %}, - {% endif -%} - {%- endfor %} - ] - - You can answer general questions or invoke tools when necessary. - In addition to tool calls, you should also augment your responses by using the tool outputs. - - """ - ) - template = PromptTemplate( - template_str.strip("\n"), - {"tools": [t.model_dump() for t in custom_tools]}, - ) - rendered: str = template.render() - return rendered - - def data_examples(self) -> list[list[ToolDefinition]]: - return [ - [ - ToolDefinition( - tool_name="get_weather", - description="Get weather info for places", - input_schema={ - "type": "object", - "properties": { - "city": { - "type": "string", - "description": "The name of the city to get the weather for", - }, - "metric": { - "type": "string", - "description": "The metric for weather. Options are: celsius, fahrenheit", - "default": "celsius", - }, - }, - "required": ["city"], - }, - ), - ] - ] diff --git a/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py b/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py deleted file mode 100644 index 1ee570933..000000000 --- a/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# top-level folder for each specific model found within the models/ directory at -# the top-level of this source tree. - -import textwrap - -from llama_stack.apis.inference import ToolDefinition -from llama_stack.models.llama.llama3.prompt_templates.base import ( - PromptTemplate, - PromptTemplateGeneratorBase, -) - - -class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801 - DEFAULT_PROMPT = textwrap.dedent( - """ - You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines: - - 1. 
FUNCTION CALLS: - - ONLY use functions that are EXPLICITLY listed in the function list below - - If NO functions are listed (empty function list []), respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" - - If a function is not in the list, respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" - - If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s) - - Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)] - Examples: - CORRECT: [get_weather(location="Vancouver"), calculate_route(start="Boston", end="New York")] <- Only if get_weather and calculate_route are in function list - INCORRECT: get_weather(location="New York") - INCORRECT: Let me check the weather: [get_weather(location="New York")] - INCORRECT: [get_events(location="Singapore")] <- If function not in list - - 2. RESPONSE RULES: - - For pure function requests matching a listed function: ONLY output the function call(s) - - For knowledge questions: ONLY output text - - For missing parameters: ONLY request the specific missing parameters - - For unavailable services (not in function list): output ONLY with internal knowledge or "I don't have access to [Unavailable service] information". Do NOT execute a function call. - - If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations - - NEVER combine text and function calls in the same response - - NEVER suggest alternative functions when the requested service is unavailable - - NEVER create or invent new functions not listed below - - 3. STRICT BOUNDARIES: - - ONLY use functions from the list below - no exceptions - - NEVER use a function as an alternative to unavailable information - - NEVER call functions not present in the function list - - NEVER add explanatory text to function calls - - NEVER respond with empty brackets - - Use proper Python/JSON syntax for function calls - - Check the function list carefully before responding - - 4. 
TOOL RESPONSE HANDLING: - - When receiving tool responses: provide concise, natural language responses - - Don't repeat tool response verbatim - - Don't add supplementary information - - {{ function_description }} - """.strip("\n") - ) - - def gen(self, custom_tools: list[ToolDefinition], system_prompt: str | None = None) -> PromptTemplate: - system_prompt = system_prompt or self.DEFAULT_PROMPT - return PromptTemplate( - system_prompt, - {"function_description": self._gen_function_description(custom_tools)}, - ) - - def _gen_function_description(self, custom_tools: list[ToolDefinition]) -> PromptTemplate: - template_str = textwrap.dedent( - """ - Here is a list of functions in JSON format that you can invoke: - [ - {% for t in tools -%} - {# manually setting up JSON because jinja sorts keys in unexpected ways -#} - {%- set tname = t.tool_name -%} - {%- set tdesc = t.description -%} - {%- set tprops = t.input_schema.get('properties', {}) -%} - {%- set required_params = t.input_schema.get('required', []) -%} - { - "name": "{{tname}}", - "description": "{{tdesc}}", - "parameters": { - "type": "dict", - "required": {{ required_params | tojson }}, - "properties": { - {%- for name, param in tprops.items() %} - "{{name}}": { - "type": "{{param.get('type', 'string')}}", - "description": "{{param.get('description', '')}}"{% if param.get('default') %}, - "default": "{{param.get('default')}}"{% endif %} - }{% if not loop.last %},{% endif %} - {%- endfor %} - } - } - }{% if not loop.last %}, - {% endif -%} - {%- endfor %} - ] - """ - ) - return PromptTemplate( - template_str.strip("\n"), - {"tools": [t.model_dump() for t in custom_tools]}, - ).render() - - def data_examples(self) -> list[list[ToolDefinition]]: - return [ - [ - ToolDefinition( - tool_name="get_weather", - description="Get weather info for places", - input_schema={ - "type": "object", - "properties": { - "city": { - "type": "string", - "description": "The name of the city to get the weather for", - }, - "metric": { - "type": "string", - "description": "The metric for weather. Options are: celsius, fahrenheit", - "default": "celsius", - }, - }, - "required": ["city"], - }, - ), - ] - ] diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py deleted file mode 100644 index 9be3edb8e..000000000 --- a/llama_stack/providers/datatypes.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import StrEnum -from typing import Any, Protocol -from urllib.parse import urlparse - -from pydantic import BaseModel, Field - -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.datasets import Dataset -from llama_stack.apis.datatypes import Api -from llama_stack.apis.models import Model -from llama_stack.apis.scoring_functions import ScoringFn -from llama_stack.apis.shields import Shield -from llama_stack.apis.tools import ToolGroup -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.schema_utils import json_schema_type - - -class ModelsProtocolPrivate(Protocol): - """ - Protocol for model management. - - This allows users to register their preferred model identifiers. 
- - Model registration requires - - - a provider, used to route the registration request - - a model identifier, user's intended name for the model during inference - - a provider model identifier, a model identifier supported by the provider - - Providers will only accept registration for provider model ids they support. - - Example, - register: provider x my-model-id x provider-model-id - -> Error if provider does not support provider-model-id - -> Error if my-model-id is already registered - -> Success if provider supports provider-model-id - inference: my-model-id x ... - -> Provider uses provider-model-id for inference - """ - - # this should be called `on_model_register` or something like that. - # the provider should _not_ be able to change the object in this - # callback - async def register_model(self, model: Model) -> Model: ... - - async def unregister_model(self, model_id: str) -> None: ... - - # the Stack router will query each provider for their list of models - # if a `refresh_interval_seconds` is provided, this method will be called - # periodically to refresh the list of models - # - # NOTE: each model returned will be registered with the model registry. this means - # a callback to the `register_model()` method will be made. this is duplicative and - # may be removed in the future. - async def list_models(self) -> list[Model] | None: ... - - async def should_refresh_models(self) -> bool: ... - - -class ShieldsProtocolPrivate(Protocol): - async def register_shield(self, shield: Shield) -> None: ... - - async def unregister_shield(self, identifier: str) -> None: ... - - -class VectorStoresProtocolPrivate(Protocol): - async def register_vector_store(self, vector_store: VectorStore) -> None: ... - - async def unregister_vector_store(self, vector_store_id: str) -> None: ... - - -class DatasetsProtocolPrivate(Protocol): - async def register_dataset(self, dataset: Dataset) -> None: ... - - async def unregister_dataset(self, dataset_id: str) -> None: ... - - -class ScoringFunctionsProtocolPrivate(Protocol): - async def list_scoring_functions(self) -> list[ScoringFn]: ... - - async def register_scoring_function(self, scoring_fn: ScoringFn) -> None: ... - - -class BenchmarksProtocolPrivate(Protocol): - async def register_benchmark(self, benchmark: Benchmark) -> None: ... - - -class ToolGroupsProtocolPrivate(Protocol): - async def register_toolgroup(self, toolgroup: ToolGroup) -> None: ... - - async def unregister_toolgroup(self, toolgroup_id: str) -> None: ... - - -@json_schema_type -class ProviderSpec(BaseModel): - api: Api - provider_type: str - config_class: str = Field( - ..., - description="Fully-qualified classname of the config for this provider", - ) - api_dependencies: list[Api] = Field( - default_factory=list, - description="Higher-level API surfaces may depend on other providers to provide their functionality", - ) - optional_api_dependencies: list[Api] = Field( - default_factory=list, - ) - deprecation_warning: str | None = Field( - default=None, - description="If this provider is deprecated, specify the warning message here", - ) - deprecation_error: str | None = Field( - default=None, - description="If this provider is deprecated and does NOT work, specify the error message here", - ) - - module: str | None = Field( - default=None, - description=""" - Fully-qualified name of the module to import. 
The module is expected to have: - - - `get_adapter_impl(config, deps)`: returns the adapter implementation - - Example: `module: ramalama_stack` - """, - ) - - pip_packages: list[str] = Field( - default_factory=list, - description="The pip dependencies needed for this implementation", - ) - - provider_data_validator: str | None = Field( - default=None, - ) - - is_external: bool = Field(default=False, description="Notes whether this provider is an external provider.") - - # used internally by the resolver; this is a hack for now - deps__: list[str] = Field(default_factory=list) - - @property - def is_sample(self) -> bool: - return self.provider_type in ("sample", "remote::sample") - - -class RoutingTable(Protocol): - async def get_provider_impl(self, routing_key: str) -> Any: ... - - -@json_schema_type -class InlineProviderSpec(ProviderSpec): - container_image: str | None = Field( - default=None, - description=""" -The container image to use for this implementation. If one is provided, pip_packages will be ignored. -If a provider depends on other providers, the dependencies MUST NOT specify a container image. -""", - ) - description: str | None = Field( - default=None, - description=""" -A description of the provider. This is used to display in the documentation. -""", - ) - - -class RemoteProviderConfig(BaseModel): - host: str = "localhost" - port: int | None = None - protocol: str = "http" - - @property - def url(self) -> str: - if self.port is None: - return f"{self.protocol}://{self.host}" - return f"{self.protocol}://{self.host}:{self.port}" - - @classmethod - def from_url(cls, url: str) -> "RemoteProviderConfig": - parsed = urlparse(url) - attrs = {k: v for k, v in parsed._asdict().items() if v is not None} - return cls(**attrs) - - -@json_schema_type -class RemoteProviderSpec(ProviderSpec): - adapter_type: str = Field( - ..., - description="Unique identifier for this adapter", - ) - - description: str | None = Field( - default=None, - description=""" -A description of the provider. This is used to display in the documentation. -""", - ) - - @property - def container_image(self) -> str | None: - return None - - -class HealthStatus(StrEnum): - OK = "OK" - ERROR = "Error" - NOT_IMPLEMENTED = "Not Implemented" - - -HealthResponse = dict[str, Any] diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py deleted file mode 100644 index 91287617a..000000000 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
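The ModelsProtocolPrivate docstring above spells out the registration contract: a provider must reject provider model ids it does not support, reject duplicate user-chosen identifiers, and route inference through the provider model id. A self-contained toy satisfying that contract is sketched below; ToyModel and ToyProvider are illustrative stand-ins, not real llama_stack classes.

```python
from dataclasses import dataclass


@dataclass
class ToyModel:
    identifier: str         # the user's chosen name ("my-model-id")
    provider_model_id: str  # the provider-side id ("provider-model-id")


class ToyProvider:
    SUPPORTED = {"provider-model-id"}

    def __init__(self) -> None:
        self._registry: dict[str, str] = {}  # identifier -> provider_model_id

    async def register_model(self, model: ToyModel) -> ToyModel:
        # Error if the provider does not support provider-model-id
        if model.provider_model_id not in self.SUPPORTED:
            raise ValueError(f"unsupported provider model: {model.provider_model_id}")
        # Error if my-model-id is already registered
        if model.identifier in self._registry:
            raise ValueError(f"already registered: {model.identifier}")
        # Success: inference on my-model-id routes to provider-model-id
        self._registry[model.identifier] = model.provider_model_id
        return model

    async def unregister_model(self, model_id: str) -> None:
        self._registry.pop(model_id, None)
```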
- -from typing import Any - -from llama_stack.core.datatypes import AccessRule, Api - -from .config import MetaReferenceAgentsImplConfig - - -async def get_provider_impl( - config: MetaReferenceAgentsImplConfig, - deps: dict[Api, Any], - policy: list[AccessRule], - telemetry_enabled: bool = False, -): - from .agents import MetaReferenceAgentsImpl - - impl = MetaReferenceAgentsImpl( - config, - deps[Api.inference], - deps[Api.vector_io], - deps[Api.safety], - deps[Api.tool_runtime], - deps[Api.tool_groups], - deps[Api.conversations], - policy, - telemetry_enabled, - ) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py deleted file mode 100644 index 96f271669..000000000 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ /dev/null @@ -1,1024 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import copy -import json -import re -import uuid -import warnings -from collections.abc import AsyncGenerator -from datetime import UTC, datetime - -import httpx - -from llama_stack.apis.agents import ( - AgentConfig, - AgentToolGroup, - AgentToolGroupWithArgs, - AgentTurnCreateRequest, - AgentTurnResponseEvent, - AgentTurnResponseEventType, - AgentTurnResponseStepCompletePayload, - AgentTurnResponseStepProgressPayload, - AgentTurnResponseStepStartPayload, - AgentTurnResponseStreamChunk, - AgentTurnResponseTurnAwaitingInputPayload, - AgentTurnResponseTurnCompletePayload, - AgentTurnResumeRequest, - Attachment, - Document, - InferenceStep, - ShieldCallStep, - StepType, - ToolExecutionStep, - Turn, -) -from llama_stack.apis.common.content_types import ( - URL, - TextContentItem, - ToolCallDelta, - ToolCallParseStatus, -) -from llama_stack.apis.common.errors import SessionNotFoundError -from llama_stack.apis.inference import ( - ChatCompletionResponseEventType, - CompletionMessage, - Inference, - Message, - OpenAIAssistantMessageParam, - OpenAIChatCompletionRequestWithExtraBody, - OpenAIDeveloperMessageParam, - OpenAIMessageParam, - OpenAISystemMessageParam, - OpenAIToolMessageParam, - OpenAIUserMessageParam, - SamplingParams, - StopReason, - SystemMessage, - ToolDefinition, - ToolResponse, - ToolResponseMessage, - UserMessage, -) -from llama_stack.apis.safety import Safety -from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.core.datatypes import AccessRule -from llama_stack.log import get_logger -from llama_stack.models.llama.datatypes import ( - BuiltinTool, - ToolCall, -) -from llama_stack.providers.utils.inference.openai_compat import ( - convert_message_to_openai_dict_new, - convert_openai_chat_completion_stream, - convert_tooldef_to_openai_tool, -) -from llama_stack.providers.utils.kvstore import KVStore -from llama_stack.providers.utils.telemetry import tracing - -from .persistence import AgentPersistence -from .safety import SafetyException, ShieldRunnerMixin - -TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})") -MEMORY_QUERY_TOOL = "knowledge_search" -WEB_SEARCH_TOOL = "web_search" -RAG_TOOL_GROUP = "builtin::rag" - -logger = get_logger(name=__name__, category="agents::meta_reference") - - -class ChatAgent(ShieldRunnerMixin): - def __init__( - self, - 
agent_id: str, - agent_config: AgentConfig, - inference_api: Inference, - safety_api: Safety, - tool_runtime_api: ToolRuntime, - tool_groups_api: ToolGroups, - vector_io_api: VectorIO, - persistence_store: KVStore, - created_at: str, - policy: list[AccessRule], - telemetry_enabled: bool = False, - ): - self.agent_id = agent_id - self.agent_config = agent_config - self.inference_api = inference_api - self.safety_api = safety_api - self.vector_io_api = vector_io_api - self.storage = AgentPersistence(agent_id, persistence_store, policy) - self.tool_runtime_api = tool_runtime_api - self.tool_groups_api = tool_groups_api - self.created_at = created_at - self.telemetry_enabled = telemetry_enabled - - ShieldRunnerMixin.__init__( - self, - safety_api, - input_shields=agent_config.input_shields, - output_shields=agent_config.output_shields, - ) - - def turn_to_messages(self, turn: Turn) -> list[Message]: - messages = [] - - # NOTE: if a toolcall response is in a step, we do not add it when processing the input messages - tool_call_ids = set() - for step in turn.steps: - if step.step_type == StepType.tool_execution.value: - for response in step.tool_responses: - tool_call_ids.add(response.call_id) - - for m in turn.input_messages: - msg = m.model_copy() - # We do not want to keep adding RAG context to the input messages - # May be this should be a parameter of the agentic instance - # that can define its behavior in a custom way - if isinstance(msg, UserMessage): - msg.context = None - if isinstance(msg, ToolResponseMessage): - if msg.call_id in tool_call_ids: - # NOTE: do not add ToolResponseMessage here, we'll add them in tool_execution steps - continue - - messages.append(msg) - - for step in turn.steps: - if step.step_type == StepType.inference.value: - messages.append(step.model_response) - elif step.step_type == StepType.tool_execution.value: - for response in step.tool_responses: - messages.append( - ToolResponseMessage( - call_id=response.call_id, - content=response.content, - ) - ) - elif step.step_type == StepType.shield_call.value: - if step.violation: - # CompletionMessage itself in the ShieldResponse - messages.append( - CompletionMessage( - content=step.violation.user_message, - stop_reason=StopReason.end_of_turn, - ) - ) - return messages - - async def create_session(self, name: str) -> str: - return await self.storage.create_session(name) - - async def get_messages_from_turns(self, turns: list[Turn]) -> list[Message]: - messages = [] - if self.agent_config.instructions != "": - messages.append(SystemMessage(content=self.agent_config.instructions)) - - for turn in turns: - messages.extend(self.turn_to_messages(turn)) - return messages - - async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator: - turn_id = str(uuid.uuid4()) - if self.telemetry_enabled: - span = tracing.get_current_span() - if span is not None: - span.set_attribute("session_id", request.session_id) - span.set_attribute("agent_id", self.agent_id) - span.set_attribute("request", request.model_dump_json()) - span.set_attribute("turn_id", turn_id) - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) - - await self._initialize_tools(request.toolgroups) - async for chunk in self._run_turn(request, turn_id): - yield chunk - - async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator: - if self.telemetry_enabled: - span = tracing.get_current_span() - if span is not None: - span.set_attribute("agent_id", self.agent_id) - 
span.set_attribute("session_id", request.session_id) - span.set_attribute("request", request.model_dump_json()) - span.set_attribute("turn_id", request.turn_id) - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) - - await self._initialize_tools() - async for chunk in self._run_turn(request): - yield chunk - - async def _run_turn( - self, - request: AgentTurnCreateRequest | AgentTurnResumeRequest, - turn_id: str | None = None, - ) -> AsyncGenerator: - assert request.stream is True, "Non-streaming not supported" - - is_resume = isinstance(request, AgentTurnResumeRequest) - session_info = await self.storage.get_session_info(request.session_id) - if session_info is None: - raise SessionNotFoundError(request.session_id) - - turns = await self.storage.get_session_turns(request.session_id) - if is_resume and len(turns) == 0: - raise ValueError("No turns found for session") - - steps = [] - messages = await self.get_messages_from_turns(turns) - if is_resume: - tool_response_messages = [ - ToolResponseMessage(call_id=x.call_id, content=x.content) for x in request.tool_responses - ] - messages.extend(tool_response_messages) - last_turn = turns[-1] - last_turn_messages = self.turn_to_messages(last_turn) - last_turn_messages = [ - x for x in last_turn_messages if isinstance(x, UserMessage) or isinstance(x, ToolResponseMessage) - ] - last_turn_messages.extend(tool_response_messages) - - # get steps from the turn - steps = last_turn.steps - - # mark tool execution step as complete - # if there's no tool execution in progress step (due to storage, or tool call parsing on client), - # we'll create a new tool execution step with current time - in_progress_tool_call_step = await self.storage.get_in_progress_tool_call_step( - request.session_id, request.turn_id - ) - now = datetime.now(UTC).isoformat() - tool_execution_step = ToolExecutionStep( - step_id=(in_progress_tool_call_step.step_id if in_progress_tool_call_step else str(uuid.uuid4())), - turn_id=request.turn_id, - tool_calls=(in_progress_tool_call_step.tool_calls if in_progress_tool_call_step else []), - tool_responses=request.tool_responses, - completed_at=now, - started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now), - ) - steps.append(tool_execution_step) - yield AgentTurnResponseStreamChunk( - event=AgentTurnResponseEvent( - payload=AgentTurnResponseStepCompletePayload( - step_type=StepType.tool_execution.value, - step_id=tool_execution_step.step_id, - step_details=tool_execution_step, - ) - ) - ) - input_messages = last_turn.input_messages - - turn_id = request.turn_id - start_time = last_turn.started_at - else: - messages.extend(request.messages) - start_time = datetime.now(UTC).isoformat() - input_messages = request.messages - - output_message = None - async for chunk in self.run( - session_id=request.session_id, - turn_id=turn_id, - input_messages=messages, - sampling_params=self.agent_config.sampling_params, - stream=request.stream, - documents=request.documents if not is_resume else None, - ): - if isinstance(chunk, CompletionMessage): - output_message = chunk - continue - - assert isinstance(chunk, AgentTurnResponseStreamChunk), f"Unexpected type {type(chunk)}" - event = chunk.event - if event.payload.event_type == AgentTurnResponseEventType.step_complete.value: - steps.append(event.payload.step_details) - - yield chunk - - assert output_message is not None - - turn = Turn( - turn_id=turn_id, - session_id=request.session_id, - input_messages=input_messages, - 
-            output_message=output_message,
-            started_at=start_time,
-            completed_at=datetime.now(UTC).isoformat(),
-            steps=steps,
-        )
-        await self.storage.add_turn_to_session(request.session_id, turn)
-        if output_message.tool_calls:
-            chunk = AgentTurnResponseStreamChunk(
-                event=AgentTurnResponseEvent(
-                    payload=AgentTurnResponseTurnAwaitingInputPayload(
-                        turn=turn,
-                    )
-                )
-            )
-        else:
-            chunk = AgentTurnResponseStreamChunk(
-                event=AgentTurnResponseEvent(
-                    payload=AgentTurnResponseTurnCompletePayload(
-                        turn=turn,
-                    )
-                )
-            )
-
-        yield chunk
-
-    async def run(
-        self,
-        session_id: str,
-        turn_id: str,
-        input_messages: list[Message],
-        sampling_params: SamplingParams,
-        stream: bool = False,
-        documents: list[Document] | None = None,
-    ) -> AsyncGenerator:
-        # Doing async generators makes downstream code much simpler and everything amenable to
-        # streaming. However, it also makes things complicated here because AsyncGenerators cannot
-        # return a "final value" for the `yield from` statement. We simulate that by yielding a
-        # final boolean (to see whether an exception happened) and then explicitly testing for it.
-
-        if len(self.input_shields) > 0:
-            async for res in self.run_multiple_shields_wrapper(
-                turn_id, input_messages, self.input_shields, "user-input"
-            ):
-                if isinstance(res, bool):
-                    return
-                else:
-                    yield res
-
-        async for res in self._run(
-            session_id,
-            turn_id,
-            input_messages,
-            sampling_params,
-            stream,
-            documents,
-        ):
-            if isinstance(res, bool):
-                return
-            elif isinstance(res, CompletionMessage):
-                final_response = res
-                break
-            else:
-                yield res
-
-        assert final_response is not None
-        # for output shields, run on the full input and output combination
-        messages = input_messages + [final_response]
-
-        if len(self.output_shields) > 0:
-            async for res in self.run_multiple_shields_wrapper(
-                turn_id, messages, self.output_shields, "assistant-output"
-            ):
-                if isinstance(res, bool):
-                    return
-                else:
-                    yield res
-
-        yield final_response
-
-    async def run_multiple_shields_wrapper(
-        self,
-        turn_id: str,
-        messages: list[Message],
-        shields: list[str],
-        touchpoint: str,
-    ) -> AsyncGenerator:
-        async with tracing.span("run_shields") as span:
-            if self.telemetry_enabled and span is not None:
-                span.set_attribute("input", [m.model_dump_json() for m in messages])
-                if len(shields) == 0:
-                    span.set_attribute("output", "no shields")
-
-            if len(shields) == 0:
-                return
-
-            step_id = str(uuid.uuid4())
-            shield_call_start_time = datetime.now(UTC).isoformat()
-            try:
-                yield AgentTurnResponseStreamChunk(
-                    event=AgentTurnResponseEvent(
-                        payload=AgentTurnResponseStepStartPayload(
-                            step_type=StepType.shield_call.value,
-                            step_id=step_id,
-                            metadata=dict(touchpoint=touchpoint),
-                        )
-                    )
-                )
-                await self.run_multiple_shields(messages, shields)
-
-            except SafetyException as e:
-                yield AgentTurnResponseStreamChunk(
-                    event=AgentTurnResponseEvent(
-                        payload=AgentTurnResponseStepCompletePayload(
-                            step_type=StepType.shield_call.value,
-                            step_id=step_id,
-                            step_details=ShieldCallStep(
-                                step_id=step_id,
-                                turn_id=turn_id,
-                                violation=e.violation,
-                                started_at=shield_call_start_time,
-                                completed_at=datetime.now(UTC).isoformat(),
-                            ),
-                        )
-                    )
-                )
-                if self.telemetry_enabled and span is not None:
-                    span.set_attribute("output", e.violation.model_dump_json())
-
-                yield CompletionMessage(
-                    content=str(e),
-                    stop_reason=StopReason.end_of_turn,
-                )
-                yield False
-
-            yield AgentTurnResponseStreamChunk(
-                event=AgentTurnResponseEvent(
-                    payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.shield_call.value,
-                        step_id=step_id,
-                        step_details=ShieldCallStep(
-                            step_id=step_id,
-                            turn_id=turn_id,
-                            violation=None,
-                            started_at=shield_call_start_time,
-                            completed_at=datetime.now(UTC).isoformat(),
-                        ),
-                    )
-                )
-            )
-            if self.telemetry_enabled and span is not None:
-                span.set_attribute("output", "no violations")
-
-    async def _run(
-        self,
-        session_id: str,
-        turn_id: str,
-        input_messages: list[Message],
-        sampling_params: SamplingParams,
-        stream: bool = False,
-        documents: list[Document] | None = None,
-    ) -> AsyncGenerator:
-        # if a document is passed in a turn, we parse the raw text of the document
-        # and send it as a user message
-        if documents:
-            contexts = []
-            for document in documents:
-                raw_document_text = await get_raw_document_text(document)
-                contexts.append(raw_document_text)
-
-            attached_context = "\n".join(contexts)
-            if isinstance(input_messages[-1].content, str):
-                input_messages[-1].content += attached_context
-            elif isinstance(input_messages[-1].content, list):
-                input_messages[-1].content.append(TextContentItem(text=attached_context))
-            else:
-                input_messages[-1].content = [
-                    input_messages[-1].content,
-                    TextContentItem(text=attached_context),
-                ]
-
-        session_info = await self.storage.get_session_info(session_id)
-        # if the session has a memory bank id, let the memory tool use it
-        if session_info and session_info.vector_db_id:
-            for tool_name in self.tool_name_to_args.keys():
-                if tool_name == MEMORY_QUERY_TOOL:
-                    if "vector_db_ids" not in self.tool_name_to_args[tool_name]:
-                        self.tool_name_to_args[tool_name]["vector_db_ids"] = [session_info.vector_db_id]
-                    else:
-                        self.tool_name_to_args[tool_name]["vector_db_ids"].append(session_info.vector_db_id)
-
-        output_attachments = []
-
-        n_iter = await self.storage.get_num_infer_iters_in_turn(session_id, turn_id) or 0
-
-        # Build a map of custom tools to their definitions for faster lookup
-        client_tools = {}
-        for tool in self.agent_config.client_tools:
-            client_tools[tool.name] = tool
-        while True:
-            step_id = str(uuid.uuid4())
-            inference_start_time = datetime.now(UTC).isoformat()
-            yield AgentTurnResponseStreamChunk(
-                event=AgentTurnResponseEvent(
-                    payload=AgentTurnResponseStepStartPayload(
-                        step_type=StepType.inference.value,
-                        step_id=step_id,
-                    )
-                )
-            )
-
-            tool_calls = []
-            content = ""
-            stop_reason: StopReason | None = None
-
-            async with tracing.span("inference") as span:
-                if self.telemetry_enabled and span is not None:
-                    if self.agent_config.name:
-                        span.set_attribute("agent_name", self.agent_config.name)
-
-                def _serialize_nested(value):
-                    """Recursively serialize nested Pydantic models to dicts."""
-                    from pydantic import BaseModel
-
-                    if isinstance(value, BaseModel):
-                        return value.model_dump(mode="json")
-                    elif isinstance(value, dict):
-                        return {k: _serialize_nested(v) for k, v in value.items()}
-                    elif isinstance(value, list):
-                        return [_serialize_nested(item) for item in value]
-                    else:
-                        return value
-
-                def _add_type(openai_msg: dict) -> OpenAIMessageParam:
-                    # Serialize any nested Pydantic models to plain dicts
-                    openai_msg = _serialize_nested(openai_msg)
-
-                    role = openai_msg.get("role")
-                    if role == "user":
-                        return OpenAIUserMessageParam(**openai_msg)
-                    elif role == "system":
-                        return OpenAISystemMessageParam(**openai_msg)
-                    elif role == "assistant":
-                        return OpenAIAssistantMessageParam(**openai_msg)
-                    elif role == "tool":
-                        return OpenAIToolMessageParam(**openai_msg)
-                    elif role == "developer":
-                        return OpenAIDeveloperMessageParam(**openai_msg)
-                    else:
-                        raise ValueError(f"Unknown message role: {role}")
-
-                # Convert messages to OpenAI format
-                openai_messages: list[OpenAIMessageParam] = [
-                    _add_type(await convert_message_to_openai_dict_new(message)) for message in input_messages
-                ]
-
-                # Convert tool definitions to OpenAI format
-                openai_tools = [convert_tooldef_to_openai_tool(x) for x in (self.tool_defs or [])]
-
-                # Extract tool_choice from tool_config for OpenAI compatibility
-                # Note: tool_choice can only be provided when tools are also provided
-                tool_choice = None
-                if openai_tools and self.agent_config.tool_config and self.agent_config.tool_config.tool_choice:
-                    tc = self.agent_config.tool_config.tool_choice
-                    tool_choice_str = tc.value if hasattr(tc, "value") else str(tc)
-                    # Convert tool_choice to OpenAI format
-                    if tool_choice_str in ("auto", "none", "required"):
-                        tool_choice = tool_choice_str
-                    else:
-                        # It's a specific tool name, wrap it in the proper format
-                        tool_choice = {"type": "function", "function": {"name": tool_choice_str}}
-
-                # Convert sampling params to OpenAI format (temperature, top_p, max_tokens)
-                temperature = getattr(getattr(sampling_params, "strategy", None), "temperature", None)
-                top_p = getattr(getattr(sampling_params, "strategy", None), "top_p", None)
-                max_tokens = getattr(sampling_params, "max_tokens", None)
-
-                # Use OpenAI chat completion
-                params = OpenAIChatCompletionRequestWithExtraBody(
-                    model=self.agent_config.model,
-                    messages=openai_messages,
-                    tools=openai_tools if openai_tools else None,
-                    tool_choice=tool_choice,
-                    response_format=self.agent_config.response_format,
-                    temperature=temperature,
-                    top_p=top_p,
-                    max_tokens=max_tokens,
-                    stream=True,
-                )
-                openai_stream = await self.inference_api.openai_chat_completion(params)
-
-                # Convert OpenAI stream back to Llama Stack format
-                response_stream = convert_openai_chat_completion_stream(
-                    openai_stream, enable_incremental_tool_calls=True
-                )
-
-                async for chunk in response_stream:
-                    event = chunk.event
-                    if event.event_type == ChatCompletionResponseEventType.start:
-                        continue
-                    elif event.event_type == ChatCompletionResponseEventType.complete:
-                        stop_reason = event.stop_reason or StopReason.end_of_turn
-                        continue
-
-                    delta = event.delta
-                    if delta.type == "tool_call":
-                        if delta.parse_status == ToolCallParseStatus.succeeded:
-                            tool_calls.append(delta.tool_call)
-                        elif delta.parse_status == ToolCallParseStatus.failed:
-                            # If we cannot parse the tools, set the content to the unparsed raw text
-                            content = str(delta.tool_call)
-                        if stream:
-                            yield AgentTurnResponseStreamChunk(
-                                event=AgentTurnResponseEvent(
-                                    payload=AgentTurnResponseStepProgressPayload(
-                                        step_type=StepType.inference.value,
-                                        step_id=step_id,
-                                        delta=delta,
-                                    )
-                                )
-                            )
-
-                    elif delta.type == "text":
-                        content += delta.text
-                        if stream and event.stop_reason is None:
-                            yield AgentTurnResponseStreamChunk(
-                                event=AgentTurnResponseEvent(
-                                    payload=AgentTurnResponseStepProgressPayload(
-                                        step_type=StepType.inference.value,
-                                        step_id=step_id,
-                                        delta=delta,
-                                    )
-                                )
-                            )
-                    else:
-                        raise ValueError(f"Unexpected delta type {type(delta)}")
-
-                if self.telemetry_enabled and span is not None:
-                    span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
-                    span.set_attribute(
-                        "input",
-                        json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
-                    )
-                    output_attr = json.dumps(
-                        {
-                            "content": content,
-                            "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
-                        }
-                    )
-                    span.set_attribute("output", output_attr)
-
-            n_iter += 1
-            await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter)
-
-            stop_reason = stop_reason or StopReason.out_of_tokens
-
-            # If tool calls were parsed successfully, null out the content;
-            # otherwise the tool call string would also be present in the content
-            # and the tokens would include the tool call syntax twice
-            if tool_calls:
-                content = ""
-
-            message = CompletionMessage(
-                content=content,
-                stop_reason=stop_reason,
-                tool_calls=tool_calls,
-            )
-
-            yield AgentTurnResponseStreamChunk(
-                event=AgentTurnResponseEvent(
-                    payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.inference.value,
-                        step_id=step_id,
-                        step_details=InferenceStep(
-                            # somewhere deep, we are re-assigning message or closing over some
-                            # variable which causes message to mutate later on. fix with a
-                            # `deepcopy` for now, but this is symptomatic of a deeper issue.
-                            step_id=step_id,
-                            turn_id=turn_id,
-                            model_response=copy.deepcopy(message),
-                            started_at=inference_start_time,
-                            completed_at=datetime.now(UTC).isoformat(),
-                        ),
-                    )
-                )
-            )
-
-            if n_iter >= self.agent_config.max_infer_iters:
-                logger.info(f"done with MAX iterations ({n_iter}), exiting.")
-                # NOTE: mark end_of_turn to indicate to client that we are done with the turn
-                # Do not continue the tool call loop after this point
-                message.stop_reason = StopReason.end_of_turn
-                yield message
-                break
-
-            if stop_reason == StopReason.out_of_tokens:
-                logger.info("out of token budget, exiting.")
-                yield message
-                break
-
-            if len(message.tool_calls) == 0:
-                if stop_reason == StopReason.end_of_turn:
-                    # TODO: UPDATE RETURN TYPE TO SEND A TUPLE OF (MESSAGE, ATTACHMENTS)
-                    if len(output_attachments) > 0:
-                        if isinstance(message.content, list):
-                            message.content += output_attachments
-                        else:
-                            message.content = [message.content] + output_attachments
-                    yield message
-                else:
-                    logger.debug(f"completion message with EOM (iter: {n_iter}): {str(message)}")
-                    input_messages = input_messages + [message]
-            else:
-                input_messages = input_messages + [message]
-
-                # Process tool calls in the message
-                client_tool_calls = []
-                non_client_tool_calls = []
-
-                # Separate client and non-client tool calls
-                for tool_call in message.tool_calls:
-                    if tool_call.tool_name in client_tools:
-                        client_tool_calls.append(tool_call)
-                    else:
-                        non_client_tool_calls.append(tool_call)
-
-                # Process non-client tool calls first
-                for tool_call in non_client_tool_calls:
-                    step_id = str(uuid.uuid4())
-                    yield AgentTurnResponseStreamChunk(
-                        event=AgentTurnResponseEvent(
-                            payload=AgentTurnResponseStepStartPayload(
-                                step_type=StepType.tool_execution.value,
-                                step_id=step_id,
-                            )
-                        )
-                    )
-
-                    yield AgentTurnResponseStreamChunk(
-                        event=AgentTurnResponseEvent(
-                            payload=AgentTurnResponseStepProgressPayload(
-                                step_type=StepType.tool_execution.value,
-                                step_id=step_id,
-                                delta=ToolCallDelta(
-                                    parse_status=ToolCallParseStatus.in_progress,
-                                    tool_call=tool_call,
-                                ),
-                            )
-                        )
-                    )
-
-                    # Execute the tool call
-                    async with tracing.span(
-                        "tool_execution",
-                        {
-                            "tool_name": tool_call.tool_name,
-                            "input": message.model_dump_json(),
-                        }
-                        if self.telemetry_enabled
-                        else {},
-                    ) as span:
-                        tool_execution_start_time = datetime.now(UTC).isoformat()
-                        tool_result = await self.execute_tool_call_maybe(
-                            session_id,
-                            tool_call,
-                        )
-                        if tool_result.content is None:
-                            raise ValueError(
-                                f"Tool call result (id: {tool_call.call_id}, name: {tool_call.tool_name}) does not have any content"
-                            )
-                        result_message = ToolResponseMessage(
-                            call_id=tool_call.call_id,
-                            content=tool_result.content,
-                        )
-                        if self.telemetry_enabled and span is not None:
-                            span.set_attribute("output", result_message.model_dump_json())
-
-                    # Store tool execution step
-                    tool_execution_step = ToolExecutionStep(
-                        step_id=step_id,
-                        turn_id=turn_id,
-                        tool_calls=[tool_call],
-                        tool_responses=[
-                            ToolResponse(
-                                call_id=tool_call.call_id,
-                                tool_name=tool_call.tool_name,
-                                content=tool_result.content,
-                                metadata=tool_result.metadata,
-                            )
-                        ],
-                        started_at=tool_execution_start_time,
-                        completed_at=datetime.now(UTC).isoformat(),
-                    )
-
-                    # Yield the step completion event
-                    yield AgentTurnResponseStreamChunk(
-                        event=AgentTurnResponseEvent(
-                            payload=AgentTurnResponseStepCompletePayload(
-                                step_type=StepType.tool_execution.value,
-                                step_id=step_id,
-                                step_details=tool_execution_step,
-                            )
-                        )
-                    )
-
-                    # Add the result message to input_messages for the next iteration
-                    input_messages.append(result_message)
-
-                    # TODO: add tool-input touchpoint and a "start" event for this step also
-                    # but that needs a lot more refactoring of Tool code potentially
-                    if (type(result_message.content) is str) and (
-                        out_attachment := _interpret_content_as_attachment(result_message.content)
-                    ):
-                        # NOTE: when we push this message back to the model, the model may ignore the
-                        # attached file path etc. since the model is trained to only provide a user message
-                        # with the summary. We keep all generated attachments and then attach them to the final message
-                        output_attachments.append(out_attachment)
-
-                # If there are client tool calls, yield a message with only those tool calls
-                if client_tool_calls:
-                    await self.storage.set_in_progress_tool_call_step(
-                        session_id,
-                        turn_id,
-                        ToolExecutionStep(
-                            step_id=step_id,
-                            turn_id=turn_id,
-                            tool_calls=client_tool_calls,
-                            tool_responses=[],
-                            started_at=datetime.now(UTC).isoformat(),
-                        ),
-                    )
-
-                    # Create a copy of the message with only client tool calls
-                    client_message = message.model_copy(deep=True)
-                    client_message.tool_calls = client_tool_calls
-                    # NOTE: mark end_of_message to indicate to client that it may
-                    # call the tool and continue the conversation with the tool's response.
-                    client_message.stop_reason = StopReason.end_of_message
-
-                    # Yield the message with client tool calls
-                    yield client_message
-                    return
-
-    async def _initialize_tools(
-        self,
-        toolgroups_for_turn: list[AgentToolGroup] | None = None,
-    ) -> None:
-        toolgroup_to_args = {}
-        for toolgroup in (self.agent_config.toolgroups or []) + (toolgroups_for_turn or []):
-            if isinstance(toolgroup, AgentToolGroupWithArgs):
-                tool_group_name, _ = self._parse_toolgroup_name(toolgroup.name)
-                toolgroup_to_args[tool_group_name] = toolgroup.args
-
-        # Determine which tools to include
-        tool_groups_to_include = toolgroups_for_turn or self.agent_config.toolgroups or []
-        agent_config_toolgroups = []
-        for toolgroup in tool_groups_to_include:
-            name = toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup
-            if name not in agent_config_toolgroups:
-                agent_config_toolgroups.append(name)
-
-        toolgroup_to_args = toolgroup_to_args or {}
-
-        tool_name_to_def = {}
-        tool_name_to_args = {}
-
-        for tool_def in self.agent_config.client_tools:
-            if tool_name_to_def.get(tool_def.name, None):
-                raise ValueError(f"Tool {tool_def.name} already exists")
-
-            # Use input_schema from ToolDef directly
-            tool_name_to_def[tool_def.name] = ToolDefinition(
-                tool_name=tool_def.name,
-                description=tool_def.description,
-                input_schema=tool_def.input_schema,
-            )
-        for toolgroup_name_with_maybe_tool_name in agent_config_toolgroups:
-            toolgroup_name, input_tool_name = self._parse_toolgroup_name(toolgroup_name_with_maybe_tool_name)
-            tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_name)
-            if not tools.data:
-                available_tool_groups = ", ".join(
-                    [t.identifier for t in (await self.tool_groups_api.list_tool_groups()).data]
-                )
-                raise ValueError(f"Toolgroup {toolgroup_name} not found, available toolgroups: {available_tool_groups}")
-            if input_tool_name is not None and not any(tool.name == input_tool_name for tool in tools.data):
-                raise ValueError(
-                    f"Tool {input_tool_name} not found in toolgroup {toolgroup_name}. Available tools: {', '.join([tool.name for tool in tools.data])}"
-                )
-
-            for tool_def in tools.data:
-                if toolgroup_name.startswith("builtin") and toolgroup_name != RAG_TOOL_GROUP:
-                    identifier: str | BuiltinTool | None = tool_def.name
-                    if identifier == "web_search":
-                        identifier = BuiltinTool.brave_search
-                    else:
-                        identifier = BuiltinTool(identifier)
-                else:
-                    # add if tool_name is unspecified or the tool_def identifier is the same as the tool_name
-                    if input_tool_name in (None, tool_def.name):
-                        identifier = tool_def.name
-                    else:
-                        identifier = None
-
-                if tool_name_to_def.get(identifier, None):
-                    raise ValueError(f"Tool {identifier} already exists")
-                if identifier:
-                    tool_name_to_def[identifier] = ToolDefinition(
-                        tool_name=identifier,
-                        description=tool_def.description,
-                        input_schema=tool_def.input_schema,
-                    )
-                    tool_name_to_args[identifier] = toolgroup_to_args.get(toolgroup_name, {})
-
-        self.tool_defs, self.tool_name_to_args = (
-            list(tool_name_to_def.values()),
-            tool_name_to_args,
-        )
-
-    def _parse_toolgroup_name(self, toolgroup_name_with_maybe_tool_name: str) -> tuple[str, str | None]:
-        """Parse a toolgroup name into its components.
-
-        Args:
-            toolgroup_name_with_maybe_tool_name: The toolgroup name to parse (e.g. "builtin::rag/knowledge_search")
-
-        Returns:
-            A tuple of (tool_group, tool_name)
-        """
-        split_names = toolgroup_name_with_maybe_tool_name.split("/")
-        if len(split_names) == 2:
-            # e.g. "builtin::rag/knowledge_search"
-            tool_group, tool_name = split_names
-        else:
-            tool_group, tool_name = split_names[0], None
-        return tool_group, tool_name
-
-    async def execute_tool_call_maybe(
-        self,
-        session_id: str,
-        tool_call: ToolCall,
-    ) -> ToolInvocationResult:
-        tool_name = tool_call.tool_name
-        registered_tool_names = [tool_def.tool_name for tool_def in self.tool_defs]
-        if tool_name not in registered_tool_names:
-            raise ValueError(
-                f"Tool {tool_name} not found in provided tools, registered tools: {', '.join([str(x) for x in registered_tool_names])}"
-            )
-        if isinstance(tool_name, BuiltinTool):
-            if tool_name == BuiltinTool.brave_search:
-                tool_name_str = WEB_SEARCH_TOOL
-            else:
-                tool_name_str = tool_name.value
-        else:
-            tool_name_str = tool_name
-
-        logger.info(f"executing tool call: {tool_name_str} with args: {tool_call.arguments}")
-
-        try:
-            args = json.loads(tool_call.arguments)
-        except json.JSONDecodeError as e:
-            raise ValueError(f"Failed to parse arguments for tool call: {tool_call.arguments}") from e
-
-        result = await self.tool_runtime_api.invoke_tool(
-            tool_name=tool_name_str,
-            kwargs={
-                "session_id": session_id,
-                # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
-                **args,
-                **self.tool_name_to_args.get(tool_name_str, {}),
-            },
-        )
-        logger.debug(f"tool call {tool_name_str} completed with result: {result}")
-        return result
-
-
-async def load_data_from_url(url: str) -> str:
-    if url.startswith("http"):
-        async with httpx.AsyncClient() as client:
-            r = await client.get(url)
-            resp = r.text
-            return resp
-    raise ValueError(f"Unexpected URL: {url}")
-
-
-async def get_raw_document_text(document: Document) -> str:
-    # Handle deprecated text/yaml mime type with warning
-    if document.mime_type == "text/yaml":
-        warnings.warn(
-            "The 'text/yaml' MIME type is deprecated. Please use 'application/yaml' instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-    elif not (document.mime_type.startswith("text/") or document.mime_type in ("application/yaml", "application/json")):
-        raise ValueError(f"Unexpected document mime type: {document.mime_type}")
-
-    if isinstance(document.content, URL):
-        return await load_data_from_url(document.content.uri)
-    elif isinstance(document.content, str):
-        return document.content
-    elif isinstance(document.content, TextContentItem):
-        return document.content.text
-    else:
-        raise ValueError(f"Unexpected document content type: {type(document.content)}")
-
-
-def _interpret_content_as_attachment(
-    content: str,
-) -> Attachment | None:
-    match = re.search(TOOLS_ATTACHMENT_KEY_REGEX, content)
-    if match:
-        snippet = match.group(1)
-        data = json.loads(snippet)
-        return Attachment(
-            url=URL(uri="file://" + data["filepath"]),
-            mime_type=data["mimetype"],
-        )
-
-    return None
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
deleted file mode 100644
index c2f6ea640..000000000
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ /dev/null
@@ -1,383 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import uuid
-from collections.abc import AsyncGenerator
-from datetime import UTC, datetime
-
-from llama_stack.apis.agents import (
-    Agent,
-    AgentConfig,
-    AgentCreateResponse,
-    Agents,
-    AgentSessionCreateResponse,
-    AgentStepResponse,
-    AgentToolGroup,
-    AgentTurnCreateRequest,
-    AgentTurnResumeRequest,
-    Document,
-    ListOpenAIResponseInputItem,
-    ListOpenAIResponseObject,
-    OpenAIResponseInput,
-    OpenAIResponseInputTool,
-    OpenAIResponseObject,
-    Order,
-    Session,
-    Turn,
-)
-from llama_stack.apis.agents.agents import ResponseGuardrail
-from llama_stack.apis.agents.openai_responses import OpenAIResponseText
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.conversations import Conversations
-from llama_stack.apis.inference import (
-    Inference,
-    ToolConfig,
-    ToolResponse,
-    ToolResponseMessage,
-    UserMessage,
-)
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.tools import ToolGroups, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
-from llama_stack.providers.utils.pagination import paginate_records
-from llama_stack.providers.utils.responses.responses_store import ResponsesStore
-
-from .agent_instance import ChatAgent
-from .config import MetaReferenceAgentsImplConfig
-from .persistence import AgentInfo
-from .responses.openai_responses import OpenAIResponsesImpl
-
-logger = get_logger(name=__name__, category="agents::meta_reference")
-
-
-class MetaReferenceAgentsImpl(Agents):
-    def __init__(
-        self,
-        config: MetaReferenceAgentsImplConfig,
-        inference_api: Inference,
-        vector_io_api: VectorIO,
-        safety_api: Safety,
-        tool_runtime_api: ToolRuntime,
-        tool_groups_api: ToolGroups,
-        conversations_api: Conversations,
-        policy: list[AccessRule],
-        telemetry_enabled: bool = False,
-    ):
-        self.config = config
-        self.inference_api = inference_api
-        self.vector_io_api = vector_io_api
-        self.safety_api = safety_api
-        self.tool_runtime_api = tool_runtime_api
-        self.tool_groups_api = tool_groups_api
-        self.conversations_api = conversations_api
-        self.telemetry_enabled = telemetry_enabled
-
-        self.in_memory_store = InmemoryKVStoreImpl()
-        self.openai_responses_impl: OpenAIResponsesImpl | None = None
-        self.policy = policy
-
-    async def initialize(self) -> None:
-        self.persistence_store = await kvstore_impl(self.config.persistence.agent_state)
-        self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy)
-        await self.responses_store.initialize()
-        self.openai_responses_impl = OpenAIResponsesImpl(
-            inference_api=self.inference_api,
-            tool_groups_api=self.tool_groups_api,
-            tool_runtime_api=self.tool_runtime_api,
-            responses_store=self.responses_store,
-            vector_io_api=self.vector_io_api,
-            safety_api=self.safety_api,
-            conversations_api=self.conversations_api,
-        )
-
-    async def create_agent(
-        self,
-        agent_config: AgentConfig,
-    ) -> AgentCreateResponse:
-        agent_id = str(uuid.uuid4())
-        created_at = datetime.now(UTC)
-
-        agent_info = AgentInfo(
-            **agent_config.model_dump(),
-            created_at=created_at,
-        )
-
-        # Store the agent info
-        await self.persistence_store.set(
-            key=f"agent:{agent_id}",
-            value=agent_info.model_dump_json(),
-        )
-
-        return AgentCreateResponse(
-            agent_id=agent_id,
-        )
-
-    async def _get_agent_impl(self, agent_id: str) -> ChatAgent:
-        agent_info_json = await self.persistence_store.get(
-            key=f"agent:{agent_id}",
-        )
-        if not agent_info_json:
-            raise ValueError(f"Could not find agent info for {agent_id}")
-
-        try:
-            agent_info = AgentInfo.model_validate_json(agent_info_json)
-        except Exception as e:
-            raise ValueError(f"Could not validate agent info for {agent_id}") from e
-
-        return ChatAgent(
-            agent_id=agent_id,
-            agent_config=agent_info,
-            inference_api=self.inference_api,
-            safety_api=self.safety_api,
-            vector_io_api=self.vector_io_api,
-            tool_runtime_api=self.tool_runtime_api,
-            tool_groups_api=self.tool_groups_api,
-            persistence_store=(
-                self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
-            ),
-            created_at=agent_info.created_at,
-            policy=self.policy,
-            telemetry_enabled=self.telemetry_enabled,
-        )
-
-    async def create_agent_session(
-        self,
-        agent_id: str,
-        session_name: str,
-    ) -> AgentSessionCreateResponse:
-        agent = await self._get_agent_impl(agent_id)
-
-        session_id = await agent.create_session(session_name)
-        return AgentSessionCreateResponse(
-            session_id=session_id,
-        )
-
-    async def create_agent_turn(
-        self,
-        agent_id: str,
-        session_id: str,
-        messages: list[UserMessage | ToolResponseMessage],
-        toolgroups: list[AgentToolGroup] | None = None,
-        documents: list[Document] | None = None,
-        stream: bool | None = False,
-        tool_config: ToolConfig | None = None,
-    ) -> AsyncGenerator:
-        request = AgentTurnCreateRequest(
-            agent_id=agent_id,
-            session_id=session_id,
-            messages=messages,
-            stream=True,
-            toolgroups=toolgroups,
-            documents=documents,
-            tool_config=tool_config,
-        )
-        if stream:
-            return self._create_agent_turn_streaming(request)
-        else:
-            raise NotImplementedError("Non-streaming agent turns not yet implemented")
-
-    async def _create_agent_turn_streaming(
-        self,
-        request: AgentTurnCreateRequest,
-    ) -> AsyncGenerator:
-        agent = await self._get_agent_impl(request.agent_id)
-        async for event in agent.create_and_execute_turn(request):
-            yield event
-
-    async def resume_agent_turn(
-        self,
-        agent_id: str,
-        session_id: str,
-        turn_id: str,
-        tool_responses: list[ToolResponse],
-        stream: bool | None = False,
-    ) -> AsyncGenerator:
-        request = AgentTurnResumeRequest(
-            agent_id=agent_id,
-            session_id=session_id,
-            turn_id=turn_id,
-            tool_responses=tool_responses,
-            stream=stream,
-        )
-        if stream:
-            return self._continue_agent_turn_streaming(request)
-        else:
-            raise NotImplementedError("Non-streaming agent turns not yet implemented")
-
-    async def _continue_agent_turn_streaming(
-        self,
-        request: AgentTurnResumeRequest,
-    ) -> AsyncGenerator:
-        agent = await self._get_agent_impl(request.agent_id)
-        async for event in agent.resume_turn(request):
-            yield event
-
-    async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn:
-        agent = await self._get_agent_impl(agent_id)
-        turn = await agent.storage.get_session_turn(session_id, turn_id)
-        return turn
-
-    async def get_agents_step(self, agent_id: str, session_id: str, turn_id: str, step_id: str) -> AgentStepResponse:
-        turn = await self.get_agents_turn(agent_id, session_id, turn_id)
-        for step in turn.steps:
-            if step.step_id == step_id:
-                return AgentStepResponse(step=step)
-        raise ValueError(f"Provided step_id {step_id} could not be found")
-
-    async def get_agents_session(
-        self,
-        agent_id: str,
-        session_id: str,
-        turn_ids: list[str] | None = None,
-    ) -> Session:
-        agent = await self._get_agent_impl(agent_id)
-
-        session_info = await agent.storage.get_session_info(session_id)
-        turns = await agent.storage.get_session_turns(session_id)
-        if turn_ids:
-            turns = [turn for turn in turns if turn.turn_id in turn_ids]
-        return Session(
-            session_name=session_info.session_name,
-            session_id=session_id,
-            turns=turns,
-            started_at=session_info.started_at,
-        )
-
-    async def delete_agents_session(self, agent_id: str, session_id: str) -> None:
-        agent = await self._get_agent_impl(agent_id)
-
-        # Delete turns first, then the session
-        await agent.storage.delete_session_turns(session_id)
-        await agent.storage.delete_session(session_id)
-
-    async def delete_agent(self, agent_id: str) -> None:
-        # First get all sessions for this agent
-        agent = await self._get_agent_impl(agent_id)
-        sessions = await agent.storage.list_sessions()
-
-        # Delete all sessions
-        for session in sessions:
-            await self.delete_agents_session(agent_id, session.session_id)
-
-        # Finally delete the agent itself
-        await self.persistence_store.delete(f"agent:{agent_id}")
-
-    async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
-        agent_keys = await self.persistence_store.keys_in_range("agent:", "agent:\xff")
-        agent_list: list[Agent] = []
-        for agent_key in agent_keys:
-            agent_id = agent_key.split(":")[1]
-
-            # Get the agent info using the key
-            agent_info_json = await self.persistence_store.get(agent_key)
-            if not agent_info_json:
-                logger.error(f"Could not find agent info for key {agent_key}")
-                continue
-
-            try:
-                agent_info = AgentInfo.model_validate_json(agent_info_json)
-                agent_list.append(
-                    Agent(
-                        agent_id=agent_id,
-                        agent_config=agent_info,
-                        created_at=agent_info.created_at,
-                    )
-                )
-            except Exception as e:
-                logger.error(f"Error parsing agent info for {agent_id}: {e}")
-                continue
-
-        # Convert Agent objects to dictionaries
-        agent_dicts = [agent.model_dump() for agent in agent_list]
-        return paginate_records(agent_dicts, start_index, limit)
-
-    async def get_agent(self, agent_id: str) -> Agent:
-        chat_agent = await self._get_agent_impl(agent_id)
-        agent = Agent(
-            agent_id=agent_id,
-            agent_config=chat_agent.agent_config,
-            created_at=chat_agent.created_at,
-        )
-        return agent
-
-    async def list_agent_sessions(
-        self, agent_id: str, start_index: int | None = None, limit: int | None = None
-    ) -> PaginatedResponse:
-        agent = await self._get_agent_impl(agent_id)
-        sessions = await agent.storage.list_sessions()
-        # Convert Session objects to dictionaries
-        session_dicts = [session.model_dump() for session in sessions]
-        return paginate_records(session_dicts, start_index, limit)
-
-    async def shutdown(self) -> None:
-        pass
-
-    # OpenAI responses
-    async def get_openai_response(
-        self,
-        response_id: str,
-    ) -> OpenAIResponseObject:
-        return await self.openai_responses_impl.get_openai_response(response_id)
-
-    async def create_openai_response(
-        self,
-        input: str | list[OpenAIResponseInput],
-        model: str,
-        instructions: str | None = None,
-        previous_response_id: str | None = None,
-        conversation: str | None = None,
-        store: bool | None = True,
-        stream: bool | None = False,
-        temperature: float | None = None,
-        text: OpenAIResponseText | None = None,
-        tools: list[OpenAIResponseInputTool] | None = None,
-        include: list[str] | None = None,
-        max_infer_iters: int | None = 10,
-        guardrails: list[ResponseGuardrail] | None = None,
-    ) -> OpenAIResponseObject:
-        return await self.openai_responses_impl.create_openai_response(
-            input,
-            model,
-            instructions,
-            previous_response_id,
-            conversation,
-            store,
-            stream,
-            temperature,
-            text,
-            tools,
-            include,
-            max_infer_iters,
-            guardrails,
-        )
-
-    async def list_openai_responses(
-        self,
-        after: str | None = None,
-        limit: int | None = 50,
-        model: str | None = None,
-        order: Order | None = Order.desc,
-    ) -> ListOpenAIResponseObject:
-        return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)
-
-    async def list_openai_response_input_items(
-        self,
-        response_id: str,
-        after: str | None = None,
-        before: str | None = None,
-        include: list[str] | None = None,
-        limit: int | None = 20,
-        order: Order | None = Order.desc,
-    ) -> ListOpenAIResponseInputItem:
-        return await self.openai_responses_impl.list_openai_response_input_items(
-            response_id, after, before, include, limit, order
-        )
-
-    async def delete_openai_response(self, response_id: str) -> None:
-        return await self.openai_responses_impl.delete_openai_response(response_id)
diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py
deleted file mode 100644
index 3b7b4729c..000000000
--- a/llama_stack/providers/inline/agents/meta_reference/persistence.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-import uuid
-from datetime import UTC, datetime
-
-from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn
-from llama_stack.apis.common.errors import SessionNotFoundError
-from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
-from llama_stack.core.access_control.datatypes import AccessRule
-from llama_stack.core.datatypes import User
-from llama_stack.core.request_headers import get_authenticated_user
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore
-
-log = get_logger(name=__name__, category="agents::meta_reference")
-
-
-class AgentSessionInfo(Session):
-    # TODO: is this used anywhere?
-    vector_db_id: str | None = None
-    started_at: datetime
-    owner: User | None = None
-    identifier: str | None = None
-    type: str = "session"
-
-
-class AgentInfo(AgentConfig):
-    created_at: datetime
-
-
-class AgentPersistence:
-    def __init__(self, agent_id: str, kvstore: KVStore, policy: list[AccessRule]):
-        self.agent_id = agent_id
-        self.kvstore = kvstore
-        self.policy = policy
-
-    async def create_session(self, name: str) -> str:
-        session_id = str(uuid.uuid4())
-
-        # Get current user's auth attributes for new sessions
-        user = get_authenticated_user()
-
-        session_info = AgentSessionInfo(
-            session_id=session_id,
-            session_name=name,
-            started_at=datetime.now(UTC),
-            owner=user,
-            turns=[],
-            identifier=name,  # should this be qualified in any way?
-        )
-        if not is_action_allowed(self.policy, "create", session_info, user):
-            raise AccessDeniedError("create", session_info, user)
-
-        await self.kvstore.set(
-            key=f"session:{self.agent_id}:{session_id}",
-            value=session_info.model_dump_json(),
-        )
-        return session_id
-
-    async def get_session_info(self, session_id: str) -> AgentSessionInfo | None:
-        value = await self.kvstore.get(
-            key=f"session:{self.agent_id}:{session_id}",
-        )
-        if not value:
-            raise SessionNotFoundError(session_id)
-
-        session_info = AgentSessionInfo(**json.loads(value))
-
-        # Check access to session
-        if not self._check_session_access(session_info):
-            return None
-
-        return session_info
-
-    def _check_session_access(self, session_info: AgentSessionInfo) -> bool:
-        """Check if current user has access to the session."""
-        # Handle backward compatibility for old sessions without access control
-        if not hasattr(session_info, "access_attributes") and not hasattr(session_info, "owner"):
-            return True
-
-        return is_action_allowed(self.policy, "read", session_info, get_authenticated_user())
-
-    async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None:
-        """Get session info if the user has access to it. For internal use by sub-session methods."""
-        session_info = await self.get_session_info(session_id)
-        if not session_info:
-            return None
-
-        return session_info
-
-    async def add_vector_db_to_session(self, session_id: str, vector_db_id: str):
-        session_info = await self.get_session_if_accessible(session_id)
-        if session_info is None:
-            raise SessionNotFoundError(session_id)
-
-        session_info.vector_db_id = vector_db_id
-        await self.kvstore.set(
-            key=f"session:{self.agent_id}:{session_id}",
-            value=session_info.model_dump_json(),
-        )
-
-    async def add_turn_to_session(self, session_id: str, turn: Turn):
-        if not await self.get_session_if_accessible(session_id):
-            raise SessionNotFoundError(session_id)
-
-        await self.kvstore.set(
-            key=f"session:{self.agent_id}:{session_id}:{turn.turn_id}",
-            value=turn.model_dump_json(),
-        )
-
-    async def get_session_turns(self, session_id: str) -> list[Turn]:
-        if not await self.get_session_if_accessible(session_id):
-            raise SessionNotFoundError(session_id)
-
-        values = await self.kvstore.values_in_range(
-            start_key=f"session:{self.agent_id}:{session_id}:",
-            end_key=f"session:{self.agent_id}:{session_id}:\xff\xff\xff\xff",
-        )
-        turns = []
-        for value in values:
-            try:
-                turn = Turn(**json.loads(value))
-                turns.append(turn)
-            except Exception as e:
-                log.error(f"Error parsing turn: {e}")
-                continue
-
-        # The kvstore does not guarantee order, so we sort by started_at
-        # to ensure consistent ordering of turns.
-        turns.sort(key=lambda t: t.started_at)
-
-        return turns
-
-    async def get_session_turn(self, session_id: str, turn_id: str) -> Turn | None:
-        if not await self.get_session_if_accessible(session_id):
-            raise SessionNotFoundError(session_id)
-
-        value = await self.kvstore.get(
-            key=f"session:{self.agent_id}:{session_id}:{turn_id}",
-        )
-        if not value:
-            return None
-        return Turn(**json.loads(value))
-
-    async def set_in_progress_tool_call_step(self, session_id: str, turn_id: str, step: ToolExecutionStep):
-        if not await self.get_session_if_accessible(session_id):
-            raise SessionNotFoundError(session_id)
-
-        await self.kvstore.set(
-            key=f"in_progress_tool_call_step:{self.agent_id}:{session_id}:{turn_id}",
-            value=step.model_dump_json(),
-        )
-
-    async def get_in_progress_tool_call_step(self, session_id: str, turn_id: str) -> ToolExecutionStep | None:
-        if not await self.get_session_if_accessible(session_id):
-            return None
-
-        value = await self.kvstore.get(
-            key=f"in_progress_tool_call_step:{self.agent_id}:{session_id}:{turn_id}",
-        )
-        return ToolExecutionStep(**json.loads(value)) if value else None
-
-    async def set_num_infer_iters_in_turn(self, session_id: str, turn_id: str, num_infer_iters: int):
-        if not await self.get_session_if_accessible(session_id):
-            raise SessionNotFoundError(session_id)
-
-        await self.kvstore.set(
-            key=f"num_infer_iters_in_turn:{self.agent_id}:{session_id}:{turn_id}",
-            value=str(num_infer_iters),
-        )
-
-    async def get_num_infer_iters_in_turn(self, session_id: str, turn_id: str) -> int | None:
-        if not await self.get_session_if_accessible(session_id):
-            return None
-
-        value = await self.kvstore.get(
-            key=f"num_infer_iters_in_turn:{self.agent_id}:{session_id}:{turn_id}",
-        )
-        return int(value) if value else None
-
-    async def list_sessions(self) -> list[Session]:
-        values = await self.kvstore.values_in_range(
-            start_key=f"session:{self.agent_id}:",
-            end_key=f"session:{self.agent_id}:\xff\xff\xff\xff",
-        )
-        sessions = []
-        for value in values:
-            try:
-                data = json.loads(value)
-                if "turn_id" in data:
-                    continue
-
-                session_info = Session(**data)
-                sessions.append(session_info)
-            except Exception as e:
-                log.error(f"Error parsing session info: {e}")
-                continue
-        return sessions
-
-    async def delete_session_turns(self, session_id: str) -> None:
-        """Delete all turns and their associated data for a session.
-
-        Args:
-            session_id: The ID of the session whose turns should be deleted.
-        """
-        turns = await self.get_session_turns(session_id)
-        for turn in turns:
-            await self.kvstore.delete(key=f"session:{self.agent_id}:{session_id}:{turn.turn_id}")
-
-    async def delete_session(self, session_id: str) -> None:
-        """Delete a session and all its associated turns.
-
-        Args:
-            session_id: The ID of the session to delete.
-
-        Raises:
-            SessionNotFoundError: If the session does not exist.
-        """
-        session_info = await self.get_session_info(session_id)
-        if session_info is None:
-            raise SessionNotFoundError(session_id)
-
-        await self.kvstore.delete(key=f"session:{self.agent_id}:{session_id}")
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
deleted file mode 100644
index 2360dafd9..000000000
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ /dev/null
@@ -1,423 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import time
-import uuid
-from collections.abc import AsyncIterator
-
-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.apis.agents import Order
-from llama_stack.apis.agents.agents import ResponseGuardrailSpec
-from llama_stack.apis.agents.openai_responses import (
-    ListOpenAIResponseInputItem,
-    ListOpenAIResponseObject,
-    OpenAIDeleteResponseObject,
-    OpenAIResponseInput,
-    OpenAIResponseInputMessageContentText,
-    OpenAIResponseInputTool,
-    OpenAIResponseMessage,
-    OpenAIResponseObject,
-    OpenAIResponseObjectStream,
-    OpenAIResponseText,
-    OpenAIResponseTextFormat,
-)
-from llama_stack.apis.common.errors import (
-    InvalidConversationIdError,
-)
-from llama_stack.apis.conversations import Conversations
-from llama_stack.apis.conversations.conversations import ConversationItem
-from llama_stack.apis.inference import (
-    Inference,
-    OpenAIMessageParam,
-    OpenAISystemMessageParam,
-)
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.tools import ToolGroups, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.responses.responses_store import (
-    ResponsesStore,
-    _OpenAIResponseObjectWithInputAndMessages,
-)
-
-from .streaming import StreamingResponseOrchestrator
-from .tool_executor import ToolExecutor
-from .types import ChatCompletionContext, ToolContext
-from .utils import (
-    convert_response_input_to_chat_messages,
-    convert_response_text_to_chat_response_format,
-    extract_guardrail_ids,
-)
-
-logger = get_logger(name=__name__, category="openai_responses")
-
-
-class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
-    input_items: ListOpenAIResponseInputItem
-    response: OpenAIResponseObject
-
-
-class OpenAIResponsesImpl:
-    def __init__(
-        self,
-        inference_api: Inference,
-        tool_groups_api: ToolGroups,
-        tool_runtime_api: ToolRuntime,
-        responses_store: ResponsesStore,
-        vector_io_api: VectorIO,  # VectorIO
-        safety_api: Safety,
-        conversations_api: Conversations,
-    ):
-        self.inference_api = inference_api
-        self.tool_groups_api = tool_groups_api
-        self.tool_runtime_api = tool_runtime_api
-        self.responses_store = responses_store
-        self.vector_io_api = vector_io_api
-        self.safety_api = safety_api
-        self.conversations_api = conversations_api
-        self.tool_executor = ToolExecutor(
-            tool_groups_api=tool_groups_api,
-            tool_runtime_api=tool_runtime_api,
-            vector_io_api=vector_io_api,
-        )
-
-    async def _prepend_previous_response(
-        self,
-        input: str | list[OpenAIResponseInput],
-        previous_response: _OpenAIResponseObjectWithInputAndMessages,
-    ):
-        new_input_items = previous_response.input.copy()
-        new_input_items.extend(previous_response.output)
-
-        if isinstance(input, str):
-            new_input_items.append(OpenAIResponseMessage(content=input, role="user"))
-        else:
-            new_input_items.extend(input)
-
-        return new_input_items
-
-    async def _process_input_with_previous_response(
-        self,
-        input: str | list[OpenAIResponseInput],
-        tools: list[OpenAIResponseInputTool] | None,
-        previous_response_id: str | None,
-        conversation: str | None,
-    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]:
-        """Process input with optional previous response context.
-
-        Returns:
-            tuple: (all_input for storage, messages for chat completion, tool context)
-        """
-        tool_context = ToolContext(tools)
-        if previous_response_id:
-            previous_response: _OpenAIResponseObjectWithInputAndMessages = (
-                await self.responses_store.get_response_object(previous_response_id)
-            )
-            all_input = await self._prepend_previous_response(input, previous_response)
-
-            if previous_response.messages:
-                # Use stored messages directly and convert only new input
-                message_adapter = TypeAdapter(list[OpenAIMessageParam])
-                messages = message_adapter.validate_python(previous_response.messages)
-                new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
-                messages.extend(new_messages)
-            else:
-                # Backward compatibility: reconstruct from inputs
-                messages = await convert_response_input_to_chat_messages(all_input)
-
-            tool_context.recover_tools_from_previous_response(previous_response)
-        elif conversation is not None:
-            conversation_items = await self.conversations_api.list(conversation, order="asc")
-
-            # Use stored messages as source of truth (like previous_response.messages)
-            stored_messages = await self.responses_store.get_conversation_messages(conversation)
-
-            all_input = input
-            if not conversation_items.data:
-                # First turn - just convert the new input
-                messages = await convert_response_input_to_chat_messages(input)
-            else:
-                if not stored_messages:
-                    all_input = conversation_items.data
-                    if isinstance(input, str):
-                        all_input.append(
-                            OpenAIResponseMessage(
-                                role="user", content=[OpenAIResponseInputMessageContentText(text=input)]
-                            )
-                        )
-                    else:
-                        all_input.extend(input)
-                else:
-                    all_input = input
-
-                messages = stored_messages or []
-                new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
-                messages.extend(new_messages)
-        else:
-            all_input = input
-            messages = await convert_response_input_to_chat_messages(all_input)
-
-        return all_input, messages, tool_context
-
-    async def get_openai_response(
-        self,
-        response_id: str,
-    ) -> OpenAIResponseObject:
-        response_with_input = await self.responses_store.get_response_object(response_id)
-        return response_with_input.to_response_object()
-
-    async def list_openai_responses(
-        self,
-        after: str | None = None,
-        limit: int | None = 50,
-        model: str | None = None,
-        order: Order | None = Order.desc,
-    ) -> ListOpenAIResponseObject:
-        return await self.responses_store.list_responses(after, limit, model, order)
-
-    async def list_openai_response_input_items(
-        self,
-        response_id: str,
-        after: str | None = None,
-        before: str | None = None,
-        include: list[str] | None = None,
-        limit: int | None = 20,
-        order: Order | None = Order.desc,
-    ) -> ListOpenAIResponseInputItem:
-        """List input items for a given OpenAI response.
-
-        :param response_id: The ID of the response to retrieve input items for.
-        :param after: An item ID to list items after, used for pagination.
-        :param before: An item ID to list items before, used for pagination.
-        :param include: Additional fields to include in the response.
-        :param limit: A limit on the number of objects to be returned.
-        :param order: The order to return the input items in.
-        :returns: A ListOpenAIResponseInputItem.
-        """
-        return await self.responses_store.list_response_input_items(response_id, after, before, include, limit, order)
-
-    async def _store_response(
-        self,
-        response: OpenAIResponseObject,
-        input: str | list[OpenAIResponseInput],
-        messages: list[OpenAIMessageParam],
-    ) -> None:
-        new_input_id = f"msg_{uuid.uuid4()}"
-        if isinstance(input, str):
-            # synthesize a message from the input string
-            input_content = OpenAIResponseInputMessageContentText(text=input)
-            input_content_item = OpenAIResponseMessage(
-                role="user",
-                content=[input_content],
-                id=new_input_id,
-            )
-            input_items_data = [input_content_item]
-        else:
-            # we already have a list of messages
-            input_items_data = []
-            for input_item in input:
-                if isinstance(input_item, OpenAIResponseMessage):
-                    # These may or may not already have an id, so dump to dict, check for id, and add if missing
-                    input_item_dict = input_item.model_dump()
-                    if "id" not in input_item_dict:
-                        input_item_dict["id"] = new_input_id
-                    input_items_data.append(OpenAIResponseMessage(**input_item_dict))
-                else:
-                    input_items_data.append(input_item)
-
-        await self.responses_store.store_response_object(
-            response_object=response,
-            input=input_items_data,
-            messages=messages,
-        )
-
-    async def create_openai_response(
-        self,
-        input: str | list[OpenAIResponseInput],
-        model: str,
-        instructions: str | None = None,
-        previous_response_id: str | None = None,
-        conversation: str | None = None,
-        store: bool | None = True,
-        stream: bool | None = False,
-        temperature: float | None = None,
-        text: OpenAIResponseText | None = None,
-        tools: list[OpenAIResponseInputTool] | None = None,
-        include: list[str] | None = None,
-        max_infer_iters: int | None = 10,
-        guardrails: list[ResponseGuardrailSpec] | None = None,
-    ):
-        stream = bool(stream)
-        text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
-
-        guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else []
-
-        if conversation is not None:
-            if previous_response_id is not None:
-                raise ValueError(
-                    "Mutually exclusive parameters: 'previous_response_id' and 'conversation'. Ensure you are only providing one of these parameters."
-                )
-
-            if not conversation.startswith("conv_"):
-                raise InvalidConversationIdError(conversation)
-
-        stream_gen = self._create_streaming_response(
-            input=input,
-            conversation=conversation,
-            model=model,
-            instructions=instructions,
-            previous_response_id=previous_response_id,
-            store=store,
-            temperature=temperature,
-            text=text,
-            tools=tools,
-            max_infer_iters=max_infer_iters,
-            guardrail_ids=guardrail_ids,
-        )
-
-        if stream:
-            return stream_gen
-        else:
-            final_response = None
-            final_event_type = None
-            failed_response = None
-
-            async for stream_chunk in stream_gen:
-                if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                    if final_response is not None:
-                        raise ValueError(
-                            "The response stream produced multiple terminal responses! "
-                            f"Earlier response from {final_event_type}"
-                        )
-                    final_response = stream_chunk.response
-                    final_event_type = stream_chunk.type
-                elif stream_chunk.type == "response.failed":
-                    failed_response = stream_chunk.response
-
-            if failed_response is not None:
-                error_message = (
-                    failed_response.error.message
-                    if failed_response and failed_response.error
-                    else "Response stream failed without error details"
-                )
-                raise RuntimeError(f"OpenAI response failed: {error_message}")
-
-            if final_response is None:
-                raise ValueError("The response stream never reached a terminal state")
-            return final_response
-
-    async def _create_streaming_response(
-        self,
-        input: str | list[OpenAIResponseInput],
-        model: str,
-        instructions: str | None = None,
-        previous_response_id: str | None = None,
-        conversation: str | None = None,
-        store: bool | None = True,
-        temperature: float | None = None,
-        text: OpenAIResponseText | None = None,
-        tools: list[OpenAIResponseInputTool] | None = None,
-        max_infer_iters: int | None = 10,
-        guardrail_ids: list[str] | None = None,
-    ) -> AsyncIterator[OpenAIResponseObjectStream]:
-        # Input preprocessing
-        all_input, messages, tool_context = await self._process_input_with_previous_response(
-            input, tools, previous_response_id, conversation
-        )
-
-        if instructions:
-            messages.insert(0, OpenAISystemMessageParam(content=instructions))
-
-        # Structured outputs
-        response_format = await convert_response_text_to_chat_response_format(text)
-
-        ctx = ChatCompletionContext(
-            model=model,
-            messages=messages,
-            response_tools=tools,
-            temperature=temperature,
-            response_format=response_format,
-            tool_context=tool_context,
-            inputs=all_input,
-        )
-
-        # Create orchestrator and delegate streaming logic
-        response_id = f"resp_{uuid.uuid4()}"
-        created_at = int(time.time())
-
-        orchestrator = StreamingResponseOrchestrator(
-            inference_api=self.inference_api,
-            ctx=ctx,
-            response_id=response_id,
-            created_at=created_at,
-            text=text,
-            max_infer_iters=max_infer_iters,
-            tool_executor=self.tool_executor,
-            safety_api=self.safety_api,
-            guardrail_ids=guardrail_ids,
-            instructions=instructions,
-        )
-
-        # Stream the response
-        final_response = None
-        failed_response = None
-
-        output_items = []
-        async for stream_chunk in orchestrator.create_response():
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                final_response = stream_chunk.response
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
-            yield stream_chunk
-
-            if stream_chunk.type == "response.output_item.done":
-                item = stream_chunk.item
-                output_items.append(item)
-
-            # Store and sync immediately after yielding terminal events
-            # This ensures the storage/syncing happens even if the consumer breaks early
-            if (
-                stream_chunk.type in {"response.completed", "response.incomplete"}
-                and final_response
-                and failed_response is None
-            ):
-                messages_to_store = list(
-                    filter(lambda x: not isinstance(x, OpenAISystemMessageParam), orchestrator.final_messages)
-                )
-                if store:
-                    # TODO: we really should work off of output_items instead of "final_messages"
-                    await self._store_response(
-                        response=final_response,
-                        input=all_input,
-                        messages=messages_to_store,
-                    )
-
-                if conversation:
-                    await self._sync_response_to_conversation(conversation, input, output_items)
-                    await self.responses_store.store_conversation_messages(conversation, messages_to_store)
-
-    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
-        return await self.responses_store.delete_response_object(response_id)
-
-    async def _sync_response_to_conversation(
-        self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
-    ) -> None:
-        """Sync content and response messages to the conversation."""
-        conversation_items = []
-
-        if isinstance(input, str):
-            conversation_items.append(
-                OpenAIResponseMessage(role="user", content=[OpenAIResponseInputMessageContentText(text=input)])
-            )
-        elif isinstance(input, list):
-            conversation_items.extend(input)
-
-        conversation_items.extend(output_items)
-
-        adapter = TypeAdapter(list[ConversationItem])
-        validated_items = adapter.validate_python(conversation_items)
-        await self.conversations_api.add_items(conversation_id, validated_items)
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
deleted file mode 100644
index 659dc599e..000000000
--- a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ /dev/null
@@ -1,449 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import asyncio
-import json
-from collections.abc import AsyncIterator
-
-from llama_stack.apis.agents.openai_responses import (
-    OpenAIResponseInputToolFileSearch,
-    OpenAIResponseInputToolMCP,
-    OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
-    OpenAIResponseObjectStreamResponseFileSearchCallInProgress,
-    OpenAIResponseObjectStreamResponseFileSearchCallSearching,
-    OpenAIResponseObjectStreamResponseMcpCallCompleted,
-    OpenAIResponseObjectStreamResponseMcpCallFailed,
-    OpenAIResponseObjectStreamResponseMcpCallInProgress,
-    OpenAIResponseObjectStreamResponseWebSearchCallCompleted,
-    OpenAIResponseObjectStreamResponseWebSearchCallInProgress,
-    OpenAIResponseObjectStreamResponseWebSearchCallSearching,
-    OpenAIResponseOutputMessageFileSearchToolCall,
-    OpenAIResponseOutputMessageFileSearchToolCallResults,
-    OpenAIResponseOutputMessageWebSearchToolCall,
-)
-from llama_stack.apis.common.content_types import (
-    ImageContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-    OpenAIChatCompletionToolCall,
-    OpenAIImageURL,
-    OpenAIToolMessageParam,
-)
-from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry import tracing
-
-from .types import ChatCompletionContext, ToolExecutionResult
-
-logger = get_logger(name=__name__, category="agents::meta_reference")
-
-
-class ToolExecutor:
-    def __init__(
-        self,
-        tool_groups_api: ToolGroups,
-        tool_runtime_api: ToolRuntime,
-        vector_io_api: VectorIO,
-    ):
-        self.tool_groups_api = tool_groups_api
-        self.tool_runtime_api = tool_runtime_api
-        self.vector_io_api = vector_io_api
-
-    async def execute_tool_call(
-        self,
-        tool_call: OpenAIChatCompletionToolCall,
-        ctx: ChatCompletionContext,
-        sequence_number: int,
-        output_index: int,
-        item_id: str,
-        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
-    ) -> AsyncIterator[ToolExecutionResult]:
-        tool_call_id = tool_call.id
-        function = tool_call.function
-        tool_kwargs = json.loads(function.arguments) if function.arguments else {}
function.arguments else {} - - if not function or not tool_call_id or not function.name: - yield ToolExecutionResult(sequence_number=sequence_number) - return - - # Emit progress events for tool execution start - async for event_result in self._emit_progress_events( - function.name, ctx, sequence_number, output_index, item_id, mcp_tool_to_server - ): - sequence_number = event_result.sequence_number - yield event_result - - # Execute the actual tool call - error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server) - - # Emit completion events for tool execution - has_error = error_exc or (result and ((result.error_code and result.error_code > 0) or result.error_message)) - async for event_result in self._emit_completion_events( - function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server - ): - sequence_number = event_result.sequence_number - yield event_result - - # Build result messages from tool execution - output_message, input_message = await self._build_result_messages( - function, tool_call_id, item_id, tool_kwargs, ctx, error_exc, result, has_error, mcp_tool_to_server - ) - - # Yield the final result - yield ToolExecutionResult( - sequence_number=sequence_number, - final_output_message=output_message, - final_input_message=input_message, - citation_files=result.metadata.get("citation_files") if result and result.metadata else None, - ) - - async def _execute_knowledge_search_via_vector_store( - self, - query: str, - response_file_search_tool: OpenAIResponseInputToolFileSearch, - ) -> ToolInvocationResult: - """Execute knowledge search using vector_stores.search API with filters support.""" - search_results = [] - - # Create search tasks for all vector stores - async def search_single_store(vector_store_id): - try: - search_response = await self.vector_io_api.openai_search_vector_store( - vector_store_id=vector_store_id, - query=query, - filters=response_file_search_tool.filters, - max_num_results=response_file_search_tool.max_num_results, - ranking_options=response_file_search_tool.ranking_options, - rewrite_query=False, - ) - return search_response.data - except Exception as e: - logger.warning(f"Failed to search vector store {vector_store_id}: {e}") - return [] - - # Run all searches in parallel using gather - search_tasks = [search_single_store(vid) for vid in response_file_search_tool.vector_store_ids] - all_results = await asyncio.gather(*search_tasks) - - # Flatten results - for results in all_results: - search_results.extend(results) - - content_items = [] - content_items.append( - TextContentItem( - text=f"knowledge_search tool found {len(search_results)} chunks:\nBEGIN of knowledge_search tool results.\n" - ) - ) - - unique_files = set() - for i, result_item in enumerate(search_results): - chunk_text = result_item.content[0].text if result_item.content else "" - # Get file_id from attributes if result_item.file_id is empty - file_id = result_item.file_id or ( - result_item.attributes.get("document_id") if result_item.attributes else None - ) - metadata_text = f"document_id: {file_id}, score: {result_item.score}" - if result_item.attributes: - metadata_text += f", attributes: {result_item.attributes}" - - text_content = f"[{i + 1}] {metadata_text} (cite as <|{file_id}|>)\n{chunk_text}\n" - content_items.append(TextContentItem(text=text_content)) - unique_files.add(file_id) - - content_items.append(TextContentItem(text="END of knowledge_search tool results.\n")) - - citation_instruction = "" - if 
unique_files: - citation_instruction = ( - " Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). " - "Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)." - ) - - content_items.append( - TextContentItem( - text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{citation_instruction}\n', - ) - ) - - # handling missing attributes for old versions - citation_files = {} - for result in search_results: - file_id = result.file_id - if not file_id and result.attributes: - file_id = result.attributes.get("document_id") - - filename = result.filename - if not filename and result.attributes: - filename = result.attributes.get("filename") - if not filename: - filename = "unknown" - - citation_files[file_id] = filename - - return ToolInvocationResult( - content=content_items, - metadata={ - "document_ids": [r.file_id for r in search_results], - "chunks": [r.content[0].text if r.content else "" for r in search_results], - "scores": [r.score for r in search_results], - "citation_files": citation_files, - }, - ) - - async def _emit_progress_events( - self, - function_name: str, - ctx: ChatCompletionContext, - sequence_number: int, - output_index: int, - item_id: str, - mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, - ) -> AsyncIterator[ToolExecutionResult]: - """Emit progress events for tool execution start.""" - # Emit in_progress event based on tool type (only for tools with specific streaming events) - progress_event = None - if mcp_tool_to_server and function_name in mcp_tool_to_server: - sequence_number += 1 - progress_event = OpenAIResponseObjectStreamResponseMcpCallInProgress( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - elif function_name == "web_search": - sequence_number += 1 - progress_event = OpenAIResponseObjectStreamResponseWebSearchCallInProgress( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - elif function_name == "knowledge_search": - sequence_number += 1 - progress_event = OpenAIResponseObjectStreamResponseFileSearchCallInProgress( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - - if progress_event: - yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number) - - # For web search, emit searching event - if function_name == "web_search": - sequence_number += 1 - searching_event = OpenAIResponseObjectStreamResponseWebSearchCallSearching( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number) - - # For file search, emit searching event - if function_name == "knowledge_search": - sequence_number += 1 - searching_event = OpenAIResponseObjectStreamResponseFileSearchCallSearching( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number) - - async def _execute_tool( - self, - function_name: str, - tool_kwargs: dict, - ctx: ChatCompletionContext, - mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, - ) -> tuple[Exception | None, any]: - """Execute the tool and return error exception and result.""" - error_exc = None - result = 
None - - try: - if mcp_tool_to_server and function_name in mcp_tool_to_server: - from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool - - mcp_tool = mcp_tool_to_server[function_name] - attributes = { - "server_label": mcp_tool.server_label, - "server_url": mcp_tool.server_url, - "tool_name": function_name, - } - async with tracing.span("invoke_mcp_tool", attributes): - result = await invoke_mcp_tool( - endpoint=mcp_tool.server_url, - headers=mcp_tool.headers or {}, - tool_name=function_name, - kwargs=tool_kwargs, - ) - elif function_name == "knowledge_search": - response_file_search_tool = next( - (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)), - None, - ) - if response_file_search_tool: - # Use vector_stores.search API instead of knowledge_search tool - # to support filters and ranking_options - query = tool_kwargs.get("query", "") - async with tracing.span("knowledge_search", {}): - result = await self._execute_knowledge_search_via_vector_store( - query=query, - response_file_search_tool=response_file_search_tool, - ) - else: - attributes = { - "tool_name": function_name, - } - async with tracing.span("invoke_tool", attributes): - result = await self.tool_runtime_api.invoke_tool( - tool_name=function_name, - kwargs=tool_kwargs, - ) - except Exception as e: - error_exc = e - - return error_exc, result - - async def _emit_completion_events( - self, - function_name: str, - ctx: ChatCompletionContext, - sequence_number: int, - output_index: int, - item_id: str, - has_error: bool, - mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, - ) -> AsyncIterator[ToolExecutionResult]: - """Emit completion or failure events for tool execution.""" - completion_event = None - - if mcp_tool_to_server and function_name in mcp_tool_to_server: - sequence_number += 1 - if has_error: - completion_event = OpenAIResponseObjectStreamResponseMcpCallFailed( - sequence_number=sequence_number, - ) - else: - completion_event = OpenAIResponseObjectStreamResponseMcpCallCompleted( - sequence_number=sequence_number, - ) - elif function_name == "web_search": - sequence_number += 1 - completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - elif function_name == "knowledge_search": - sequence_number += 1 - completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted( - item_id=item_id, - output_index=output_index, - sequence_number=sequence_number, - ) - - if completion_event: - yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number) - - async def _build_result_messages( - self, - function, - tool_call_id: str, - item_id: str, - tool_kwargs: dict, - ctx: ChatCompletionContext, - error_exc: Exception | None, - result: any, - has_error: bool, - mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None, - ) -> tuple[any, any]: - """Build output and input messages from tool execution results.""" - from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, - ) - - # Build output message - if mcp_tool_to_server and function.name in mcp_tool_to_server: - from llama_stack.apis.agents.openai_responses import ( - OpenAIResponseOutputMessageMCPCall, - ) - - message = OpenAIResponseOutputMessageMCPCall( - id=item_id, - arguments=function.arguments, - name=function.name, - server_label=mcp_tool_to_server[function.name].server_label, - ) - if error_exc: - 
message.error = str(error_exc) - elif (result and result.error_code and result.error_code > 0) or (result and result.error_message): - message.error = f"Error (code {result.error_code}): {result.error_message}" - elif result and result.content: - message.output = interleaved_content_as_str(result.content) - else: - if function.name == "web_search": - message = OpenAIResponseOutputMessageWebSearchToolCall( - id=item_id, - status="completed", - ) - if has_error: - message.status = "failed" - elif function.name == "knowledge_search": - message = OpenAIResponseOutputMessageFileSearchToolCall( - id=item_id, - queries=[tool_kwargs.get("query", "")], - status="completed", - ) - if result and "document_ids" in result.metadata: - message.results = [] - for i, doc_id in enumerate(result.metadata["document_ids"]): - text = result.metadata["chunks"][i] if "chunks" in result.metadata else None - score = result.metadata["scores"][i] if "scores" in result.metadata else None - message.results.append( - OpenAIResponseOutputMessageFileSearchToolCallResults( - file_id=doc_id, - filename=doc_id, - text=text, - score=score, - attributes={}, - ) - ) - if has_error: - message.status = "failed" - else: - raise ValueError(f"Unknown tool {function.name} called") - - # Build input message - input_message = None - if result and result.content: - if isinstance(result.content, str): - content = result.content - elif isinstance(result.content, list): - content = [] - for item in result.content: - if isinstance(item, TextContentItem): - part = OpenAIChatCompletionContentPartTextParam(text=item.text) - elif isinstance(item, ImageContentItem): - if item.image.data: - url = f"data:image;base64,{item.image.data}" - else: - url = item.image.url - part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url)) - else: - raise ValueError(f"Unknown result content type: {type(item)}") - content.append(part) - else: - raise ValueError(f"Unknown result content type: {type(result.content)}") - input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id) - else: - text = str(error_exc) if error_exc else "Tool execution failed" - input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id) - - return message, input_message diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py deleted file mode 100644 index 7ca8af632..000000000 --- a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py +++ /dev/null @@ -1,365 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
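# A minimal, self-contained sketch of the streaming contract implemented by
# ToolExecutor.execute_tool_call (removed above): intermediate ToolExecutionResult
# items carry only a stream_event plus a sequence_number, and the final item
# carries the result messages. The types and values below are hypothetical
# stand-ins for illustration, not the real llama_stack types.
import asyncio
from collections.abc import AsyncIterator
from dataclasses import dataclass


@dataclass
class StubToolExecutionResult:
    sequence_number: int
    stream_event: str | None = None  # progress/completion event, if any
    final_output_message: str | None = None  # set only on the last item


async def stub_execute_tool_call() -> AsyncIterator[StubToolExecutionResult]:
    # events first, final result last -- mirroring the generator above
    yield StubToolExecutionResult(sequence_number=1, stream_event="in_progress")
    yield StubToolExecutionResult(sequence_number=2, stream_event="completed")
    yield StubToolExecutionResult(sequence_number=3, final_output_message="tool output")


async def consume() -> str | None:
    final = None
    async for chunk in stub_execute_tool_call():
        if chunk.stream_event is not None:
            print(f"event {chunk.sequence_number}: {chunk.stream_event}")
        else:
            final = chunk.final_output_message  # the last item carries the result
    return final


if __name__ == "__main__":
    assert asyncio.run(consume()) == "tool output"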
- -import asyncio -import re -import uuid - -from llama_stack.apis.agents.agents import ResponseGuardrailSpec -from llama_stack.apis.agents.openai_responses import ( - OpenAIResponseAnnotationFileCitation, - OpenAIResponseInput, - OpenAIResponseInputFunctionToolCallOutput, - OpenAIResponseInputMessageContent, - OpenAIResponseInputMessageContentImage, - OpenAIResponseInputMessageContentText, - OpenAIResponseInputTool, - OpenAIResponseMCPApprovalRequest, - OpenAIResponseMCPApprovalResponse, - OpenAIResponseMessage, - OpenAIResponseOutputMessageContent, - OpenAIResponseOutputMessageContentOutputText, - OpenAIResponseOutputMessageFunctionToolCall, - OpenAIResponseOutputMessageMCPCall, - OpenAIResponseOutputMessageMCPListTools, - OpenAIResponseText, -) -from llama_stack.apis.inference import ( - OpenAIAssistantMessageParam, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionToolCall, - OpenAIChatCompletionToolCallFunction, - OpenAIChoice, - OpenAIDeveloperMessageParam, - OpenAIImageURL, - OpenAIJSONSchema, - OpenAIMessageParam, - OpenAIResponseFormatJSONObject, - OpenAIResponseFormatJSONSchema, - OpenAIResponseFormatParam, - OpenAIResponseFormatText, - OpenAISystemMessageParam, - OpenAIToolMessageParam, - OpenAIUserMessageParam, -) -from llama_stack.apis.safety import Safety - - -async def convert_chat_choice_to_response_message( - choice: OpenAIChoice, - citation_files: dict[str, str] | None = None, - *, - message_id: str | None = None, -) -> OpenAIResponseMessage: - """Convert an OpenAI Chat Completion choice into an OpenAI Response output message.""" - output_content = "" - if isinstance(choice.message.content, str): - output_content = choice.message.content - elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam): - output_content = choice.message.content.text - else: - raise ValueError( - f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}" - ) - - annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {}) - - return OpenAIResponseMessage( - id=message_id or f"msg_{uuid.uuid4()}", - content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)], - status="completed", - role="assistant", - ) - - -async def convert_response_content_to_chat_content( - content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]), -) -> str | list[OpenAIChatCompletionContentPartParam]: - """ - Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts. - - The content schemas of each API look similar, but are not exactly the same. 
- """ - if isinstance(content, str): - return content - - converted_parts = [] - for content_part in content: - if isinstance(content_part, OpenAIResponseInputMessageContentText): - converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) - elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): - converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) - elif isinstance(content_part, OpenAIResponseInputMessageContentImage): - if content_part.image_url: - image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) - converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) - elif isinstance(content_part, str): - converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) - else: - raise ValueError( - f"Llama Stack OpenAI Responses does not yet support content type '{type(content_part)}' in this context" - ) - return converted_parts - - -async def convert_response_input_to_chat_messages( - input: str | list[OpenAIResponseInput], - previous_messages: list[OpenAIMessageParam] | None = None, -) -> list[OpenAIMessageParam]: - """ - Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. - - :param input: The input to convert - :param previous_messages: Optional previous messages to check for function_call references - """ - messages: list[OpenAIMessageParam] = [] - if isinstance(input, list): - # extract all OpenAIResponseInputFunctionToolCallOutput items - # so their corresponding OpenAIToolMessageParam instances can - # be added immediately following the corresponding - # OpenAIAssistantMessageParam - tool_call_results = {} - for input_item in input: - if isinstance(input_item, OpenAIResponseInputFunctionToolCallOutput): - tool_call_results[input_item.call_id] = OpenAIToolMessageParam( - content=input_item.output, - tool_call_id=input_item.call_id, - ) - - for input_item in input: - if isinstance(input_item, OpenAIResponseInputFunctionToolCallOutput): - # skip as these have been extracted and inserted in order - pass - elif isinstance(input_item, OpenAIResponseOutputMessageFunctionToolCall): - tool_call = OpenAIChatCompletionToolCall( - index=0, - id=input_item.call_id, - function=OpenAIChatCompletionToolCallFunction( - name=input_item.name, - arguments=input_item.arguments, - ), - ) - messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call])) - if input_item.call_id in tool_call_results: - messages.append(tool_call_results[input_item.call_id]) - del tool_call_results[input_item.call_id] - elif isinstance(input_item, OpenAIResponseOutputMessageMCPCall): - tool_call = OpenAIChatCompletionToolCall( - index=0, - id=input_item.id, - function=OpenAIChatCompletionToolCallFunction( - name=input_item.name, - arguments=input_item.arguments, - ), - ) - messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call])) - messages.append( - OpenAIToolMessageParam( - content=input_item.output, - tool_call_id=input_item.id, - ) - ) - elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools): - # the tool list will be handled separately - pass - elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance( - input_item, OpenAIResponseMCPApprovalResponse - ): - # these are handled by the responses impl itself and not pass through to chat completions - pass - else: - content = await convert_response_content_to_chat_content(input_item.content) - message_type = await 
get_message_type_by_role(input_item.role) - if message_type is None: - raise ValueError( - f"Llama Stack OpenAI Responses does not yet support message role '{input_item.role}' in this context" - ) - # Skip user messages that duplicate the last user message in previous_messages - # This handles cases where input includes context for function_call_outputs - if previous_messages and input_item.role == "user": - last_user_msg = None - for msg in reversed(previous_messages): - if isinstance(msg, OpenAIUserMessageParam): - last_user_msg = msg - break - if last_user_msg: - last_user_content = getattr(last_user_msg, "content", None) - if last_user_content == content: - continue # Skip duplicate user message - messages.append(message_type(content=content)) - if len(tool_call_results): - # Check if unpaired function_call_outputs reference function_calls from previous messages - if previous_messages: - previous_call_ids = _extract_tool_call_ids(previous_messages) - for call_id in list(tool_call_results.keys()): - if call_id in previous_call_ids: - # Valid: this output references a call from previous messages - # Add the tool message - messages.append(tool_call_results[call_id]) - del tool_call_results[call_id] - - # If still have unpaired outputs, error - if len(tool_call_results): - raise ValueError( - f"Received function_call_output(s) with call_id(s) {tool_call_results.keys()}, but no corresponding function_call" - ) - else: - messages.append(OpenAIUserMessageParam(content=input)) - return messages - - -def _extract_tool_call_ids(messages: list[OpenAIMessageParam]) -> set[str]: - """Extract all tool_call IDs from messages.""" - call_ids = set() - for msg in messages: - if isinstance(msg, OpenAIAssistantMessageParam): - tool_calls = getattr(msg, "tool_calls", None) - if tool_calls: - for tool_call in tool_calls: - # tool_call is a Pydantic model, use attribute access - call_ids.add(tool_call.id) - return call_ids - - -async def convert_response_text_to_chat_response_format( - text: OpenAIResponseText, -) -> OpenAIResponseFormatParam: - """ - Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format. 
- """ - if not text.format or text.format["type"] == "text": - return OpenAIResponseFormatText(type="text") - if text.format["type"] == "json_object": - return OpenAIResponseFormatJSONObject() - if text.format["type"] == "json_schema": - return OpenAIResponseFormatJSONSchema( - json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"]) - ) - raise ValueError(f"Unsupported text format: {text.format}") - - -async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None: - """Get the appropriate OpenAI message parameter type for a given role.""" - role_to_type = { - "user": OpenAIUserMessageParam, - "system": OpenAISystemMessageParam, - "assistant": OpenAIAssistantMessageParam, - "developer": OpenAIDeveloperMessageParam, - } - return role_to_type.get(role) - - -def _extract_citations_from_text( - text: str, citation_files: dict[str, str] -) -> tuple[list[OpenAIResponseAnnotationFileCitation], str]: - """Extract citation markers from text and create annotations - - Args: - text: The text containing citation markers like [file-Cn3MSNn72ENTiiq11Qda4A] - citation_files: Dictionary mapping file_id to filename - - Returns: - Tuple of (annotations_list, clean_text_without_markers) - """ - file_id_regex = re.compile(r"<\|(?Pfile-[A-Za-z0-9_-]+)\|>") - - annotations = [] - parts = [] - total_len = 0 - last_end = 0 - - for m in file_id_regex.finditer(text): - # segment before the marker - prefix = text[last_end : m.start()] - - # drop one space if it exists (since marker is at sentence end) - if prefix.endswith(" "): - prefix = prefix[:-1] - - parts.append(prefix) - total_len += len(prefix) - - fid = m.group(1) - if fid in citation_files: - annotations.append( - OpenAIResponseAnnotationFileCitation( - file_id=fid, - filename=citation_files[fid], - index=total_len, # index points to punctuation - ) - ) - - last_end = m.end() - - parts.append(text[last_end:]) - cleaned_text = "".join(parts) - return annotations, cleaned_text - - -def is_function_tool_call( - tool_call: OpenAIChatCompletionToolCall, - tools: list[OpenAIResponseInputTool], -) -> bool: - if not tool_call.function: - return False - for t in tools: - if t.type == "function" and t.name == tool_call.function.name: - return True - return False - - -async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None: - """Run guardrails against messages and return violation message if blocked.""" - if not messages: - return None - - # Look up shields to get their provider_resource_id (actual model ID) - model_ids = [] - shields_list = await safety_api.routing_table.list_shields() - - for guardrail_id in guardrail_ids: - matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id] - if matching_shields: - model_id = matching_shields[0].provider_resource_id - model_ids.append(model_id) - else: - raise ValueError(f"No shield found with identifier '{guardrail_id}'") - - guardrail_tasks = [safety_api.run_moderation(messages, model=model_id) for model_id in model_ids] - responses = await asyncio.gather(*guardrail_tasks) - - for response in responses: - for result in response.results: - if result.flagged: - message = result.user_message or "Content blocked by safety guardrails" - flagged_categories = [cat for cat, flagged in result.categories.items() if flagged] - violation_type = result.metadata.get("violation_type", []) if result.metadata else [] - - if flagged_categories: - message += f" (flagged for: {', '.join(flagged_categories)})" - if 
violation_type: - message += f" (violation type: {', '.join(violation_type)})" - - return message - - -def extract_guardrail_ids(guardrails: list | None) -> list[str]: - """Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects.""" - if not guardrails: - return [] - - guardrail_ids = [] - for guardrail in guardrails: - if isinstance(guardrail, str): - guardrail_ids.append(guardrail) - elif isinstance(guardrail, ResponseGuardrailSpec): - guardrail_ids.append(guardrail.type) - else: - raise ValueError(f"Unknown guardrail format: {guardrail}, expected str or ResponseGuardrailSpec") - - return guardrail_ids diff --git a/llama_stack/providers/inline/agents/meta_reference/safety.py b/llama_stack/providers/inline/agents/meta_reference/safety.py deleted file mode 100644 index 8f3ecf5c9..000000000 --- a/llama_stack/providers/inline/agents/meta_reference/safety.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import asyncio - -from llama_stack.apis.inference import Message -from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel -from llama_stack.log import get_logger -from llama_stack.providers.utils.telemetry import tracing - -log = get_logger(name=__name__, category="agents::meta_reference") - - -class SafetyException(Exception): # noqa: N818 - def __init__(self, violation: SafetyViolation): - self.violation = violation - super().__init__(violation.user_message) - - -class ShieldRunnerMixin: - def __init__( - self, - safety_api: Safety, - input_shields: list[str] | None = None, - output_shields: list[str] | None = None, - ): - self.safety_api = safety_api - self.input_shields = input_shields - self.output_shields = output_shields - - async def run_multiple_shields(self, messages: list[Message], identifiers: list[str]) -> None: - async def run_shield_with_span(identifier: str): - async with tracing.span(f"run_shield_{identifier}"): - return await self.safety_api.run_shield( - shield_id=identifier, - messages=messages, - params={}, - ) - - responses = await asyncio.gather(*[run_shield_with_span(identifier) for identifier in identifiers]) - for identifier, response in zip(identifiers, responses, strict=False): - if not response.violation: - continue - - violation = response.violation - if violation.violation_level == ViolationLevel.ERROR: - raise SafetyException(violation) - elif violation.violation_level == ViolationLevel.WARN: - log.warning(f"[Warn]{identifier} raised a warning") diff --git a/llama_stack/providers/inline/batches/reference/__init__.py b/llama_stack/providers/inline/batches/reference/__init__.py deleted file mode 100644 index a8ae92eb2..000000000 --- a/llama_stack/providers/inline/batches/reference/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
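# A standalone sketch of the citation-marker handling in
# _extract_citations_from_text above: markers like <|file-...|> are cut out of
# the text and recorded as (file_id, index) pairs, where index points at the
# character position the marker occupied after cleaning (typically the
# sentence-ending punctuation). The file ID below is made up.
import re

FILE_ID_RE = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")


def extract(text: str) -> tuple[list[tuple[str, int]], str]:
    annotations, parts, total, last = [], [], 0, 0
    for m in FILE_ID_RE.finditer(text):
        prefix = text[last : m.start()]
        if prefix.endswith(" "):  # drop the space before a sentence-end marker
            prefix = prefix[:-1]
        parts.append(prefix)
        total += len(prefix)
        annotations.append((m.group("file_id"), total))
        last = m.end()
    parts.append(text[last:])
    return annotations, "".join(parts)


anns, clean = extract("Paris is the capital <|file-abc123|>.")
assert clean == "Paris is the capital."
assert anns == [("file-abc123", 20)]  # index of the "." in the cleaned text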
- -from typing import Any - -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models -from llama_stack.core.datatypes import AccessRule, Api -from llama_stack.providers.utils.kvstore import kvstore_impl - -from .batches import ReferenceBatchesImpl -from .config import ReferenceBatchesImplConfig - -__all__ = ["ReferenceBatchesImpl", "ReferenceBatchesImplConfig"] - - -async def get_provider_impl(config: ReferenceBatchesImplConfig, deps: dict[Api, Any], policy: list[AccessRule]): - kvstore = await kvstore_impl(config.kvstore) - inference_api: Inference | None = deps.get(Api.inference) - files_api: Files | None = deps.get(Api.files) - models_api: Models | None = deps.get(Api.models) - - if inference_api is None: - raise ValueError("Inference API is required but not provided in dependencies") - if files_api is None: - raise ValueError("Files API is required but not provided in dependencies") - if models_api is None: - raise ValueError("Models API is required but not provided in dependencies") - - impl = ReferenceBatchesImpl(config, inference_api, files_api, models_api, kvstore) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/batches/reference/batches.py b/llama_stack/providers/inline/batches/reference/batches.py deleted file mode 100644 index fa581ae1f..000000000 --- a/llama_stack/providers/inline/batches/reference/batches.py +++ /dev/null @@ -1,679 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import asyncio -import hashlib -import itertools -import json -import time -import uuid -from io import BytesIO -from typing import Any, Literal - -from openai.types.batch import BatchError, Errors -from pydantic import BaseModel - -from llama_stack.apis.batches import Batches, BatchObject, ListBatchesResponse -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError -from llama_stack.apis.files import Files, OpenAIFilePurpose -from llama_stack.apis.inference import ( - Inference, - OpenAIAssistantMessageParam, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletionRequestWithExtraBody, - OpenAIDeveloperMessageParam, - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIMessageParam, - OpenAISystemMessageParam, - OpenAIToolMessageParam, - OpenAIUserMessageParam, -) -from llama_stack.apis.models import Models -from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import KVStore - -from .config import ReferenceBatchesImplConfig - -BATCH_PREFIX = "batch:" - -logger = get_logger(__name__) - - -class AsyncBytesIO: - """ - Async-compatible BytesIO wrapper to allow async file-like operations. - - We use this when uploading files to the Files API, as it expects an - async file-like object. 
- """ - - def __init__(self, data: bytes): - self._buffer = BytesIO(data) - - async def read(self, n=-1): - return self._buffer.read(n) - - async def seek(self, pos, whence=0): - return self._buffer.seek(pos, whence) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self._buffer.close() - - def __getattr__(self, name): - return getattr(self._buffer, name) - - -class BatchRequest(BaseModel): - line_num: int - custom_id: str - method: str - url: str - body: dict[str, Any] - - -def convert_to_openai_message_param(msg: dict[str, Any]) -> OpenAIMessageParam: - """Convert a message dictionary to OpenAIMessageParam based on role.""" - role = msg.get("role") - - if role == "user": - return OpenAIUserMessageParam(**msg) - elif role == "system": - return OpenAISystemMessageParam(**msg) - elif role == "assistant": - return OpenAIAssistantMessageParam(**msg) - elif role == "tool": - return OpenAIToolMessageParam(**msg) - elif role == "developer": - return OpenAIDeveloperMessageParam(**msg) - else: - raise ValueError(f"Unknown message role: {role}") - - -class ReferenceBatchesImpl(Batches): - """Reference implementation of the Batches API. - - This implementation processes batch files by making individual requests - to the inference API and generates output files with results. - """ - - def __init__( - self, - config: ReferenceBatchesImplConfig, - inference_api: Inference, - files_api: Files, - models_api: Models, - kvstore: KVStore, - ) -> None: - self.config = config - self.kvstore = kvstore - self.inference_api = inference_api - self.files_api = files_api - self.models_api = models_api - self._processing_tasks: dict[str, asyncio.Task] = {} - self._batch_semaphore = asyncio.Semaphore(config.max_concurrent_batches) - self._update_batch_lock = asyncio.Lock() - - # this is to allow tests to disable background processing - self.process_batches = True - - async def initialize(self) -> None: - # TODO: start background processing of existing tasks - pass - - async def shutdown(self) -> None: - """Shutdown the batches provider.""" - if self._processing_tasks: - # don't cancel tasks - just let them stop naturally on shutdown - # cancelling would mark batches as "cancelled" in the database - logger.info(f"Shutdown initiated with {len(self._processing_tasks)} active batch processing tasks") - - # TODO (SECURITY): this currently works w/ configured api keys, not with x-llamastack-provider-data or with user policy restrictions - async def create_batch( - self, - input_file_id: str, - endpoint: str, - completion_window: Literal["24h"], - metadata: dict[str, str] | None = None, - idempotency_key: str | None = None, - ) -> BatchObject: - """ - Create a new batch for processing multiple API requests. - - This implementation provides optional idempotency: when an idempotency key - (idempotency_key) is provided, a deterministic ID is generated based on the input - parameters. If a batch with the same parameters already exists, it will be - returned instead of creating a duplicate. Without an idempotency key, - each request creates a new batch with a unique ID. - - Args: - input_file_id: The ID of an uploaded file containing requests for the batch. - endpoint: The endpoint to be used for all requests in the batch. - completion_window: The time window within which the batch should be processed. - metadata: Optional metadata for the batch. - idempotency_key: Optional idempotency key for enabling idempotent behavior. - - Returns: - The created or existing batch object. 
- """ - - # Error handling by levels - - # 0. Input param handling, results in 40x errors before processing, e.g. - # - Wrong completion_window - # - Invalid metadata types - # - Unknown endpoint - # -> no batch created - # 1. Errors preventing processing, result in BatchErrors aggregated in process_batch, e.g. - # - input_file_id missing - # - invalid json in file - # - missing custom_id, method, url, body - # - invalid model - # - streaming - # -> batch created, validation sends to failed status - # 2. Processing errors, result in error_file_id entries, e.g. - # - Any error returned from inference endpoint - # -> batch created, goes to completed status - - # TODO: set expiration time for garbage collection - - if endpoint not in ["/v1/chat/completions", "/v1/completions", "/v1/embeddings"]: - raise ValueError( - f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions, /v1/completions, /v1/embeddings. Code: invalid_value. Param: endpoint", - ) - - if completion_window != "24h": - raise ValueError( - f"Invalid completion_window: {completion_window}. Supported values are: 24h. Code: invalid_value. Param: completion_window", - ) - - batch_id = f"batch_{uuid.uuid4().hex[:16]}" - - # For idempotent requests, use the idempotency key for the batch ID - # This ensures the same key always maps to the same batch ID, - # allowing us to detect parameter conflicts - if idempotency_key is not None: - hash_input = idempotency_key.encode("utf-8") - hash_digest = hashlib.sha256(hash_input).hexdigest()[:24] - batch_id = f"batch_{hash_digest}" - - try: - existing_batch = await self.retrieve_batch(batch_id) - - if ( - existing_batch.input_file_id != input_file_id - or existing_batch.endpoint != endpoint - or existing_batch.completion_window != completion_window - or existing_batch.metadata != metadata - ): - raise ConflictError( - f"Idempotency key '{idempotency_key}' was previously used with different parameters. " - "Either use a new idempotency key or ensure all parameters match the original request." 
- ) - - logger.info(f"Returning existing batch with ID: {batch_id}") - return existing_batch - except ResourceNotFoundError: - # Batch doesn't exist, continue with creation - pass - - current_time = int(time.time()) - - batch = BatchObject( - id=batch_id, - object="batch", - endpoint=endpoint, - input_file_id=input_file_id, - completion_window=completion_window, - status="validating", - created_at=current_time, - metadata=metadata, - ) - - await self.kvstore.set(f"batch:{batch_id}", batch.to_json()) - logger.info(f"Created new batch with ID: {batch_id}") - - if self.process_batches: - task = asyncio.create_task(self._process_batch(batch_id)) - self._processing_tasks[batch_id] = task - - return batch - - async def cancel_batch(self, batch_id: str) -> BatchObject: - """Cancel a batch that is in progress.""" - batch = await self.retrieve_batch(batch_id) - - if batch.status in ["cancelled", "cancelling"]: - return batch - - if batch.status in ["completed", "failed", "expired"]: - raise ConflictError(f"Cannot cancel batch '{batch_id}' with status '{batch.status}'") - - await self._update_batch(batch_id, status="cancelling", cancelling_at=int(time.time())) - - if batch_id in self._processing_tasks: - self._processing_tasks[batch_id].cancel() - # note: task removal and status="cancelled" handled in finally block of _process_batch - - return await self.retrieve_batch(batch_id) - - async def list_batches( - self, - after: str | None = None, - limit: int = 20, - ) -> ListBatchesResponse: - """ - List all batches, eventually only for the current user. - - With no notion of user, we return all batches. - """ - batch_values = await self.kvstore.values_in_range("batch:", "batch:\xff") - - batches = [] - for batch_data in batch_values: - if batch_data: - batches.append(BatchObject.model_validate_json(batch_data)) - - batches.sort(key=lambda b: b.created_at, reverse=True) - - start_idx = 0 - if after: - for i, batch in enumerate(batches): - if batch.id == after: - start_idx = i + 1 - break - - page_batches = batches[start_idx : start_idx + limit] - has_more = (start_idx + limit) < len(batches) - - first_id = page_batches[0].id if page_batches else None - last_id = page_batches[-1].id if page_batches else None - - return ListBatchesResponse( - data=page_batches, - first_id=first_id, - last_id=last_id, - has_more=has_more, - ) - - async def retrieve_batch(self, batch_id: str) -> BatchObject: - """Retrieve information about a specific batch.""" - batch_data = await self.kvstore.get(f"batch:{batch_id}") - if not batch_data: - raise ResourceNotFoundError(batch_id, "Batch", "batches.list()") - - return BatchObject.model_validate_json(batch_data) - - async def _update_batch(self, batch_id: str, **updates) -> None: - """Update batch fields in kvstore.""" - async with self._update_batch_lock: - try: - batch = await self.retrieve_batch(batch_id) - - # batch processing is async. 
once cancelling, only allow "cancelled" status updates - if batch.status == "cancelling" and updates.get("status") != "cancelled": - logger.info( - f"Skipping status update for cancelled batch {batch_id}: attempted {updates.get('status')}" - ) - return - - if "errors" in updates: - updates["errors"] = updates["errors"].model_dump() - - batch_dict = batch.model_dump() - batch_dict.update(updates) - - await self.kvstore.set(f"batch:{batch_id}", json.dumps(batch_dict)) - except Exception as e: - logger.error(f"Failed to update batch {batch_id}: {e}") - - async def _validate_input(self, batch: BatchObject) -> tuple[list[BatchError], list[BatchRequest]]: - """ - Read & validate input, return errors and valid input. - - Validation of - - input_file_id existence - - valid json - - custom_id, method, url, body presence and validity - - no streaming - """ - requests: list[BatchRequest] = [] - errors: list[BatchError] = [] - try: - await self.files_api.openai_retrieve_file(batch.input_file_id) - except Exception: - errors.append( - BatchError( - code="invalid_request", - line=None, - message=f"Cannot find file {batch.input_file_id}.", - param="input_file_id", - ) - ) - return errors, requests - - # TODO(SECURITY): do something about large files - file_content_response = await self.files_api.openai_retrieve_file_content(batch.input_file_id) - file_content = file_content_response.body.decode("utf-8") - for line_num, line in enumerate(file_content.strip().split("\n"), 1): - if line.strip(): # skip empty lines - try: - request = json.loads(line) - - if not isinstance(request, dict): - errors.append( - BatchError( - code="invalid_request", - line=line_num, - message="Each line must be a JSON dictionary object", - ) - ) - continue - - valid = True - - for param, expected_type, type_string in [ - ("custom_id", str, "string"), - ("method", str, "string"), - ("url", str, "string"), - ("body", dict, "JSON dictionary object"), - ]: - if param not in request: - errors.append( - BatchError( - code="missing_required_parameter", - line=line_num, - message=f"Missing required parameter: {param}", - param=param, - ) - ) - valid = False - elif not isinstance(request[param], expected_type): - param_name = "URL" if param == "url" else param.capitalize() - errors.append( - BatchError( - code="invalid_request", - line=line_num, - message=f"{param_name} must be a {type_string}", - param=param, - ) - ) - valid = False - - if (url := request.get("url")) and isinstance(url, str) and url != batch.endpoint: - errors.append( - BatchError( - code="invalid_url", - line=line_num, - message="URL provided for this request does not match the batch endpoint", - param="url", - ) - ) - valid = False - - if (body := request.get("body")) and isinstance(body, dict): - if body.get("stream", False): - errors.append( - BatchError( - code="streaming_unsupported", - line=line_num, - message="Streaming is not supported in batch processing", - param="body.stream", - ) - ) - valid = False - - if batch.endpoint == "/v1/chat/completions": - required_params: list[tuple[str, Any, str]] = [ - ("model", str, "a string"), - # messages is specific to /v1/chat/completions - # we could skip validating messages here and let inference fail. however, - # that would be a very expensive way to find out messages is wrong. - ("messages", list, "an array"), # TODO: allow messages to be a string? 
- ] - elif batch.endpoint == "/v1/completions": - required_params = [ - ("model", str, "a string"), - ("prompt", str, "a string"), # TODO: allow prompt to be a list of strings?? - ] - else: # /v1/embeddings - required_params = [ - ("model", str, "a string"), - ("input", (str, list), "a string or array of strings"), - ] - - for param, expected_type, type_string in required_params: - if param not in body: - errors.append( - BatchError( - code="invalid_request", - line=line_num, - message=f"{param.capitalize()} parameter is required", - param=f"body.{param}", - ) - ) - valid = False - elif not isinstance(body[param], expected_type): - errors.append( - BatchError( - code="invalid_request", - line=line_num, - message=f"{param.capitalize()} must be {type_string}", - param=f"body.{param}", - ) - ) - valid = False - - if "model" in body and isinstance(body["model"], str): - try: - await self.models_api.get_model(body["model"]) - except Exception: - errors.append( - BatchError( - code="model_not_found", - line=line_num, - message=f"Model '{body['model']}' does not exist or is not supported", - param="body.model", - ) - ) - valid = False - - if valid: - assert isinstance(url, str), "URL must be a string" # for mypy - assert isinstance(body, dict), "Body must be a dictionary" # for mypy - requests.append( - BatchRequest( - line_num=line_num, - url=url, - method=request["method"], - custom_id=request["custom_id"], - body=body, - ), - ) - except json.JSONDecodeError: - errors.append( - BatchError( - code="invalid_json_line", - line=line_num, - message="This line is not parseable as valid JSON.", - ) - ) - - return errors, requests - - async def _process_batch(self, batch_id: str) -> None: - """Background task to process a batch of requests.""" - try: - logger.info(f"Starting batch processing for {batch_id}") - async with self._batch_semaphore: # semaphore to limit concurrency - logger.info(f"Acquired semaphore for batch {batch_id}") - await self._process_batch_impl(batch_id) - except asyncio.CancelledError: - logger.info(f"Batch processing cancelled for {batch_id}") - await self._update_batch(batch_id, status="cancelled", cancelled_at=int(time.time())) - except Exception as e: - logger.error(f"Batch processing failed for {batch_id}: {e}") - await self._update_batch( - batch_id, - status="failed", - failed_at=int(time.time()), - errors=Errors(data=[BatchError(code="internal_error", message=str(e))]), - ) - finally: - self._processing_tasks.pop(batch_id, None) - - async def _process_batch_impl(self, batch_id: str) -> None: - """Implementation of batch processing logic.""" - errors: list[BatchError] = [] - batch = await self.retrieve_batch(batch_id) - - errors, requests = await self._validate_input(batch) - if errors: - await self._update_batch(batch_id, status="failed", failed_at=int(time.time()), errors=Errors(data=errors)) - logger.info(f"Batch validation failed for {batch_id} with {len(errors)} errors") - return - - logger.info(f"Processing {len(requests)} requests for batch {batch_id}") - - total_requests = len(requests) - await self._update_batch( - batch_id, - status="in_progress", - request_counts={"total": total_requests, "completed": 0, "failed": 0}, - ) - - error_results = [] - success_results = [] - completed_count = 0 - failed_count = 0 - - for chunk in itertools.batched(requests, self.config.max_concurrent_requests_per_batch): - # we use a TaskGroup to ensure all process-single-request tasks are canceled when process-batch is cancelled - async with asyncio.TaskGroup() as tg: - chunk_tasks = 
[tg.create_task(self._process_single_request(batch_id, request)) for request in chunk] - - chunk_results = await asyncio.gather(*chunk_tasks, return_exceptions=True) - - for result in chunk_results: - if isinstance(result, dict) and result.get("error") is not None: # error response from inference - failed_count += 1 - error_results.append(result) - elif isinstance(result, dict) and result.get("response") is not None: # successful inference - completed_count += 1 - success_results.append(result) - else: # unexpected result - failed_count += 1 - errors.append(BatchError(code="internal_error", message=f"Unexpected result: {result}")) - - await self._update_batch( - batch_id, - request_counts={"total": total_requests, "completed": completed_count, "failed": failed_count}, - ) - - if errors: - await self._update_batch( - batch_id, status="failed", failed_at=int(time.time()), errors=Errors(data=errors) - ) - return - - try: - output_file_id = await self._create_output_file(batch_id, success_results, "success") - await self._update_batch(batch_id, output_file_id=output_file_id) - - error_file_id = await self._create_output_file(batch_id, error_results, "error") - await self._update_batch(batch_id, error_file_id=error_file_id) - - await self._update_batch(batch_id, status="completed", completed_at=int(time.time())) - - logger.info( - f"Batch processing completed for {batch_id}: {completed_count} completed, {failed_count} failed" - ) - except Exception as e: - # note: errors is empty at this point, so we don't lose anything by ignoring it - await self._update_batch( - batch_id, - status="failed", - failed_at=int(time.time()), - errors=Errors(data=[BatchError(code="output_failed", message=str(e))]), - ) - - async def _process_single_request(self, batch_id: str, request: BatchRequest) -> dict: - """Process a single request from the batch.""" - request_id = f"batch_req_{batch_id}_{request.line_num}" - - try: - # TODO(SECURITY): review body for security issues - if request.url == "/v1/chat/completions": - request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]] - chat_params = OpenAIChatCompletionRequestWithExtraBody(**request.body) - chat_response = await self.inference_api.openai_chat_completion(chat_params) - - # this is for mypy, we don't allow streaming so we'll get the right type - assert hasattr(chat_response, "model_dump_json"), "Chat response must have model_dump_json method" - return { - "id": request_id, - "custom_id": request.custom_id, - "response": { - "status_code": 200, - "request_id": request_id, # TODO: should this be different? 
- "body": chat_response.model_dump_json(), - }, - } - elif request.url == "/v1/completions": - completion_params = OpenAICompletionRequestWithExtraBody(**request.body) - completion_response = await self.inference_api.openai_completion(completion_params) - - # this is for mypy, we don't allow streaming so we'll get the right type - assert hasattr(completion_response, "model_dump_json"), ( - "Completion response must have model_dump_json method" - ) - return { - "id": request_id, - "custom_id": request.custom_id, - "response": { - "status_code": 200, - "request_id": request_id, - "body": completion_response.model_dump_json(), - }, - } - else: # /v1/embeddings - embeddings_response = await self.inference_api.openai_embeddings( - OpenAIEmbeddingsRequestWithExtraBody(**request.body) - ) - assert hasattr(embeddings_response, "model_dump_json"), ( - "Embeddings response must have model_dump_json method" - ) - return { - "id": request_id, - "custom_id": request.custom_id, - "response": { - "status_code": 200, - "request_id": request_id, # TODO: should this be different? - "body": embeddings_response.model_dump_json(), - }, - } - except Exception as e: - logger.info(f"Error processing request {request.custom_id} in batch {batch_id}: {e}") - return { - "id": request_id, - "custom_id": request.custom_id, - "error": {"type": "request_failed", "message": str(e)}, - } - - async def _create_output_file(self, batch_id: str, results: list[dict], file_type: str) -> str: - """ - Create an output file with batch results. - - This function filters results based on the specified file_type - and uploads the file to the Files API. - """ - output_lines = [json.dumps(result) for result in results] - - with AsyncBytesIO("\n".join(output_lines).encode("utf-8")) as file_buffer: - file_buffer.filename = f"{batch_id}_{file_type}.jsonl" - uploaded_file = await self.files_api.openai_upload_file(file=file_buffer, purpose=OpenAIFilePurpose.BATCH) - return uploaded_file.id diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py deleted file mode 100644 index e8ebeb30d..000000000 --- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -from typing import Any - -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Dataset -from llama_stack.providers.datatypes import DatasetsProtocolPrivate -from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.pagination import paginate_records - -from .config import LocalFSDatasetIOConfig - -DATASETS_PREFIX = "localfs_datasets:" - - -class PandasDataframeDataset: - def __init__(self, dataset_def: Dataset, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - self.dataset_def = dataset_def - self.df = None - - def __len__(self) -> int: - assert self.df is not None, "Dataset not loaded. Please call .load() first" - return len(self.df) - - def __getitem__(self, idx): - assert self.df is not None, "Dataset not loaded. 
Please call .load() first" - if isinstance(idx, slice): - return self.df.iloc[idx].to_dict(orient="records") - else: - return self.df.iloc[idx].to_dict() - - async def load(self) -> None: - if self.df is not None: - return - - if self.dataset_def.source.type == "uri": - self.df = await get_dataframe_from_uri(self.dataset_def.source.uri) - elif self.dataset_def.source.type == "rows": - import pandas - - self.df = pandas.DataFrame(self.dataset_def.source.rows) - else: - raise ValueError(f"Unsupported dataset source type: {self.dataset_def.source.type}") - - if self.df is None: - raise ValueError(f"Failed to load dataset from {self.dataset_def.url}") - - -class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): - def __init__(self, config: LocalFSDatasetIOConfig) -> None: - self.config = config - # local registry for keeping track of datasets within the provider - self.dataset_infos = {} - self.kvstore = None - - async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) - # Load existing datasets from kvstore - start_key = DATASETS_PREFIX - end_key = f"{DATASETS_PREFIX}\xff" - stored_datasets = await self.kvstore.values_in_range(start_key, end_key) - - for dataset in stored_datasets: - dataset = Dataset.model_validate_json(dataset) - self.dataset_infos[dataset.identifier] = dataset - - async def shutdown(self) -> None: ... - - async def register_dataset( - self, - dataset_def: Dataset, - ) -> None: - # Store in kvstore - key = f"{DATASETS_PREFIX}{dataset_def.identifier}" - await self.kvstore.set( - key=key, - value=dataset_def.model_dump_json(), - ) - self.dataset_infos[dataset_def.identifier] = dataset_def - - async def unregister_dataset(self, dataset_id: str) -> None: - key = f"{DATASETS_PREFIX}{dataset_id}" - await self.kvstore.delete(key=key) - del self.dataset_infos[dataset_id] - - async def iterrows( - self, - dataset_id: str, - start_index: int | None = None, - limit: int | None = None, - ) -> PaginatedResponse: - dataset_def = self.dataset_infos[dataset_id] - dataset_impl = PandasDataframeDataset(dataset_def) - await dataset_impl.load() - - records = dataset_impl.df.to_dict("records") - return paginate_records(records, start_index, limit) - - async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: - import pandas - - dataset_def = self.dataset_infos[dataset_id] - dataset_impl = PandasDataframeDataset(dataset_def) - await dataset_impl.load() - - new_rows_df = pandas.DataFrame(rows) - dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True) diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py deleted file mode 100644 index 3c1e2e462..000000000 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
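# A small sketch of the pandas access pattern used by PandasDataframeDataset
# above: rows live in a DataFrame, integer/slice indexing maps to dict records,
# and append_rows is a concat with ignore_index. Standalone; the sample rows
# are made up.
import pandas as pd

df = pd.DataFrame([{"q": "1+1?", "a": "2"}, {"q": "2+2?", "a": "4"}])

# __getitem__(int) -> one record; __getitem__(slice) -> list of records
assert df.iloc[0].to_dict() == {"q": "1+1?", "a": "2"}
assert df.iloc[0:2].to_dict(orient="records")[1]["a"] == "4"

# append_rows: concatenate the new rows, reindexing from 0..n-1
df = pd.concat([df, pd.DataFrame([{"q": "3+3?", "a": "6"}])], ignore_index=True)
assert len(df) == 3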
-import json -from typing import Any - -from tqdm import tqdm - -from llama_stack.apis.agents import Agents, StepType -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference import ( - Inference, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletionRequestWithExtraBody, - OpenAISystemMessageParam, - OpenAIUserMessageParam, - UserMessage, -) -from llama_stack.apis.scoring import Scoring -from llama_stack.providers.datatypes import BenchmarksProtocolPrivate -from llama_stack.providers.inline.agents.meta_reference.agent_instance import ( - MEMORY_QUERY_TOOL, -) -from llama_stack.providers.utils.common.data_schema_validator import ColumnName -from llama_stack.providers.utils.kvstore import kvstore_impl - -from .....apis.common.job_types import Job, JobStatus -from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse -from .config import MetaReferenceEvalConfig - -EVAL_TASKS_PREFIX = "benchmarks:" - - -class MetaReferenceEvalImpl( - Eval, - BenchmarksProtocolPrivate, -): - def __init__( - self, - config: MetaReferenceEvalConfig, - datasetio_api: DatasetIO, - datasets_api: Datasets, - scoring_api: Scoring, - inference_api: Inference, - agents_api: Agents, - ) -> None: - self.config = config - self.datasetio_api = datasetio_api - self.datasets_api = datasets_api - self.scoring_api = scoring_api - self.inference_api = inference_api - self.agents_api = agents_api - - # TODO: assume sync job, will need jobs API for async scheduling - self.jobs = {} - - self.benchmarks = {} - - async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) - # Load existing benchmarks from kvstore - start_key = EVAL_TASKS_PREFIX - end_key = f"{EVAL_TASKS_PREFIX}\xff" - stored_benchmarks = await self.kvstore.values_in_range(start_key, end_key) - - for benchmark in stored_benchmarks: - benchmark = Benchmark.model_validate_json(benchmark) - self.benchmarks[benchmark.identifier] = benchmark - - async def shutdown(self) -> None: ... 
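# The registry pattern used by MetaReferenceEvalImpl.initialize above (and by
# the localfs datasetio provider): JSON-serialized records stored under a
# common key prefix, rehydrated at startup with a lexicographic range scan
# from "<prefix>" to "<prefix>\xff". A minimal synchronous sketch with an
# in-memory dict standing in for the async KVStore interface.
def values_in_range(store: dict[str, str], start: str, end: str) -> list[str]:
    # lexicographic range scan, mirroring KVStore.values_in_range
    return [v for k, v in sorted(store.items()) if start <= k <= end]


store = {
    "benchmarks:gsm8k": '{"identifier": "gsm8k"}',
    "benchmarks:mmlu": '{"identifier": "mmlu"}',
    "datasets:other": '{"identifier": "other"}',
}
found = values_in_range(store, "benchmarks:", "benchmarks:\xff")
assert len(found) == 2  # only keys under the benchmarks: prefix are returned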
- - async def register_benchmark(self, task_def: Benchmark) -> None: - # Store in kvstore - key = f"{EVAL_TASKS_PREFIX}{task_def.identifier}" - await self.kvstore.set( - key=key, - value=task_def.model_dump_json(), - ) - self.benchmarks[task_def.identifier] = task_def - - async def unregister_benchmark(self, benchmark_id: str) -> None: - if benchmark_id in self.benchmarks: - del self.benchmarks[benchmark_id] - - key = f"{EVAL_TASKS_PREFIX}{benchmark_id}" - await self.kvstore.delete(key) - - async def run_eval( - self, - benchmark_id: str, - benchmark_config: BenchmarkConfig, - ) -> Job: - task_def = self.benchmarks[benchmark_id] - dataset_id = task_def.dataset_id - scoring_functions = task_def.scoring_functions - - # TODO (xiyan): validate dataset schema - # dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) - - all_rows = await self.datasetio_api.iterrows( - dataset_id=dataset_id, - limit=(-1 if benchmark_config.num_examples is None else benchmark_config.num_examples), - ) - res = await self.evaluate_rows( - benchmark_id=benchmark_id, - input_rows=all_rows.data, - scoring_functions=scoring_functions, - benchmark_config=benchmark_config, - ) - - # TODO: currently needs to wait for generation before returning - # need job scheduler queue (ray/celery) w/ jobs api - job_id = str(len(self.jobs)) - self.jobs[job_id] = res - return Job(job_id=job_id, status=JobStatus.completed) - - async def _run_agent_generation( - self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig - ) -> list[dict[str, Any]]: - candidate = benchmark_config.eval_candidate - create_response = await self.agents_api.create_agent(candidate.config) - agent_id = create_response.agent_id - - generations = [] - for i, x in tqdm(enumerate(input_rows)): - assert ColumnName.chat_completion_input.value in x, "Invalid input row" - input_messages = json.loads(x[ColumnName.chat_completion_input.value]) - input_messages = [UserMessage(**x) for x in input_messages if x["role"] == "user"] - - # NOTE: only single-turn agent generation is supported. 
Create a new session for each input row - session_create_response = await self.agents_api.create_agent_session(agent_id, f"session-{i}") - session_id = session_create_response.session_id - - turn_request = dict( - agent_id=agent_id, - session_id=session_id, - messages=input_messages, - stream=True, - ) - turn_response = [chunk async for chunk in await self.agents_api.create_agent_turn(**turn_request)] - final_event = turn_response[-1].event.payload - - # check if there's a memory retrieval step and extract the context - memory_rag_context = None - for step in final_event.turn.steps: - if step.step_type == StepType.tool_execution.value: - for tool_response in step.tool_responses: - if tool_response.tool_name == MEMORY_QUERY_TOOL: - memory_rag_context = " ".join(x.text for x in tool_response.content) - - agent_generation = {} - agent_generation[ColumnName.generated_answer.value] = final_event.turn.output_message.content - if memory_rag_context: - agent_generation[ColumnName.context.value] = memory_rag_context - - generations.append(agent_generation) - - return generations - - async def _run_model_generation( - self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig - ) -> list[dict[str, Any]]: - candidate = benchmark_config.eval_candidate - assert candidate.sampling_params.max_tokens is not None, "SamplingParams.max_tokens must be provided" - sampling_params = {"max_tokens": candidate.sampling_params.max_tokens} - - generations = [] - for x in tqdm(input_rows): - if ColumnName.completion_input.value in x: - if candidate.sampling_params.stop: - sampling_params["stop"] = candidate.sampling_params.stop - - input_content = json.loads(x[ColumnName.completion_input.value]) - params = OpenAICompletionRequestWithExtraBody( - model=candidate.model, - prompt=input_content, - **sampling_params, - ) - response = await self.inference_api.openai_completion(params) - generations.append({ColumnName.generated_answer.value: response.choices[0].text}) - elif ColumnName.chat_completion_input.value in x: - chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value]) - input_messages = [ - OpenAIUserMessageParam(**x) for x in chat_completion_input_json if x["role"] == "user" - ] - - messages = [] - if candidate.system_message: - messages.append(candidate.system_message) - - messages += [OpenAISystemMessageParam(**x) for x in chat_completion_input_json if x["role"] == "system"] - - messages += input_messages - params = OpenAIChatCompletionRequestWithExtraBody( - model=candidate.model, - messages=messages, - **sampling_params, - ) - response = await self.inference_api.openai_chat_completion(params) - generations.append({ColumnName.generated_answer.value: response.choices[0].message.content}) - else: - raise ValueError("Invalid input row") - - return generations - - async def evaluate_rows( - self, - benchmark_id: str, - input_rows: list[dict[str, Any]], - scoring_functions: list[str], - benchmark_config: BenchmarkConfig, - ) -> EvaluateResponse: - candidate = benchmark_config.eval_candidate - if candidate.type == "agent": - generations = await self._run_agent_generation(input_rows, benchmark_config) - elif candidate.type == "model": - generations = await self._run_model_generation(input_rows, benchmark_config) - else: - raise ValueError(f"Invalid candidate type: {candidate.type}") - - # scoring with generated_answer - score_input_rows = [ - input_r | generated_r for input_r, generated_r in zip(input_rows, generations, strict=False) - ] - - if benchmark_config.scoring_params 
is not None: - scoring_functions_dict = { - scoring_fn_id: benchmark_config.scoring_params.get(scoring_fn_id, None) - for scoring_fn_id in scoring_functions - } - else: - scoring_functions_dict = dict.fromkeys(scoring_functions) - - score_response = await self.scoring_api.score( - input_rows=score_input_rows, scoring_functions=scoring_functions_dict - ) - - return EvaluateResponse(generations=generations, scores=score_response.results) - - async def job_status(self, benchmark_id: str, job_id: str) -> Job: - if job_id in self.jobs: - return Job(job_id=job_id, status=JobStatus.completed) - - raise ValueError(f"Job {job_id} not found") - - async def job_cancel(self, benchmark_id: str, job_id: str) -> None: - raise NotImplementedError("Job cancel is not implemented yet") - - async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: - job = await self.job_status(benchmark_id, job_id) - status = job.status - if not status or status != JobStatus.completed: - raise ValueError(f"Job is not completed, Status: {status.value}") - - return self.jobs[job_id] diff --git a/llama_stack/providers/inline/files/localfs/files.py b/llama_stack/providers/inline/files/localfs/files.py deleted file mode 100644 index a76b982ce..000000000 --- a/llama_stack/providers/inline/files/localfs/files.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import time -import uuid -from pathlib import Path -from typing import Annotated - -from fastapi import Depends, File, Form, Response, UploadFile - -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.common.responses import Order -from llama_stack.apis.files import ( - ExpiresAfter, - Files, - ListOpenAIFileResponse, - OpenAIFileDeleteResponse, - OpenAIFileObject, - OpenAIFilePurpose, -) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.id_generation import generate_object_id -from llama_stack.log import get_logger -from llama_stack.providers.utils.files.form_data import parse_expires_after -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl - -from .config import LocalfsFilesImplConfig - -logger = get_logger(name=__name__, category="files") - - -class LocalfsFilesImpl(Files): - def __init__(self, config: LocalfsFilesImplConfig, policy: list[AccessRule]) -> None: - self.config = config - self.policy = policy - self.sql_store: AuthorizedSqlStore | None = None - - async def initialize(self) -> None: - """Initialize the files provider by setting up storage directory and metadata database.""" - # Create storage directory if it doesn't exist - storage_path = Path(self.config.storage_dir) - storage_path.mkdir(parents=True, exist_ok=True) - - # Initialize SQL store for metadata - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.config.metadata_store), self.policy) - await self.sql_store.create_table( - "openai_files", - { - "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), - "filename": ColumnType.STRING, - "purpose": ColumnType.STRING, - "bytes": ColumnType.INTEGER, - "created_at": ColumnType.INTEGER, - "expires_at": ColumnType.INTEGER, - "file_path": ColumnType.STRING, # Path to actual file on disk - 
}, - ) - - async def shutdown(self) -> None: - pass - - def _generate_file_id(self) -> str: - """Generate a unique file ID for OpenAI API.""" - return generate_object_id("file", lambda: f"file-{uuid.uuid4().hex}") - - def _get_file_path(self, file_id: str) -> Path: - """Get the filesystem path for a file ID.""" - return Path(self.config.storage_dir) / file_id - - async def _lookup_file_id(self, file_id: str) -> tuple[OpenAIFileObject, Path]: - """Look up an OpenAIFileObject and filesystem path from its ID.""" - if not self.sql_store: - raise RuntimeError("Files provider not initialized") - - row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}) - if not row: - raise ResourceNotFoundError(file_id, "File", "client.files.list()") - - file_path = Path(row.pop("file_path")) - return OpenAIFileObject(**row), file_path - - # OpenAI Files API Implementation - async def openai_upload_file( - self, - file: Annotated[UploadFile, File()], - purpose: Annotated[OpenAIFilePurpose, Form()], - expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None, - ) -> OpenAIFileObject: - """Upload a file that can be used across various endpoints.""" - if not self.sql_store: - raise RuntimeError("Files provider not initialized") - - if expires_after is not None: - logger.warning( - f"File expiration is not supported by this provider, ignoring expires_after: {expires_after}" - ) - - file_id = self._generate_file_id() - file_path = self._get_file_path(file_id) - - content = await file.read() - file_size = len(content) - - with open(file_path, "wb") as f: - f.write(content) - - created_at = int(time.time()) - expires_at = created_at + self.config.ttl_secs - - await self.sql_store.insert( - "openai_files", - { - "id": file_id, - "filename": file.filename or "uploaded_file", - "purpose": purpose.value, - "bytes": file_size, - "created_at": created_at, - "expires_at": expires_at, - "file_path": file_path.as_posix(), - }, - ) - - return OpenAIFileObject( - id=file_id, - filename=file.filename or "uploaded_file", - purpose=purpose, - bytes=file_size, - created_at=created_at, - expires_at=expires_at, - ) - - async def openai_list_files( - self, - after: str | None = None, - limit: int | None = 10000, - order: Order | None = Order.desc, - purpose: OpenAIFilePurpose | None = None, - ) -> ListOpenAIFileResponse: - """Returns a list of files that belong to the user's organization.""" - if not self.sql_store: - raise RuntimeError("Files provider not initialized") - - if not order: - order = Order.desc - - where_conditions = {} - if purpose: - where_conditions["purpose"] = purpose.value - - paginated_result = await self.sql_store.fetch_all( - table="openai_files", - where=where_conditions if where_conditions else None, - order_by=[("created_at", order.value)], - cursor=("id", after) if after else None, - limit=limit, - ) - - files = [ - OpenAIFileObject( - id=row["id"], - filename=row["filename"], - purpose=OpenAIFilePurpose(row["purpose"]), - bytes=row["bytes"], - created_at=row["created_at"], - expires_at=row["expires_at"], - ) - for row in paginated_result.data - ] - - return ListOpenAIFileResponse( - data=files, - has_more=paginated_result.has_more, - first_id=files[0].id if files else "", - last_id=files[-1].id if files else "", - ) - - async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject: - """Returns information about a specific file.""" - file_obj, _ = await self._lookup_file_id(file_id) - - return file_obj - - async def openai_delete_file(self, file_id: str)
-> OpenAIFileDeleteResponse: - """Delete a file.""" - # Delete physical file - _, file_path = await self._lookup_file_id(file_id) - if file_path.exists(): - file_path.unlink() - - # Delete metadata from database - assert self.sql_store is not None, "Files provider not initialized" - await self.sql_store.delete("openai_files", where={"id": file_id}) - - return OpenAIFileDeleteResponse( - id=file_id, - deleted=True, - ) - - async def openai_retrieve_file_content(self, file_id: str) -> Response: - """Returns the contents of the specified file.""" - # Read file content - file_obj, file_path = await self._lookup_file_id(file_id) - - if not file_path.exists(): - logger.warning(f"File '{file_id}'s underlying '{file_path}' is missing, deleting metadata.") - await self.openai_delete_file(file_id) - raise ResourceNotFoundError(file_id, "File", "client.files.list()") - - # Return as binary response with appropriate content type - return Response( - content=file_path.read_bytes(), - media_type="application/octet-stream", - headers={"Content-Disposition": f'attachment; filename="{file_obj.filename}"'}, - ) diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py deleted file mode 100644 index 961548f9c..000000000 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, field_validator - -from llama_stack.apis.inference import QuantizationConfig -from llama_stack.providers.utils.inference import supported_inference_models - - -class MetaReferenceInferenceConfig(BaseModel): - # this is a placeholder to indicate inference model id - # the actual inference model id is determined by the model id in the request - # Note: you need to register the model before using it for inference - # models in the resource list in the run.yaml config will be registered automatically - model: str | None = None - torch_seed: int | None = None - max_seq_len: int = 4096 - max_batch_size: int = 1 - model_parallel_size: int | None = None - - # when this is False, we assume that the distributed process group is set up by someone - # outside of this code (e.g., when run inside `torchrun`). that is useful for clients - # (including our testing code) who might be using llama-stack as a library. - create_distributed_process_group: bool = True - - # By default, the implementation will look at ~/.llama/checkpoints/ but you - # can override by specifying the directory explicitly - checkpoint_dir: str | None = None - - quantization: QuantizationConfig | None = None - - @field_validator("model") - @classmethod - def validate_model(cls, model: str) -> str: - permitted_models = supported_inference_models() - descriptors = [m.descriptor() for m in permitted_models] - repos = [m.huggingface_repo for m in permitted_models if m.huggingface_repo is not None] - if model not in (descriptors + repos): - model_list = "\n\t".join(repos) - raise ValueError(f"Unknown model: `{model}`.
Choose from [\n\t{model_list}\n]") - return model - - @classmethod - def sample_run_config( - cls, - model: str = "Llama3.2-3B-Instruct", - checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}", - quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}", - model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}", - max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}", - max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}", - **kwargs, - ) -> dict[str, Any]: - return { - "model": model, - "checkpoint_dir": checkpoint_dir, - "quantization": { - "type": quantization_type, - }, - "model_parallel_size": model_parallel_size, - "max_batch_size": max_batch_size, - "max_seq_len": max_seq_len, - } diff --git a/llama_stack/providers/inline/inference/meta_reference/generators.py b/llama_stack/providers/inline/inference/meta_reference/generators.py deleted file mode 100644 index cb926f529..000000000 --- a/llama_stack/providers/inline/inference/meta_reference/generators.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import math -from collections.abc import Generator -from typing import Optional - -import torch -from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData - -from llama_stack.apis.inference import ( - GreedySamplingStrategy, - JsonSchemaResponseFormat, - ResponseFormat, - SamplingParams, - TopPSamplingStrategy, -) -from llama_stack.models.llama.datatypes import QuantizationMode -from llama_stack.models.llama.llama3.generation import Llama3 -from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer -from llama_stack.models.llama.llama4.generation import Llama4 -from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer -from llama_stack.models.llama.sku_types import Model, ModelFamily -from llama_stack.providers.utils.inference.prompt_adapter import ( - ChatCompletionRequestWithRawContent, - CompletionRequestWithRawContent, - get_default_tool_prompt_format, -) - -from .common import model_checkpoint_dir -from .config import MetaReferenceInferenceConfig -from .inference import resolve_model - -Tokenizer = Llama4Tokenizer | Llama3Tokenizer - - -class LogitsProcessor: - def __init__(self, token_enforcer: TokenEnforcer): - self.token_enforcer = token_enforcer - self.mask: torch.Tensor | None = None - - def __call__(self, tokens: torch.Tensor, scores: torch.Tensor) -> torch.Tensor: - token_sequence = tokens[0, :].tolist() - allowed_tokens = self.token_enforcer.get_allowed_tokens(token_sequence) - - if self.mask is not None: - self.mask.fill_(-math.inf) - else: - self.mask = torch.full_like(scores, -math.inf) - - self.mask[:, :, allowed_tokens] = 0 - scores = scores + self.mask - return scores - - -def get_logits_processor( - tokenizer: Tokenizer, - vocab_size: int, - response_format: ResponseFormat | None, -) -> Optional["LogitsProcessor"]: - if response_format is None: - return None - - if not isinstance(response_format, JsonSchemaResponseFormat): - raise ValueError(f"Unsupported response format type {response_format.type}") - - parser = JsonSchemaParser(response_format.json_schema) - data = TokenEnforcerTokenizerData( - _build_regular_tokens_list(tokenizer, vocab_size), - tokenizer.decode, - tokenizer.stop_tokens, - ) - token_enforcer = TokenEnforcer(data, parser) - return LogitsProcessor(token_enforcer) - - -def 
_build_regular_tokens_list(tokenizer: Tokenizer, vocab_size: int) -> list[tuple[int, str, bool]]: - token_0 = tokenizer.encode("0", bos=False, eos=False)[-1] - regular_tokens = [] - - special_token_ids = set(tokenizer.special_tokens.values()) - for token_idx in range(vocab_size): - if token_idx in special_token_ids: - continue - - # We prepend token 0 and skip the first letter of the result to get a space if the token is a start word. - decoded_after_0 = tokenizer.decode([token_0, token_idx])[1:] - decoded_regular = tokenizer.decode([token_idx]) - is_word_start_token = len(decoded_after_0) > len(decoded_regular) - regular_tokens.append((token_idx, decoded_after_0, is_word_start_token)) - return regular_tokens - - -def _infer_sampling_params(sampling_params: SamplingParams): - if isinstance(sampling_params.strategy, GreedySamplingStrategy): - temperature = 0.0 - top_p = 1.0 - elif isinstance(sampling_params.strategy, TopPSamplingStrategy): - temperature = sampling_params.strategy.temperature or 1.0 - top_p = sampling_params.strategy.top_p or 1.0 - else: - raise ValueError(f"Unsupported sampling strategy {sampling_params.strategy}") - return temperature, top_p - - -def _infer_tool_prompt_format(request: ChatCompletionRequestWithRawContent): - tool_config = request.tool_config - if tool_config is not None and tool_config.tool_prompt_format is not None: - return tool_config.tool_prompt_format - else: - return get_default_tool_prompt_format(request.model) - - -class LlamaGenerator: - def __init__( - self, - config: MetaReferenceInferenceConfig, - model_id: str, - llama_model: Model, - ): - if config.checkpoint_dir and config.checkpoint_dir != "null": - ckpt_dir = config.checkpoint_dir - else: - resolved_model = resolve_model(model_id) - if resolved_model is None: - # if the model is not a native llama model, get the default checkpoint_dir based on model id - ckpt_dir = model_checkpoint_dir(model_id) - else: - # if the model is a native llama model, get the default checkpoint_dir based on model core_model_id value - ckpt_dir = model_checkpoint_dir(resolved_model.descriptor()) - - if config.quantization: - if config.quantization.type == "fp8_mixed": - quantization_mode = QuantizationMode.fp8_mixed - elif config.quantization.type == "int4_mixed": - quantization_mode = QuantizationMode.int4_mixed - elif config.quantization.type == "bf16": - quantization_mode = None - else: - raise ValueError(f"Unsupported quantization mode {config.quantization}") - else: - quantization_mode = None - - cls = Llama4 if llama_model.model_family == ModelFamily.llama4 else Llama3 - self.inner_generator = cls.build( - ckpt_dir=ckpt_dir, - max_seq_len=config.max_seq_len, - max_batch_size=config.max_batch_size, - world_size=config.model_parallel_size or llama_model.pth_file_count, - quantization_mode=quantization_mode, - ) - - self.tokenizer = self.inner_generator.tokenizer - self.args = self.inner_generator.args - self.formatter = self.inner_generator.formatter - - def completion( - self, - request_batch: list[CompletionRequestWithRawContent], - ) -> Generator: - first_request = request_batch[0] - sampling_params = first_request.sampling_params or SamplingParams() - max_gen_len = sampling_params.max_tokens - if max_gen_len is None or max_gen_len == 0 or max_gen_len >= self.args.max_seq_len: - max_gen_len = self.args.max_seq_len - 1 - - temperature, top_p = _infer_sampling_params(sampling_params) - yield from self.inner_generator.generate( - llm_inputs=[self.formatter.encode_content(request.content) for request in 
request_batch], - max_gen_len=max_gen_len, - temperature=temperature, - top_p=top_p, - logprobs=bool(first_request.logprobs), - echo=False, - logits_processor=get_logits_processor( - self.tokenizer, - self.args.vocab_size, - first_request.response_format, - ), - ) - - def chat_completion( - self, - request_batch: list[ChatCompletionRequestWithRawContent], - ) -> Generator: - first_request = request_batch[0] - sampling_params = first_request.sampling_params or SamplingParams() - max_gen_len = sampling_params.max_tokens - if max_gen_len is None or max_gen_len == 0 or max_gen_len >= self.args.max_seq_len: - max_gen_len = self.args.max_seq_len - 1 - - temperature, top_p = _infer_sampling_params(sampling_params) - yield from self.inner_generator.generate( - llm_inputs=[ - self.formatter.encode_dialog_prompt(request.messages, _infer_tool_prompt_format(request)) - for request in request_batch - ], - max_gen_len=max_gen_len, - temperature=temperature, - top_p=top_p, - logprobs=bool(first_request.logprobs), - echo=False, - logits_processor=get_logits_processor( - self.tokenizer, - self.args.vocab_size, - first_request.response_format, - ), - ) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py deleted file mode 100644 index 286335a7d..000000000 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import asyncio -from collections.abc import AsyncIterator - -from llama_stack.apis.inference import ( - InferenceProvider, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletionRequestWithExtraBody, -) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, -) -from llama_stack.apis.models import Model, ModelType -from llama_stack.log import get_logger -from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat -from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer -from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat -from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer -from llama_stack.models.llama.sku_list import resolve_model -from llama_stack.models.llama.sku_types import ModelFamily -from llama_stack.providers.datatypes import ModelsProtocolPrivate -from llama_stack.providers.utils.inference.embedding_mixin import ( - SentenceTransformerEmbeddingMixin, -) -from llama_stack.providers.utils.inference.model_registry import ( - ModelRegistryHelper, - build_hf_repo_model_entry, -) - -from .config import MetaReferenceInferenceConfig -from .generators import LlamaGenerator -from .model_parallel import LlamaModelParallelGenerator -

log = get_logger(__name__, category="inference") -# there's a single model parallel process running, serving the model. for now, -# we don't support multiple concurrent requests to this process.
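The comment above spells out the concurrency model of the deleted provider: a single model-parallel process serving one request at a time, guarded by `asyncio.Semaphore(1)`. A small self-contained sketch of that serialization pattern (the `fake_generate` stub is illustrative, not the provider's API):

```python
# Self-contained sketch of the Semaphore(1) serialization described above;
# `fake_generate` stands in for the actual model call.
import asyncio

SEMAPHORE = asyncio.Semaphore(1)

async def fake_generate(prompt: str) -> str:
    async with SEMAPHORE:  # at most one request reaches the model at a time
        await asyncio.sleep(0.01)  # placeholder for the generation latency
        return f"echo: {prompt}"

async def main() -> None:
    # Concurrent callers queue up on the semaphore instead of overlapping.
    results = await asyncio.gather(*(fake_generate(f"p{i}") for i in range(3)))
    print(results)

asyncio.run(main())
```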
-SEMAPHORE = asyncio.Semaphore(1) - - -def llama_builder_fn(config: MetaReferenceInferenceConfig, model_id: str, llama_model: Model) -> LlamaGenerator: - return LlamaGenerator(config, model_id, llama_model) - - -class MetaReferenceInferenceImpl( - SentenceTransformerEmbeddingMixin, - InferenceProvider, - ModelsProtocolPrivate, -): - def __init__(self, config: MetaReferenceInferenceConfig) -> None: - self.config = config - self.model_id = None - self.llama_model = None - - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - if self.config.create_distributed_process_group: - self.generator.stop() - - async def openai_completion( - self, - params: OpenAICompletionRequestWithExtraBody, - ) -> OpenAICompletion: - raise NotImplementedError("OpenAI completion not supported by meta reference provider") - - async def should_refresh_models(self) -> bool: - return False - - async def list_models(self) -> list[Model] | None: - return None - - async def unregister_model(self, model_id: str) -> None: - pass - - async def register_model(self, model: Model) -> Model: - llama_model = ( - resolve_model(model.metadata["llama_model"]) - if "llama_model" in model.metadata - else resolve_model(model.identifier) - ) - if llama_model is None: - raise ValueError( - "Please make sure your llama_model in model metadata or model identifier is in Llama SKU list" - ) - - self.model_registry_helper = ModelRegistryHelper( - [ - build_hf_repo_model_entry( - llama_model.descriptor(), - llama_model.core_model_id.value, - ) - ], - ) - model = await self.model_registry_helper.register_model(model) - - if model.model_type == ModelType.embedding: - self._load_sentence_transformer_model(model.provider_resource_id) - - # TODO: what is this?! you can't really specify skipping via model metadata - # kill this madness - if "skip_load" in model.metadata and model.metadata["skip_load"]: - return model - - await self.load_model(model.identifier, llama_model) - return model - - async def load_model(self, model_id, llama_model) -> None: - log.info(f"Loading model `{model_id}`") - - builder_params = [self.config, model_id, llama_model] - - if self.config.create_distributed_process_group: - self.generator = LlamaModelParallelGenerator( - model_parallel_size=self.config.model_parallel_size or llama_model.pth_file_count, - builder_fn=llama_builder_fn, - builder_params=builder_params, - formatter=( - Llama4ChatFormat(Llama4Tokenizer.get_instance()) - if llama_model.model_family == ModelFamily.llama4 - else Llama3ChatFormat(Llama3Tokenizer.get_instance()) - ), - ) - self.generator.start() - else: - self.generator = llama_builder_fn(*builder_params) - - self.model_id = model_id - self.llama_model = llama_model - - log.info("Warming up...") - warmup_params = OpenAIChatCompletionRequestWithExtraBody( - model=model_id, - messages=[{"role": "user", "content": "Hi how are you?"}], - max_tokens=20, - ) - await self.openai_chat_completion(warmup_params) - log.info("Warmed up!") - - def check_model(self, request) -> None: - if self.model_id is None or self.llama_model is None: - raise RuntimeError( - "No available model yet; please register your requested model or add your model to the resources first" - ) - elif request.model != self.model_id: - raise RuntimeError(f"Model mismatch: request model: {request.model} != loaded model: {self.model_id}") - - async def openai_chat_completion( - self, - params: OpenAIChatCompletionRequestWithExtraBody, - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - raise NotImplementedError("OpenAI chat completion not supported by meta-reference
inference provider") diff --git a/llama_stack/providers/inline/inference/meta_reference/model_parallel.py b/llama_stack/providers/inline/inference/meta_reference/model_parallel.py deleted file mode 100644 index 9d0295d65..000000000 --- a/llama_stack/providers/inline/inference/meta_reference/model_parallel.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from collections.abc import Callable, Generator -from copy import deepcopy -from functools import partial -from typing import Any - -from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat -from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat -from llama_stack.providers.utils.inference.prompt_adapter import ( - ChatCompletionRequestWithRawContent, - CompletionRequestWithRawContent, -) - -from .parallel_utils import ModelParallelProcessGroup - - -class ModelRunner: - def __init__(self, llama): - self.llama = llama - - # the `task` object is the same that is sent to `ModelParallelProcessGroup.run_inference()` - def __call__(self, task: Any): - if task[0] == "chat_completion": - return self.llama.chat_completion(task[1]) - else: - raise ValueError(f"Unexpected task type {task[0]}") - - -def init_model_cb( - builder_fn: Callable, - params: list[Any], -): - llama = builder_fn(*params) - return ModelRunner(llama) - - -class LlamaModelParallelGenerator: - """ - This abstraction exists so - - we can run model parallel code without needing to run the CLIs via torchrun - - this also enables use model parallel code within a notebook context. - - A Context Manager is used to ensure that the model parallel process is started and stopped - correctly. This does make the ergonomics a little awkward, because it isn't immediately - clear at the callsite why we need to use a context manager. - """ - - def __init__( - self, - model_parallel_size: int, - builder_fn: Callable, - builder_params: list[Any], - formatter: Llama3ChatFormat | Llama4ChatFormat, - ): - self.model_parallel_size = model_parallel_size - self.builder_fn = builder_fn - self.builder_params = builder_params - self.formatter = formatter - - def start(self): - self.__enter__() - - def stop(self): - self.__exit__(None, None, None) - - def __enter__(self): - self.group = ModelParallelProcessGroup( - self.model_parallel_size, - init_model_cb=partial(init_model_cb, self.builder_fn, self.builder_params), - ) - self.group.start() - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - self.group.stop() - - def completion( - self, - request_batch: list[CompletionRequestWithRawContent], - ) -> Generator: - req_obj = deepcopy(request_batch) - gen = self.group.run_inference(("completion", req_obj)) - yield from gen - - def chat_completion( - self, - request_batch: list[ChatCompletionRequestWithRawContent], - ) -> Generator: - req_obj = deepcopy(request_batch) - gen = self.group.run_inference(("chat_completion", req_obj)) - yield from gen diff --git a/llama_stack/providers/inline/post_training/huggingface/post_training.py b/llama_stack/providers/inline/post_training/huggingface/post_training.py deleted file mode 100644 index 22ace1ae0..000000000 --- a/llama_stack/providers/inline/post_training/huggingface/post_training.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -from enum import Enum -from typing import Any - -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - AlgorithmConfig, - Checkpoint, - DPOAlignmentConfig, - JobStatus, - ListPostTrainingJobsResponse, - PostTrainingJob, - PostTrainingJobArtifactsResponse, - PostTrainingJobStatusResponse, - TrainingConfig, -) -from llama_stack.providers.inline.post_training.huggingface.config import ( - HuggingFacePostTrainingConfig, -) -from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler -from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus - - -class TrainingArtifactType(Enum): - CHECKPOINT = "checkpoint" - RESOURCES_STATS = "resources_stats" - - -_JOB_TYPE_SUPERVISED_FINE_TUNE = "supervised-fine-tune" -_JOB_TYPE_DPO_TRAINING = "dpo-training" - - -class HuggingFacePostTrainingImpl: - def __init__( - self, - config: HuggingFacePostTrainingConfig, - datasetio_api: DatasetIO, - datasets: Datasets, - ) -> None: - self.config = config - self.datasetio_api = datasetio_api - self.datasets_api = datasets - self._scheduler = Scheduler() - - async def shutdown(self) -> None: - await self._scheduler.shutdown() - - @staticmethod - def _checkpoint_to_artifact(checkpoint: Checkpoint) -> JobArtifact: - return JobArtifact( - type=TrainingArtifactType.CHECKPOINT.value, - name=checkpoint.identifier, - uri=checkpoint.path, - metadata=dict(checkpoint), - ) - - @staticmethod - def _resources_stats_to_artifact(resources_stats: dict[str, Any]) -> JobArtifact: - return JobArtifact( - type=TrainingArtifactType.RESOURCES_STATS.value, - name=TrainingArtifactType.RESOURCES_STATS.value, - metadata=resources_stats, - ) - - async def supervised_fine_tune( - self, - job_uuid: str, - training_config: TrainingConfig, - hyperparam_search_config: dict[str, Any], - logger_config: dict[str, Any], - model: str, - checkpoint_dir: str | None = None, - algorithm_config: AlgorithmConfig | None = None, - ) -> PostTrainingJob: - async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb): - from llama_stack.providers.inline.post_training.huggingface.recipes.finetune_single_device import ( - HFFinetuningSingleDevice, - ) - - on_log_message_cb("Starting HF finetuning") - - recipe = HFFinetuningSingleDevice( - job_uuid=job_uuid, - datasetio_api=self.datasetio_api, - datasets_api=self.datasets_api, - ) - - resources_allocated, checkpoints = await recipe.train( - model=model, - output_dir=checkpoint_dir, - job_uuid=job_uuid, - lora_config=algorithm_config, - config=training_config, - provider_config=self.config, - ) - - on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated)) - if checkpoints: - for checkpoint in checkpoints: - artifact = self._checkpoint_to_artifact(checkpoint) - on_artifact_collected_cb(artifact) - - on_status_change_cb(SchedulerJobStatus.completed) - on_log_message_cb("HF finetuning completed") - - job_uuid = self._scheduler.schedule(_JOB_TYPE_SUPERVISED_FINE_TUNE, job_uuid, handler) - return PostTrainingJob(job_uuid=job_uuid) - - async def preference_optimize( - self, - job_uuid: str, - finetuned_model: str, - algorithm_config: DPOAlignmentConfig, - training_config: TrainingConfig, - hyperparam_search_config: dict[str, Any], - logger_config: dict[str, Any], - ) -> PostTrainingJob: - async def handler(on_log_message_cb, 
on_status_change_cb, on_artifact_collected_cb): - from llama_stack.providers.inline.post_training.huggingface.recipes.finetune_single_device_dpo import ( - HFDPOAlignmentSingleDevice, - ) - - on_log_message_cb("Starting HF DPO alignment") - - recipe = HFDPOAlignmentSingleDevice( - job_uuid=job_uuid, - datasetio_api=self.datasetio_api, - datasets_api=self.datasets_api, - ) - - resources_allocated, checkpoints = await recipe.train( - model=finetuned_model, - output_dir=f"{self.config.dpo_output_dir}/{job_uuid}", - job_uuid=job_uuid, - dpo_config=algorithm_config, - config=training_config, - provider_config=self.config, - ) - - on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated)) - if checkpoints: - for checkpoint in checkpoints: - artifact = self._checkpoint_to_artifact(checkpoint) - on_artifact_collected_cb(artifact) - else: - on_log_message_cb("Warning: No checkpoints were saved during DPO training") - - on_status_change_cb(SchedulerJobStatus.completed) - on_log_message_cb("HF DPO alignment completed") - - job_uuid = self._scheduler.schedule(_JOB_TYPE_DPO_TRAINING, job_uuid, handler) - return PostTrainingJob(job_uuid=job_uuid) - - @staticmethod - def _get_artifacts_metadata_by_type(job, artifact_type): - return [artifact.metadata for artifact in job.artifacts if artifact.type == artifact_type] - - @classmethod - def _get_checkpoints(cls, job): - return cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.CHECKPOINT.value) - - @classmethod - def _get_resources_allocated(cls, job): - data = cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.RESOURCES_STATS.value) - return data[0] if data else None - - async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse | None: - job = self._scheduler.get_job(job_uuid) - - match job.status: - # TODO: Add support for other statuses to API - case SchedulerJobStatus.new | SchedulerJobStatus.scheduled: - status = JobStatus.scheduled - case SchedulerJobStatus.running: - status = JobStatus.in_progress - case SchedulerJobStatus.completed: - status = JobStatus.completed - case SchedulerJobStatus.failed: - status = JobStatus.failed - case _: - raise NotImplementedError() - - return PostTrainingJobStatusResponse( - job_uuid=job_uuid, - status=status, - scheduled_at=job.scheduled_at, - started_at=job.started_at, - completed_at=job.completed_at, - checkpoints=self._get_checkpoints(job), - resources_allocated=self._get_resources_allocated(job), - ) - - async def cancel_training_job(self, job_uuid: str) -> None: - self._scheduler.cancel(job_uuid) - - async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse | None: - job = self._scheduler.get_job(job_uuid) - return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job)) - - async def get_training_jobs(self) -> ListPostTrainingJobsResponse: - return ListPostTrainingJobsResponse( - data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()] - ) diff --git a/llama_stack/providers/inline/post_training/huggingface/utils.py b/llama_stack/providers/inline/post_training/huggingface/utils.py deleted file mode 100644 index f229c87dd..000000000 --- a/llama_stack/providers/inline/post_training/huggingface/utils.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
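The post-training implementation above records training output as typed job artifacts and recovers checkpoints later by filtering on the type tag. A hedged sketch of that bookkeeping pattern, with simplified stand-in types rather than the real scheduler classes:

```python
# Sketch of the artifact bookkeeping used by the post-training impls: jobs
# accumulate typed artifacts; checkpoints are recovered by type tag.
from dataclasses import dataclass, field
from typing import Any

@dataclass
class JobArtifact:
    type: str
    name: str
    metadata: dict[str, Any]

@dataclass
class Job:
    artifacts: list[JobArtifact] = field(default_factory=list)

def get_artifacts_metadata_by_type(job: Job, artifact_type: str) -> list[dict[str, Any]]:
    return [a.metadata for a in job.artifacts if a.type == artifact_type]

job = Job(artifacts=[
    JobArtifact("checkpoint", "ckpt-1", {"epoch": 1, "path": "/tmp/ckpt-1"}),
    JobArtifact("resources_stats", "resources_stats", {"gpu_memory_gb": "12.00"}),
])
assert get_artifacts_metadata_by_type(job, "checkpoint")[0]["epoch"] == 1
```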
- -import os -import signal -import sys -from datetime import UTC, datetime -from pathlib import Path -from typing import Any - -import psutil -import torch -from datasets import Dataset -from transformers import AutoConfig, AutoModelForCausalLM - -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.post_training import Checkpoint, TrainingConfig -from llama_stack.log import get_logger - -from .config import HuggingFacePostTrainingConfig - -logger = get_logger(name=__name__, category="post_training") - - -def setup_environment(): - """Setup common environment variables for training.""" - os.environ["TOKENIZERS_PARALLELISM"] = "false" - os.environ["MKL_THREADING_LAYER"] = "GNU" - os.environ["MKL_SERVICE_FORCE_INTEL"] = "0" - os.environ["MKL_NUM_THREADS"] = "1" - - -def bytes_to_gb(to_convert: int) -> str: - """Converts memory stats to GB and formats to 2 decimal places. - Args: - to_convert: Memory value in bytes - Returns: - str: Memory value in GB formatted to 2 decimal places - """ - return f"{(to_convert / (1024**3)):.2f}" - - -def get_memory_stats(device: torch.device) -> dict[str, Any]: - """Get memory statistics for the given device.""" - stats = { - "system_memory": { - "total": bytes_to_gb(psutil.virtual_memory().total), - "available": bytes_to_gb(psutil.virtual_memory().available), - "used": bytes_to_gb(psutil.virtual_memory().used), - "percent": psutil.virtual_memory().percent, - } - } - - if device.type == "cuda": - stats["device_memory"] = { - "allocated": bytes_to_gb(torch.cuda.memory_allocated(device)), - "reserved": bytes_to_gb(torch.cuda.memory_reserved(device)), - "max_allocated": bytes_to_gb(torch.cuda.max_memory_allocated(device)), - } - elif device.type == "mps": - # MPS doesn't provide direct memory stats, but we can track system memory - stats["device_memory"] = { - "note": "MPS memory stats not directly available", - "system_memory_used": bytes_to_gb(psutil.virtual_memory().used), - } - elif device.type == "cpu": - # For CPU, we track process memory usage - process = psutil.Process() - stats["device_memory"] = { - "process_rss": bytes_to_gb(process.memory_info().rss), - "process_vms": bytes_to_gb(process.memory_info().vms), - "process_percent": process.memory_percent(), - } - - return stats - - -def setup_torch_device(device_str: str) -> torch.device: - """Initialize and validate a PyTorch device. - This function handles device initialization and validation for different device types: - - CUDA: Validates CUDA availability and handles device selection - - MPS: Validates MPS availability for Apple Silicon - - CPU: Basic validation - - HPU: Raises error as it's not supported - Args: - device_str: String specifying the device ('cuda', 'cpu', 'mps') - Returns: - torch.device: The initialized and validated device - Raises: - RuntimeError: If device initialization fails or device is not supported - """ - try: - device = torch.device(device_str) - except RuntimeError as e: - raise RuntimeError(f"Error getting Torch Device {str(e)}") from e - - # Validate device capabilities - if device.type == "cuda": - if not torch.cuda.is_available(): - raise RuntimeError( - f"{device.type}: Torch has no CUDA/ROCm support or could not detect a compatible device." 
- ) - if device.index is None: - device = torch.device(device.type, torch.cuda.current_device()) - elif device.type == "mps": - if not torch.backends.mps.is_available(): - raise RuntimeError(f"{device.type}: Torch has no MPS support or could not detect a compatible device.") - elif device.type == "hpu": - raise RuntimeError(f"{device.type}: training does not support Intel Gaudi.") - - return device - - -async def load_rows_from_dataset(datasetio_api: DatasetIO, dataset_id: str) -> list[dict[str, Any]]: - """Load dataset from llama stack dataset provider""" - try: - all_rows = await datasetio_api.iterrows( - dataset_id=dataset_id, - limit=-1, - ) - if not isinstance(all_rows.data, list): - raise RuntimeError("Expected dataset data to be a list") - return all_rows.data - except Exception as e: - raise RuntimeError(f"Failed to load dataset: {str(e)}") from e - - -def load_model( - model: str, - device: torch.device, - provider_config: HuggingFacePostTrainingConfig, -) -> AutoModelForCausalLM: - """Load and initialize the model for training. - Args: - model: The model identifier to load - device: The device to load the model onto - provider_config: Provider-specific configuration - Returns: - The loaded and initialized model - Raises: - RuntimeError: If model loading fails - """ - logger.info("Loading the base model") - try: - model_config = AutoConfig.from_pretrained(model, **provider_config.model_specific_config) - model_obj = AutoModelForCausalLM.from_pretrained( - model, - torch_dtype="auto" if device.type != "cpu" else "float32", - quantization_config=None, - config=model_config, - **provider_config.model_specific_config, - ) - # Always move model to specified device - model_obj = model_obj.to(device) - logger.info(f"Model loaded and moved to device: {model_obj.device}") - return model_obj - except Exception as e: - raise RuntimeError(f"Failed to load model: {str(e)}") from e - - -def split_dataset(ds: Dataset) -> tuple[Dataset, Dataset]: - """Split dataset into train and validation sets. - Args: - ds: Dataset to split - Returns: - tuple: (train_dataset, eval_dataset) - """ - logger.info("Splitting dataset into train and validation sets") - train_val_split = ds.train_test_split(test_size=0.1, seed=42) - train_dataset = train_val_split["train"] - eval_dataset = train_val_split["test"] - logger.info(f"Split dataset into {len(train_dataset)} training and {len(eval_dataset)} validation examples") - return train_dataset, eval_dataset - - -def setup_signal_handlers(): - """Setup signal handlers for graceful shutdown.""" - - def signal_handler(signum, frame): - logger.info(f"Received signal {signum}, initiating graceful shutdown") - sys.exit(0) - - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - - -def calculate_training_steps(steps_per_epoch: int, config: TrainingConfig) -> dict[str, int]: - """Calculate training steps and logging configuration. 
- Args: - steps_per_epoch: Number of training steps per epoch - config: Training configuration - Returns: - dict: Dictionary with calculated step values - """ - total_steps = steps_per_epoch * config.n_epochs - max_steps = min(config.max_steps_per_epoch, total_steps) - logging_steps = max(1, steps_per_epoch // 50)  # Log 50 times per epoch - - logger.info("Training configuration:") - logger.info(f"- Steps per epoch: {steps_per_epoch}") - logger.info(f"- Total steps: {total_steps}") - logger.info(f"- Max steps: {max_steps}") - logger.info(f"- Logging steps: {logging_steps}") - - return {"total_steps": total_steps, "max_steps": max_steps, "logging_steps": logging_steps} - - -def get_save_strategy(output_dir_path: Path | None) -> tuple[str, str]: - """Get save and evaluation strategy based on output directory. - Args: - output_dir_path: Optional path to save the model - Returns: - tuple: (save_strategy, eval_strategy) - """ - if output_dir_path: - logger.info(f"Will save checkpoints to {output_dir_path}") - return "epoch", "epoch" - return "no", "no" - - -def create_checkpoints( - output_dir_path: Path, job_uuid: str, model: str, config: TrainingConfig, final_model_name: str -) -> list[Checkpoint]: - """Create checkpoint objects from training output. - Args: - output_dir_path: Path to the training output directory - job_uuid: Unique identifier for the training job - model: Model identifier - config: Training configuration - final_model_name: Name of the final model directory ("merged_model" for SFT, "dpo_model" for DPO) - Returns: - List of Checkpoint objects - """ - checkpoints = [] - - # Add checkpoint directories - checkpoint_dirs = sorted( - [d for d in output_dir_path.glob("checkpoint-*") if d.is_dir()], - key=lambda x: int(x.name.split("-")[1]), - ) - - for epoch_number, checkpoint_dir in enumerate(checkpoint_dirs, start=1): - created_time = datetime.fromtimestamp(os.path.getctime(checkpoint_dir), tz=UTC) - checkpoint = Checkpoint( - identifier=checkpoint_dir.name, - created_at=created_time, - epoch=epoch_number, - post_training_job_id=job_uuid, - path=str(checkpoint_dir), - ) - checkpoints.append(checkpoint) - - # Add final model - final_model_path = output_dir_path / final_model_name - if final_model_path.exists(): - training_type = "sft" if final_model_name == "merged_model" else "dpo" - checkpoint = Checkpoint( - identifier=f"{model}-{training_type}-{config.n_epochs}", - created_at=datetime.now(UTC), - epoch=config.n_epochs, - post_training_job_id=job_uuid, - path=str(final_model_path), - ) - checkpoints.append(checkpoint) - - return checkpoints diff --git a/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/llama_stack/providers/inline/post_training/torchtune/common/utils.py deleted file mode 100644 index f0fa052a2..000000000 --- a/llama_stack/providers/inline/post_training/torchtune/common/utils.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree.
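`create_checkpoints` above sorts the `checkpoint-*` directories by their numeric suffix rather than by name; plain lexicographic order would misplace multi-digit checkpoints. A tiny sketch of the difference:

```python
# Sketch of the numeric checkpoint ordering above: plain lexicographic sort
# would place "checkpoint-10" before "checkpoint-9".
names = ["checkpoint-10", "checkpoint-9", "checkpoint-1"]
assert sorted(names) == ["checkpoint-1", "checkpoint-10", "checkpoint-9"]
ordered = sorted(names, key=lambda n: int(n.split("-")[1]))
assert ordered == ["checkpoint-1", "checkpoint-9", "checkpoint-10"]
```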
- -from collections.abc import Callable - -import torch -from pydantic import BaseModel -from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages -from torchtune.models.llama3 import llama3_tokenizer -from torchtune.models.llama3._tokenizer import Llama3Tokenizer -from torchtune.models.llama3_1 import lora_llama3_1_8b -from torchtune.models.llama3_2 import lora_llama3_2_3b -from torchtune.modules.transforms import Transform - -from llama_stack.apis.post_training import DatasetFormat -from llama_stack.models.llama.sku_list import resolve_model -from llama_stack.models.llama.sku_types import Model - -BuildLoraModelCallable = Callable[..., torch.nn.Module] -BuildTokenizerCallable = Callable[..., Llama3Tokenizer] - - -class ModelConfig(BaseModel): - model_definition: BuildLoraModelCallable - tokenizer_type: BuildTokenizerCallable - checkpoint_type: str - - -MODEL_CONFIGS: dict[str, ModelConfig] = { - "Llama3.2-3B-Instruct": ModelConfig( - model_definition=lora_llama3_2_3b, - tokenizer_type=llama3_tokenizer, - checkpoint_type="LLAMA3_2", - ), - "Llama3.1-8B-Instruct": ModelConfig( - model_definition=lora_llama3_1_8b, - tokenizer_type=llama3_tokenizer, - checkpoint_type="LLAMA3", - ), -} - -DATA_FORMATS: dict[str, Transform] = { - "instruct": InputOutputToMessages, - "dialog": ShareGPTToMessages, -} - - -def _validate_model_id(model_id: str) -> Model: - model = resolve_model(model_id) - if model is None or model.core_model_id.value not in MODEL_CONFIGS: - raise ValueError(f"Model {model_id} is not supported.") - return model - - -async def get_model_definition( - model_id: str, -) -> BuildLoraModelCallable: - model = _validate_model_id(model_id) - model_config = MODEL_CONFIGS[model.core_model_id.value] - if not hasattr(model_config, "model_definition"): - raise ValueError(f"Model {model_id} does not have model definition.") - return model_config.model_definition - - -async def get_tokenizer_type( - model_id: str, -) -> BuildTokenizerCallable: - model = _validate_model_id(model_id) - model_config = MODEL_CONFIGS[model.core_model_id.value] - if not hasattr(model_config, "tokenizer_type"): - raise ValueError(f"Model {model_id} does not have tokenizer_type.") - return model_config.tokenizer_type - - -async def get_checkpointer_model_type( - model_id: str, -) -> str: - """ - The checkpointer model type is used by the checkpointer for special treatment of specific model types. - For example, Llama 3.2 models tie weights (https://github.com/pytorch/torchtune/blob/main/torchtune/training/checkpointing/_checkpointer.py#L1041). - """ - model = _validate_model_id(model_id) - model_config = MODEL_CONFIGS[model.core_model_id.value] - if not hasattr(model_config, "checkpoint_type"): - raise ValueError(f"Model {model_id} does not have checkpoint_type.") - return model_config.checkpoint_type - - -async def get_data_transform(data_format: DatasetFormat) -> Transform: - return DATA_FORMATS[data_format.value] diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/llama_stack/providers/inline/post_training/torchtune/post_training.py deleted file mode 100644 index 765f6789d..000000000 --- a/llama_stack/providers/inline/post_training/torchtune/post_training.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree.
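The `MODEL_CONFIGS` table above is a registry-of-callables: model IDs map to builder functions, and unknown IDs fail loudly at lookup time. A minimal sketch of the same pattern, with placeholder builders rather than torchtune's real constructors:

```python
# Minimal sketch of the registry-of-callables pattern from MODEL_CONFIGS.
# The builder bodies are placeholders, not torchtune's real constructors.
from collections.abc import Callable
from dataclasses import dataclass

@dataclass
class ModelConfig:
    model_definition: Callable[[], str]
    checkpoint_type: str

MODEL_CONFIGS: dict[str, ModelConfig] = {
    "Llama3.2-3B-Instruct": ModelConfig(lambda: "lora_llama3_2_3b", "LLAMA3_2"),
    "Llama3.1-8B-Instruct": ModelConfig(lambda: "lora_llama3_1_8b", "LLAMA3"),
}

def get_model_definition(model_id: str) -> Callable[[], str]:
    if model_id not in MODEL_CONFIGS:
        raise ValueError(f"Model {model_id} is not supported.")
    return MODEL_CONFIGS[model_id].model_definition

assert get_model_definition("Llama3.2-3B-Instruct")() == "lora_llama3_2_3b"
```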
-from enum import Enum -from typing import Any - -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - AlgorithmConfig, - Checkpoint, - DPOAlignmentConfig, - JobStatus, - ListPostTrainingJobsResponse, - LoraFinetuningConfig, - PostTrainingJob, - PostTrainingJobArtifactsResponse, - PostTrainingJobStatusResponse, - TrainingConfig, -) -from llama_stack.providers.inline.post_training.torchtune.config import ( - TorchtunePostTrainingConfig, -) -from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler -from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus - - -class TrainingArtifactType(Enum): - CHECKPOINT = "checkpoint" - RESOURCES_STATS = "resources_stats" - - -_JOB_TYPE_SUPERVISED_FINE_TUNE = "supervised-fine-tune" - - -class TorchtunePostTrainingImpl: - def __init__( - self, - config: TorchtunePostTrainingConfig, - datasetio_api: DatasetIO, - datasets: Datasets, - ) -> None: - self.config = config - self.datasetio_api = datasetio_api - self.datasets_api = datasets - self._scheduler = Scheduler() - - async def shutdown(self) -> None: - await self._scheduler.shutdown() - - @staticmethod - def _checkpoint_to_artifact(checkpoint: Checkpoint) -> JobArtifact: - return JobArtifact( - type=TrainingArtifactType.CHECKPOINT.value, - name=checkpoint.identifier, - uri=checkpoint.path, - metadata=dict(checkpoint), - ) - - @staticmethod - def _resources_stats_to_artifact(resources_stats: dict[str, Any]) -> JobArtifact: - return JobArtifact( - type=TrainingArtifactType.RESOURCES_STATS.value, - name=TrainingArtifactType.RESOURCES_STATS.value, - metadata=resources_stats, - ) - - async def supervised_fine_tune( - self, - job_uuid: str, - training_config: TrainingConfig, - hyperparam_search_config: dict[str, Any], - logger_config: dict[str, Any], - model: str, - checkpoint_dir: str | None, - algorithm_config: AlgorithmConfig | None, - ) -> PostTrainingJob: - if isinstance(algorithm_config, LoraFinetuningConfig): - - async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb): - from llama_stack.providers.inline.post_training.torchtune.recipes.lora_finetuning_single_device import ( - LoraFinetuningSingleDevice, - ) - - on_log_message_cb("Starting Lora finetuning") - - recipe = LoraFinetuningSingleDevice( - self.config, - job_uuid, - training_config, - hyperparam_search_config, - logger_config, - model, - checkpoint_dir, - algorithm_config, - self.datasetio_api, - self.datasets_api, - ) - await recipe.setup() - - resources_allocated, checkpoints = await recipe.train() - - on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated)) - for checkpoint in checkpoints: - artifact = self._checkpoint_to_artifact(checkpoint) - on_artifact_collected_cb(artifact) - - on_status_change_cb(SchedulerJobStatus.completed) - on_log_message_cb("Lora finetuning completed") - else: - raise NotImplementedError() - - job_uuid = self._scheduler.schedule(_JOB_TYPE_SUPERVISED_FINE_TUNE, job_uuid, handler) - return PostTrainingJob(job_uuid=job_uuid) - - async def preference_optimize( - self, - job_uuid: str, - finetuned_model: str, - algorithm_config: DPOAlignmentConfig, - training_config: TrainingConfig, - hyperparam_search_config: dict[str, Any], - logger_config: dict[str, Any], - ) -> PostTrainingJob: - raise NotImplementedError() - - async def get_training_jobs(self) -> ListPostTrainingJobsResponse: - return ListPostTrainingJobsResponse( - 
data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()] - ) - - @staticmethod - def _get_artifacts_metadata_by_type(job, artifact_type): - return [artifact.metadata for artifact in job.artifacts if artifact.type == artifact_type] - - @classmethod - def _get_checkpoints(cls, job): - return cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.CHECKPOINT.value) - - @classmethod - def _get_resources_allocated(cls, job): - data = cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.RESOURCES_STATS.value) - return data[0] if data else None - - async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse | None: - job = self._scheduler.get_job(job_uuid) - - match job.status: - # TODO: Add support for other statuses to API - case SchedulerJobStatus.new | SchedulerJobStatus.scheduled: - status = JobStatus.scheduled - case SchedulerJobStatus.running: - status = JobStatus.in_progress - case SchedulerJobStatus.completed: - status = JobStatus.completed - case SchedulerJobStatus.failed: - status = JobStatus.failed - case _: - raise NotImplementedError() - - return PostTrainingJobStatusResponse( - job_uuid=job_uuid, - status=status, - scheduled_at=job.scheduled_at, - started_at=job.started_at, - completed_at=job.completed_at, - checkpoints=self._get_checkpoints(job), - resources_allocated=self._get_resources_allocated(job), - ) - - async def cancel_training_job(self, job_uuid: str) -> None: - self._scheduler.cancel(job_uuid) - - async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse | None: - job = self._scheduler.get_job(job_uuid) - return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job)) diff --git a/llama_stack/providers/inline/scoring/basic/scoring.py b/llama_stack/providers/inline/scoring/basic/scoring.py deleted file mode 100644 index b19b68039..000000000 --- a/llama_stack/providers/inline/scoring/basic/scoring.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
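# Hedged sketch of the scheduler/handler contract used by supervised_fine_tune in
# the deleted provider above: the recipe runs inside an async handler that reports
# progress through three callbacks. The callback and JobArtifact signatures are
# inferred from the call sites, not from documented API.
async def example_handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb):
    on_log_message_cb("starting example job")
    # ... recipe.setup() / recipe.train() would run here ...
    on_artifact_collected_cb(JobArtifact(type="checkpoint", name="ckpt-0", uri="/tmp/ckpt-0"))
    on_status_change_cb(SchedulerJobStatus.completed)

job_uuid = scheduler.schedule("supervised-fine-tune", "job-1234", example_handler)  # scheduler: a Scheduler()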
-from typing import Any - -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.scoring import ( - ScoreBatchResponse, - ScoreResponse, - Scoring, - ScoringResult, -) -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.core.datatypes import Api -from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate -from llama_stack.providers.utils.common.data_schema_validator import ( - get_valid_schemas, - validate_dataset_schema, -) - -from .config import BasicScoringConfig -from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn -from .scoring_fn.equality_scoring_fn import EqualityScoringFn -from .scoring_fn.ifeval_scoring_fn import IfEvalScoringFn -from .scoring_fn.regex_parser_math_response_scoring_fn import ( - RegexParserMathResponseScoringFn, -) -from .scoring_fn.regex_parser_scoring_fn import RegexParserScoringFn -from .scoring_fn.subset_of_scoring_fn import SubsetOfScoringFn - -FIXED_FNS = [ - EqualityScoringFn, - SubsetOfScoringFn, - RegexParserScoringFn, - RegexParserMathResponseScoringFn, - IfEvalScoringFn, - DocVQAScoringFn, -] - - -class BasicScoringImpl( - Scoring, - ScoringFunctionsProtocolPrivate, -): - def __init__( - self, - config: BasicScoringConfig, - datasetio_api: DatasetIO, - datasets_api: Datasets, - ) -> None: - self.config = config - self.datasetio_api = datasetio_api - self.datasets_api = datasets_api - self.scoring_fn_id_impls = {} - - async def initialize(self) -> None: - for fn in FIXED_FNS: - impl = fn() - for fn_defs in impl.get_supported_scoring_fn_defs(): - self.scoring_fn_id_impls[fn_defs.identifier] = impl - - async def shutdown(self) -> None: ... - - async def list_scoring_functions(self) -> list[ScoringFn]: - scoring_fn_defs_list = [ - fn_def for impl in self.scoring_fn_id_impls.values() for fn_def in impl.get_supported_scoring_fn_defs() - ] - - for f in scoring_fn_defs_list: - assert f.identifier.startswith("basic"), "All basic scoring fn must have identifier prefixed with 'basic'! 
" - - return scoring_fn_defs_list - - async def register_scoring_function(self, function_def: ScoringFn) -> None: - raise NotImplementedError("Register scoring function not implemented yet") - - async def score_batch( - self, - dataset_id: str, - scoring_functions: dict[str, ScoringFnParams | None] = None, - save_results_dataset: bool = False, - ) -> ScoreBatchResponse: - dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) - validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.scoring.value)) - - all_rows = await self.datasetio_api.iterrows( - dataset_id=dataset_id, - limit=-1, - ) - res = await self.score( - input_rows=all_rows.data, - scoring_functions=scoring_functions, - ) - if save_results_dataset: - # TODO: persist and register dataset on to server for reading - # self.datasets_api.register_dataset() - raise NotImplementedError("Save results dataset not implemented yet") - - return ScoreBatchResponse( - results=res.results, - ) - - async def score( - self, - input_rows: list[dict[str, Any]], - scoring_functions: dict[str, ScoringFnParams | None] = None, - ) -> ScoreResponse: - res = {} - for scoring_fn_id in scoring_functions.keys(): - if scoring_fn_id not in self.scoring_fn_id_impls: - raise ValueError(f"Scoring function {scoring_fn_id} is not supported.") - scoring_fn = self.scoring_fn_id_impls[scoring_fn_id] - scoring_fn_params = scoring_functions.get(scoring_fn_id, None) - score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params) - agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params) - res[scoring_fn_id] = ScoringResult( - score_rows=score_results, - aggregated_results=agg_results, - ) - - return ScoreResponse( - results=res, - ) diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py deleted file mode 100644 index 9b7628524..000000000 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-from typing import Any - -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference import Inference -from llama_stack.apis.scoring import ( - ScoreBatchResponse, - ScoreResponse, - Scoring, - ScoringResult, -) -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.core.datatypes import Api -from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate -from llama_stack.providers.utils.common.data_schema_validator import ( - get_valid_schemas, - validate_dataset_schema, -) - -from .config import LlmAsJudgeScoringConfig -from .scoring_fn.llm_as_judge_scoring_fn import LlmAsJudgeScoringFn - -LLM_JUDGE_FN = LlmAsJudgeScoringFn - - -class LlmAsJudgeScoringImpl( - Scoring, - ScoringFunctionsProtocolPrivate, -): - def __init__( - self, - config: LlmAsJudgeScoringConfig, - datasetio_api: DatasetIO, - datasets_api: Datasets, - inference_api: Inference, - ) -> None: - self.config = config - self.datasetio_api = datasetio_api - self.datasets_api = datasets_api - self.inference_api = inference_api - - async def initialize(self) -> None: - impl = LLM_JUDGE_FN(inference_api=self.inference_api) - self.llm_as_judge_fn = impl - - async def shutdown(self) -> None: ... - - async def list_scoring_functions(self) -> list[ScoringFn]: - scoring_fn_defs_list = self.llm_as_judge_fn.get_supported_scoring_fn_defs() - - for f in self.llm_as_judge_fn.get_supported_scoring_fn_defs(): - assert f.identifier.startswith("llm-as-judge"), ( - "All llm-as-judge scoring fn must have identifier prefixed with 'llm-as-judge'! " - ) - - return scoring_fn_defs_list - - async def register_scoring_function(self, function_def: ScoringFn) -> None: - self.llm_as_judge_fn.register_scoring_fn_def(function_def) - - async def unregister_scoring_function(self, scoring_fn_id: str) -> None: - self.llm_as_judge_fn.unregister_scoring_fn_def(scoring_fn_id) - - async def score_batch( - self, - dataset_id: str, - scoring_functions: dict[str, ScoringFnParams | None] = None, - save_results_dataset: bool = False, - ) -> ScoreBatchResponse: - dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) - validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.scoring.value)) - - all_rows = await self.datasetio_api.iterrows( - dataset_id=dataset_id, - limit=-1, - ) - res = await self.score( - input_rows=all_rows.data, - scoring_functions=scoring_functions, - ) - if save_results_dataset: - # TODO: persist and register dataset on to server for reading - # self.datasets_api.register_dataset() - raise NotImplementedError("Save results dataset not implemented yet") - - return ScoreBatchResponse( - results=res.results, - ) - - async def score( - self, - input_rows: list[dict[str, Any]], - scoring_functions: dict[str, ScoringFnParams | None] = None, - ) -> ScoreResponse: - res = {} - for scoring_fn_id in scoring_functions.keys(): - scoring_fn = self.llm_as_judge_fn - scoring_fn_params = scoring_functions.get(scoring_fn_id, None) - score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params) - agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params) - res[scoring_fn_id] = ScoringResult( - score_rows=score_results, - aggregated_results=agg_results, - ) - - return ScoreResponse( - results=res, - ) diff --git a/llama_stack/providers/inline/telemetry/meta_reference/__init__.py b/llama_stack/providers/inline/telemetry/meta_reference/__init__.py deleted file 
mode 100644 index 21743b653..000000000 --- a/llama_stack/providers/inline/telemetry/meta_reference/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.core.datatypes import Api - -from .config import TelemetryConfig, TelemetrySink - -__all__ = ["TelemetryConfig", "TelemetrySink"] - - -async def get_provider_impl(config: TelemetryConfig, deps: dict[Api, Any]): - from .telemetry import TelemetryAdapter - - impl = TelemetryAdapter(config, deps) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py deleted file mode 100644 index 088dd8439..000000000 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import StrEnum -from typing import Any - -from pydantic import BaseModel, Field, field_validator - - -class TelemetrySink(StrEnum): - OTEL_TRACE = "otel_trace" - OTEL_METRIC = "otel_metric" - CONSOLE = "console" - - -class TelemetryConfig(BaseModel): - otel_exporter_otlp_endpoint: str | None = Field( - default=None, - description="The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable.", - ) - service_name: str = Field( - # service name is always the same, use zero-width space to avoid clutter - default="\u200b", - description="The service name to use for telemetry", - ) - sinks: list[TelemetrySink] = Field( - default_factory=list, - description="List of telemetry sinks to enable (possible values: otel_trace, otel_metric, console)", - ) - - @field_validator("sinks", mode="before") - @classmethod - def validate_sinks(cls, v): - if isinstance(v, str): - return [TelemetrySink(sink.strip()) for sink in v.split(",")] - return v or [] - - @classmethod - def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: - return { - "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}", - "sinks": "${env.TELEMETRY_SINKS:=}", - "otel_exporter_otlp_endpoint": "${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}", - } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py deleted file mode 100644 index b15b1e490..000000000 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
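# Quick check of the sinks validator in the deleted TelemetryConfig: a comma-separated
# string (the form the ${env.TELEMETRY_SINKS:=} substitution produces) is split and
# coerced into TelemetrySink members. A sketch assuming the pydantic model above.
cfg = TelemetryConfig(sinks="otel_trace, console")
assert cfg.sinks == [TelemetrySink.OTEL_TRACE, TelemetrySink.CONSOLE]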
- -import os -import threading -from typing import Any - -from opentelemetry import metrics, trace -from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.sdk.metrics import MeterProvider -from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator - -from llama_stack.apis.telemetry import ( - Event, - MetricEvent, - SpanEndPayload, - SpanStartPayload, - SpanStatus, - StructuredLogEvent, - Telemetry, - UnstructuredLogEvent, -) -from llama_stack.core.datatypes import Api -from llama_stack.log import get_logger -from llama_stack.providers.utils.telemetry.tracing import ROOT_SPAN_MARKERS - -from .config import TelemetryConfig - -_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = { - "active_spans": {}, - "counters": {}, - "gauges": {}, - "up_down_counters": {}, -} -_global_lock = threading.Lock() -_TRACER_PROVIDER = None - -logger = get_logger(name=__name__, category="telemetry") - - -def is_tracing_enabled(tracer): - with tracer.start_as_current_span("check_tracing") as span: - return span.is_recording() - - -class TelemetryAdapter(Telemetry): - def __init__(self, _config: TelemetryConfig, deps: dict[Api, Any]) -> None: - self.datasetio_api = deps.get(Api.datasetio) - self.meter = None - - global _TRACER_PROVIDER - # Initialize the correct span processor based on the provider state. - # This is needed since once the span processor is set, it cannot be unset. - # Recreating the telemetry adapter multiple times will result in duplicate span processors. - # Since the library client can be recreated multiple times in a notebook, - # the kernel will hold on to the span processor and cause duplicate spans to be written. 
- if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"): - if _TRACER_PROVIDER is None: - provider = TracerProvider() - trace.set_tracer_provider(provider) - _TRACER_PROVIDER = provider - - # Use single OTLP endpoint for all telemetry signals - - # Let OpenTelemetry SDK handle endpoint construction automatically - # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs - # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter - span_exporter = OTLPSpanExporter() - span_processor = BatchSpanProcessor(span_exporter) - trace.get_tracer_provider().add_span_processor(span_processor) - - metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) - metric_provider = MeterProvider(metric_readers=[metric_reader]) - metrics.set_meter_provider(metric_provider) - self.is_otel_endpoint_set = True - else: - logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry") - self.is_otel_endpoint_set = False - - self.meter = metrics.get_meter(__name__) - self._lock = _global_lock - - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - if self.is_otel_endpoint_set: - trace.get_tracer_provider().force_flush() - - async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None: - if isinstance(event, UnstructuredLogEvent): - self._log_unstructured(event, ttl_seconds) - elif isinstance(event, MetricEvent): - self._log_metric(event) - elif isinstance(event, StructuredLogEvent): - self._log_structured(event, ttl_seconds) - else: - raise ValueError(f"Unknown event type: {event}") - - def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None: - with self._lock: - # Use global storage instead of instance storage - span_id = int(event.span_id, 16) - span = _GLOBAL_STORAGE["active_spans"].get(span_id) - - if span: - timestamp_ns = int(event.timestamp.timestamp() * 1e9) - span.add_event( - name=event.type.value, - attributes={ - "message": event.message, - "severity": event.severity.value, - "__ttl__": ttl_seconds, - **(event.attributes or {}), - }, - timestamp=timestamp_ns, - ) - else: - print(f"Warning: No active span found for span_id {span_id}. 
Dropping event: {event}") - - def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter: - assert self.meter is not None - if name not in _GLOBAL_STORAGE["counters"]: - _GLOBAL_STORAGE["counters"][name] = self.meter.create_counter( - name=name, - unit=unit, - description=f"Counter for {name}", - ) - return _GLOBAL_STORAGE["counters"][name] - - def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge: - assert self.meter is not None - if name not in _GLOBAL_STORAGE["gauges"]: - _GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge( - name=name, - unit=unit, - description=f"Gauge for {name}", - ) - return _GLOBAL_STORAGE["gauges"][name] - - def _log_metric(self, event: MetricEvent) -> None: - # Add metric as an event to the current span - try: - with self._lock: - # Only try to add to span if we have a valid span_id - if event.span_id: - try: - span_id = int(event.span_id, 16) - span = _GLOBAL_STORAGE["active_spans"].get(span_id) - - if span: - timestamp_ns = int(event.timestamp.timestamp() * 1e9) - span.add_event( - name=f"metric.{event.metric}", - attributes={ - "value": event.value, - "unit": event.unit, - **(event.attributes or {}), - }, - timestamp=timestamp_ns, - ) - except (ValueError, KeyError): - # Invalid span_id or span not found, but we already logged to console above - pass - except Exception: - # Lock acquisition failed - logger.debug("Failed to acquire lock to add metric to span") - - # Log to OpenTelemetry meter if available - if self.meter is None: - return - if isinstance(event.value, int): - counter = self._get_or_create_counter(event.metric, event.unit) - counter.add(event.value, attributes=event.attributes) - elif isinstance(event.value, float): - up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit) - up_down_counter.add(event.value, attributes=event.attributes) - - def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter: - assert self.meter is not None - if name not in _GLOBAL_STORAGE["up_down_counters"]: - _GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter( - name=name, - unit=unit, - description=f"UpDownCounter for {name}", - ) - return _GLOBAL_STORAGE["up_down_counters"][name] - - def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None: - with self._lock: - span_id = int(event.span_id, 16) - tracer = trace.get_tracer(__name__) - if event.attributes is None: - event.attributes = {} - event.attributes["__ttl__"] = ttl_seconds - - # Extract these W3C trace context attributes so they are not written to - # underlying storage, as we just need them to propagate the trace context. - traceparent = event.attributes.pop("traceparent", None) - tracestate = event.attributes.pop("tracestate", None) - if traceparent: - # If we have a traceparent header value, we're not the root span. 
- for root_attribute in ROOT_SPAN_MARKERS: - event.attributes.pop(root_attribute, None) - - if isinstance(event.payload, SpanStartPayload): - # Check if span already exists to prevent duplicates - if span_id in _GLOBAL_STORAGE["active_spans"]: - return - - context = None - if event.payload.parent_span_id: - parent_span_id = int(event.payload.parent_span_id, 16) - parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id) - context = trace.set_span_in_context(parent_span) - elif traceparent: - carrier = { - "traceparent": traceparent, - "tracestate": tracestate, - } - context = TraceContextTextMapPropagator().extract(carrier=carrier) - - span = tracer.start_span( - name=event.payload.name, - context=context, - attributes=event.attributes or {}, - ) - _GLOBAL_STORAGE["active_spans"][span_id] = span - - elif isinstance(event.payload, SpanEndPayload): - span = _GLOBAL_STORAGE["active_spans"].get(span_id) - if span: - if event.attributes: - span.set_attributes(event.attributes) - - status = ( - trace.Status(status_code=trace.StatusCode.OK) - if event.payload.status == SpanStatus.OK - else trace.Status(status_code=trace.StatusCode.ERROR) - ) - span.set_status(status) - span.end() - _GLOBAL_STORAGE["active_spans"].pop(span_id, None) - else: - raise ValueError(f"Unknown structured log event: {event}") diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/llama_stack/providers/inline/tool_runtime/rag/__init__.py deleted file mode 100644 index f9a7e7b89..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import RagToolRuntimeConfig - - -async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]): - from .memory import MemoryToolRuntimeImpl - - impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files]) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/llama_stack/providers/inline/vector_io/chroma/__init__.py deleted file mode 100644 index 575e5ad88..000000000 --- a/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import ChromaVectorIOConfig - - -async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]): - from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter - - impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/llama_stack/providers/inline/vector_io/chroma/config.py deleted file mode 100644 index 1798f10de..000000000 --- a/llama_stack/providers/inline/vector_io/chroma/config.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class ChromaVectorIOConfig(BaseModel): - db_path: str - persistence: KVStoreReference = Field(description="Config for KV store backend") - - @classmethod - def sample_run_config( - cls, __distro_dir__: str, db_path: str = "${env.CHROMADB_PATH}", **kwargs: Any - ) -> dict[str, Any]: - return { - "db_path": db_path, - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::chroma", - ).model_dump(exclude_none=True), - } diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py deleted file mode 100644 index 24d1f292a..000000000 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import FaissVectorIOConfig - - -async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): - from .faiss import FaissVectorIOAdapter - - assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" - - impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/llama_stack/providers/inline/vector_io/faiss/config.py deleted file mode 100644 index dd7a7aeca..000000000 --- a/llama_stack/providers/inline/vector_io/faiss/config.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class FaissVectorIOConfig(BaseModel): - persistence: KVStoreReference - - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return { - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::faiss", - ).model_dump(exclude_none=True) - } diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/llama_stack/providers/inline/vector_io/milvus/__init__.py deleted file mode 100644 index 7dc9c6a33..000000000 --- a/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
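# Shape of the run-config snippet the deleted ChromaVectorIOConfig.sample_run_config
# emits; the rendered dict below is a sketch, assuming KVStoreReference serializes to
# just its backend and namespace fields.
snippet = ChromaVectorIOConfig.sample_run_config(__distro_dir__="~/.llama/demo")
# snippet == {"db_path": "${env.CHROMADB_PATH}",
#             "persistence": {"backend": "kv_default", "namespace": "vector_io::chroma"}}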
- -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import MilvusVectorIOConfig - - -async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]): - from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter - - impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/llama_stack/providers/inline/vector_io/milvus/config.py deleted file mode 100644 index b333b04ea..000000000 --- a/llama_stack/providers/inline/vector_io/milvus/config.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class MilvusVectorIOConfig(BaseModel): - db_path: str - persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") - consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") - - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return { - "db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db", - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::milvus", - ).model_dump(exclude_none=True), - } diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py deleted file mode 100644 index bef6d50e6..000000000 --- a/llama_stack/providers/inline/vector_io/qdrant/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import QdrantVectorIOConfig - - -async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]): - from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter - - assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py deleted file mode 100644 index e7ecde7b7..000000000 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
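# The deleted Milvus sample config builds db_path with the stack's ${env.VAR:=default}
# substitution so MILVUS_DB_PATH can override a distro-local default; the rendered
# string, with an illustrative distro dir:
distro_dir = "~/.llama/distributions/demo"
db_path = "${env.MILVUS_DB_PATH:=" + distro_dir + "}/" + "milvus.db"
assert db_path == "${env.MILVUS_DB_PATH:=~/.llama/distributions/demo}/milvus.db"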
- - -from typing import Any - -from pydantic import BaseModel - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class QdrantVectorIOConfig(BaseModel): - path: str - persistence: KVStoreReference - - @classmethod - def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: - return { - "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::qdrant", - ).model_dump(exclude_none=True), - } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py deleted file mode 100644 index df96e927c..000000000 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import SQLiteVectorIOConfig - - -async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): - from .sqlite_vec import SQLiteVecVectorIOAdapter - - assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py deleted file mode 100644 index 1845d6f46..000000000 --- a/llama_stack/providers/registry/agents.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.providers.datatypes import ( - Api, - InlineProviderSpec, - ProviderSpec, -) -from llama_stack.providers.utils.kvstore import kvstore_dependencies - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.agents, - provider_type="inline::meta-reference", - pip_packages=[ - "matplotlib", - "pillow", - "pandas", - "scikit-learn", - "mcp>=1.8.1", - ] - + kvstore_dependencies(), # TODO make this dynamic based on the kvstore config - module="llama_stack.providers.inline.agents.meta_reference", - config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig", - api_dependencies=[ - Api.inference, - Api.safety, - Api.vector_io, - Api.tool_runtime, - Api.tool_groups, - Api.conversations, - ], - description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", - ), - ] diff --git a/llama_stack/providers/registry/batches.py b/llama_stack/providers/registry/batches.py deleted file mode 100644 index a07942486..000000000 --- a/llama_stack/providers/registry/batches.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
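# Every inline vector-io package deleted above exposes the same async entry point;
# a hedged sketch of resolving one of them (the deps contents and persistence_ref
# are illustrative stand-ins, assuming an async context):
deps = {Api.inference: inference_impl, Api.files: files_impl}
config = QdrantVectorIOConfig(path="/tmp/qdrant.db", persistence=persistence_ref)
impl = await get_provider_impl(config, deps)  # constructs the adapter and awaits initialize()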
- - -from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.batches, - provider_type="inline::reference", - pip_packages=[], - module="llama_stack.providers.inline.batches.reference", - config_class="llama_stack.providers.inline.batches.reference.config.ReferenceBatchesImplConfig", - api_dependencies=[ - Api.inference, - Api.files, - Api.models, - ], - description="Reference implementation of batches API with KVStore persistence.", - ), - ] diff --git a/llama_stack/providers/registry/datasetio.py b/llama_stack/providers/registry/datasetio.py deleted file mode 100644 index a9feb0bac..000000000 --- a/llama_stack/providers/registry/datasetio.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.providers.datatypes import ( - Api, - InlineProviderSpec, - ProviderSpec, - RemoteProviderSpec, -) - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.datasetio, - provider_type="inline::localfs", - pip_packages=["pandas"], - module="llama_stack.providers.inline.datasetio.localfs", - config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig", - api_dependencies=[], - description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.", - ), - RemoteProviderSpec( - api=Api.datasetio, - adapter_type="huggingface", - provider_type="remote::huggingface", - pip_packages=[ - "datasets>=4.0.0", - ], - module="llama_stack.providers.remote.datasetio.huggingface", - config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig", - description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.", - ), - RemoteProviderSpec( - api=Api.datasetio, - adapter_type="nvidia", - provider_type="remote::nvidia", - module="llama_stack.providers.remote.datasetio.nvidia", - config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig", - pip_packages=[ - "datasets>=4.0.0", - ], - description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.", - ), - ] diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py deleted file mode 100644 index 4ef0bb41f..000000000 --- a/llama_stack/providers/registry/eval.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
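# The registry entries being removed all share one shape: available_providers()
# returns ProviderSpec objects naming a module and config class to import lazily.
# A purely illustrative spec (provider_type, module, and config_class are made up):
example = InlineProviderSpec(
    api=Api.datasetio,
    provider_type="inline::example",
    pip_packages=["pandas"],
    module="llama_stack.providers.inline.datasetio.example",
    config_class="llama_stack.providers.inline.datasetio.example.ExampleConfig",
    api_dependencies=[],
    description="Hypothetical dataset I/O provider, for illustration only.",
)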
- - -from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.eval, - provider_type="inline::meta-reference", - pip_packages=["tree_sitter", "pythainlp", "langdetect", "emoji", "nltk"], - module="llama_stack.providers.inline.eval.meta_reference", - config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig", - api_dependencies=[ - Api.datasetio, - Api.datasets, - Api.scoring, - Api.inference, - Api.agents, - ], - description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.", - ), - RemoteProviderSpec( - api=Api.eval, - adapter_type="nvidia", - pip_packages=[ - "requests", - ], - provider_type="remote::nvidia", - module="llama_stack.providers.remote.eval.nvidia", - config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig", - description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.", - api_dependencies=[ - Api.datasetio, - Api.datasets, - Api.scoring, - Api.inference, - Api.agents, - ], - ), - ] diff --git a/llama_stack/providers/registry/files.py b/llama_stack/providers/registry/files.py deleted file mode 100644 index 9acabfacd..000000000 --- a/llama_stack/providers/registry/files.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec -from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.files, - provider_type="inline::localfs", - # TODO: make this dynamic according to the sql store type - pip_packages=sql_store_pip_packages, - module="llama_stack.providers.inline.files.localfs", - config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig", - description="Local filesystem-based file storage provider for managing files and documents locally.", - ), - RemoteProviderSpec( - api=Api.files, - provider_type="remote::s3", - adapter_type="s3", - pip_packages=["boto3"] + sql_store_pip_packages, - module="llama_stack.providers.remote.files.s3", - config_class="llama_stack.providers.remote.files.s3.config.S3FilesImplConfig", - description="AWS S3-based file storage provider for scalable cloud file management with metadata persistence.", - ), - ] diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py deleted file mode 100644 index 35afb296d..000000000 --- a/llama_stack/providers/registry/inference.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- - -from llama_stack.providers.datatypes import ( - Api, - InlineProviderSpec, - ProviderSpec, - RemoteProviderSpec, -) - -META_REFERENCE_DEPS = [ - "accelerate", - "fairscale", - "torch", - "torchvision", - "transformers", - "zmq", - "lm-format-enforcer", - "sentence-transformers", - "torchao==0.8.0", - "fbgemm-gpu-genai==1.1.2", -] - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.inference, - provider_type="inline::meta-reference", - pip_packages=META_REFERENCE_DEPS, - module="llama_stack.providers.inline.inference.meta_reference", - config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig", - description="Meta's reference implementation of inference with support for various model formats and optimization techniques.", - ), - InlineProviderSpec( - api=Api.inference, - provider_type="inline::sentence-transformers", - # CrossEncoder depends on torchao.quantization - pip_packages=[ - "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu", - "sentence-transformers --no-deps", - # required by some SentenceTransformers architectures for tensor rearrange/merge ops - "einops", - # fast HF tokenization backend used by SentenceTransformers models - "tokenizers", - # safe and fast file format for storing and loading tensors - "safetensors", - ], - module="llama_stack.providers.inline.inference.sentence_transformers", - config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig", - description="Sentence Transformers inference provider for text embeddings and similarity search.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="cerebras", - provider_type="remote::cerebras", - pip_packages=[], - module="llama_stack.providers.remote.inference.cerebras", - config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig", - description="Cerebras inference provider for running models on Cerebras Cloud platform.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="ollama", - provider_type="remote::ollama", - pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], - config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", - module="llama_stack.providers.remote.inference.ollama", - description="Ollama inference provider for running local models through the Ollama runtime.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="vllm", - provider_type="remote::vllm", - pip_packages=[], - module="llama_stack.providers.remote.inference.vllm", - config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig", - provider_data_validator="llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator", - description="Remote vLLM inference provider for connecting to vLLM servers.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="tgi", - provider_type="remote::tgi", - pip_packages=["huggingface_hub", "aiohttp"], - module="llama_stack.providers.remote.inference.tgi", - config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig", - description="Text Generation Inference (TGI) provider for HuggingFace model serving.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="hf::serverless", - provider_type="remote::hf::serverless", - pip_packages=["huggingface_hub", "aiohttp"], - module="llama_stack.providers.remote.inference.tgi", - 
config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig", - description="HuggingFace Inference API serverless provider for on-demand model inference.", - ), - RemoteProviderSpec( - api=Api.inference, - provider_type="remote::hf::endpoint", - adapter_type="hf::endpoint", - pip_packages=["huggingface_hub", "aiohttp"], - module="llama_stack.providers.remote.inference.tgi", - config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig", - description="HuggingFace Inference Endpoints provider for dedicated model serving.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="fireworks", - provider_type="remote::fireworks", - pip_packages=[ - "fireworks-ai<=0.17.16", - ], - module="llama_stack.providers.remote.inference.fireworks", - config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig", - provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator", - description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="together", - provider_type="remote::together", - pip_packages=[ - "together", - ], - module="llama_stack.providers.remote.inference.together", - config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig", - provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator", - description="Together AI inference provider for open-source models and collaborative AI development.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="bedrock", - provider_type="remote::bedrock", - pip_packages=["boto3"], - module="llama_stack.providers.remote.inference.bedrock", - config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig", - description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="databricks", - provider_type="remote::databricks", - pip_packages=["databricks-sdk"], - module="llama_stack.providers.remote.inference.databricks", - config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig", - description="Databricks inference provider for running models on Databricks' unified analytics platform.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="nvidia", - provider_type="remote::nvidia", - pip_packages=[], - module="llama_stack.providers.remote.inference.nvidia", - config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig", - description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="runpod", - provider_type="remote::runpod", - pip_packages=[], - module="llama_stack.providers.remote.inference.runpod", - config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig", - description="RunPod inference provider for running models on RunPod's cloud GPU platform.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="openai", - provider_type="remote::openai", - pip_packages=[], - module="llama_stack.providers.remote.inference.openai", - config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig", - provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator", - description="OpenAI inference provider for accessing GPT models 
and other OpenAI services.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="anthropic", - provider_type="remote::anthropic", - pip_packages=["anthropic"], - module="llama_stack.providers.remote.inference.anthropic", - config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig", - provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator", - description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="gemini", - provider_type="remote::gemini", - pip_packages=[], - module="llama_stack.providers.remote.inference.gemini", - config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig", - provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator", - description="Google Gemini inference provider for accessing Gemini models and Google's AI services.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="vertexai", - provider_type="remote::vertexai", - pip_packages=[ - "google-cloud-aiplatform", - ], - module="llama_stack.providers.remote.inference.vertexai", - config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig", - provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator", - description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages: - -• Enterprise-grade security: Uses Google Cloud's security controls and IAM -• Better integration: Seamless integration with other Google Cloud services -• Advanced features: Access to additional Vertex AI features like model tuning and monitoring -• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys - -Configuration: -- Set VERTEX_AI_PROJECT environment variable (required) -- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1) -- Use Google Cloud Application Default Credentials or service account key - -Authentication Setup: -Option 1 (Recommended): gcloud auth application-default login -Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path - -Available Models: -- vertex_ai/gemini-2.0-flash -- vertex_ai/gemini-2.5-flash -- vertex_ai/gemini-2.5-pro""", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="groq", - provider_type="remote::groq", - pip_packages=[], - module="llama_stack.providers.remote.inference.groq", - config_class="llama_stack.providers.remote.inference.groq.GroqConfig", - provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator", - description="Groq inference provider for ultra-fast inference using Groq's LPU technology.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="llama-openai-compat", - provider_type="remote::llama-openai-compat", - pip_packages=[], - module="llama_stack.providers.remote.inference.llama_openai_compat", - config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig", - provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator", - description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="sambanova", - 
provider_type="remote::sambanova", - pip_packages=[], - module="llama_stack.providers.remote.inference.sambanova", - config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig", - provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator", - description="SambaNova inference provider for running models on SambaNova's dataflow architecture.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="passthrough", - provider_type="remote::passthrough", - pip_packages=[], - module="llama_stack.providers.remote.inference.passthrough", - config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig", - provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator", - description="Passthrough inference provider for connecting to any external inference service not directly supported.", - ), - RemoteProviderSpec( - api=Api.inference, - adapter_type="watsonx", - provider_type="remote::watsonx", - pip_packages=["litellm"], - module="llama_stack.providers.remote.inference.watsonx", - config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", - provider_data_validator="llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator", - description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.", - ), - RemoteProviderSpec( - api=Api.inference, - provider_type="remote::azure", - adapter_type="azure", - pip_packages=[], - module="llama_stack.providers.remote.inference.azure", - config_class="llama_stack.providers.remote.inference.azure.AzureConfig", - provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator", - description=""" -Azure OpenAI inference provider for accessing GPT models and other Azure services. -Provider documentation -https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview -""", - ), - ] diff --git a/llama_stack/providers/registry/post_training.py b/llama_stack/providers/registry/post_training.py deleted file mode 100644 index 2092e3b2d..000000000 --- a/llama_stack/providers/registry/post_training.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from typing import cast - -from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec - -# We provide two versions of these providers so that distributions can package the appropriate version of torch. -# The CPU version is used for distributions that don't have GPU support -- they result in smaller container images. 
-torchtune_def = dict( - api=Api.post_training, - pip_packages=["numpy"], - module="llama_stack.providers.inline.post_training.torchtune", - config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig", - api_dependencies=[ - Api.datasetio, - Api.datasets, - ], - description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.", -) - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - **{ # type: ignore - **torchtune_def, - "provider_type": "inline::torchtune-cpu", - "pip_packages": ( - cast(list[str], torchtune_def["pip_packages"]) - + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"] - ), - }, - ), - InlineProviderSpec( - **{ # type: ignore - **torchtune_def, - "provider_type": "inline::torchtune-gpu", - "pip_packages": ( - cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"] - ), - }, - ), - InlineProviderSpec( - api=Api.post_training, - provider_type="inline::huggingface-gpu", - pip_packages=["trl", "transformers", "peft", "datasets>=4.0.0", "torch"], - module="llama_stack.providers.inline.post_training.huggingface", - config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig", - api_dependencies=[ - Api.datasetio, - Api.datasets, - ], - description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.", - ), - RemoteProviderSpec( - api=Api.post_training, - adapter_type="nvidia", - provider_type="remote::nvidia", - pip_packages=["requests", "aiohttp"], - module="llama_stack.providers.remote.post_training.nvidia", - config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig", - description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.", - ), - ] diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py deleted file mode 100644 index b30074398..000000000 --- a/llama_stack/providers/registry/safety.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
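# The torchtune registry above derives CPU and GPU provider variants from one base
# dict via spread-overrides; the same pattern in miniature (package strings are
# illustrative):
base = {"pip_packages": ["numpy"]}
cpu = {**base, "provider_type": "inline::torchtune-cpu",
       "pip_packages": base["pip_packages"] + ["torch --extra-index-url https://download.pytorch.org/whl/cpu"]}
gpu = {**base, "provider_type": "inline::torchtune-gpu",
       "pip_packages": base["pip_packages"] + ["torch"]}
assert cpu["pip_packages"][0] == gpu["pip_packages"][0] == "numpy"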
- - -from llama_stack.providers.datatypes import ( - Api, - InlineProviderSpec, - ProviderSpec, - RemoteProviderSpec, -) - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.safety, - provider_type="inline::prompt-guard", - pip_packages=[ - "transformers[accelerate]", - "torch --index-url https://download.pytorch.org/whl/cpu", - ], - module="llama_stack.providers.inline.safety.prompt_guard", - config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig", - description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.", - ), - InlineProviderSpec( - api=Api.safety, - provider_type="inline::llama-guard", - pip_packages=[], - module="llama_stack.providers.inline.safety.llama_guard", - config_class="llama_stack.providers.inline.safety.llama_guard.LlamaGuardConfig", - api_dependencies=[ - Api.inference, - ], - description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.", - ), - InlineProviderSpec( - api=Api.safety, - provider_type="inline::code-scanner", - pip_packages=[ - "codeshield", - ], - module="llama_stack.providers.inline.safety.code_scanner", - config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig", - description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.", - ), - RemoteProviderSpec( - api=Api.safety, - adapter_type="bedrock", - provider_type="remote::bedrock", - pip_packages=["boto3"], - module="llama_stack.providers.remote.safety.bedrock", - config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig", - description="AWS Bedrock safety provider for content moderation using AWS's safety services.", - ), - RemoteProviderSpec( - api=Api.safety, - adapter_type="nvidia", - provider_type="remote::nvidia", - pip_packages=["requests"], - module="llama_stack.providers.remote.safety.nvidia", - config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig", - description="NVIDIA's safety provider for content moderation and safety filtering.", - ), - RemoteProviderSpec( - api=Api.safety, - adapter_type="sambanova", - provider_type="remote::sambanova", - pip_packages=["litellm", "requests"], - module="llama_stack.providers.remote.safety.sambanova", - config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", - provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", - description="SambaNova's safety provider for content moderation and safety filtering.", - ), - ] diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py deleted file mode 100644 index a4ec54ed2..000000000 --- a/llama_stack/providers/registry/scoring.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- - -from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.scoring, - provider_type="inline::basic", - pip_packages=["requests"], - module="llama_stack.providers.inline.scoring.basic", - config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig", - api_dependencies=[ - Api.datasetio, - Api.datasets, - ], - description="Basic scoring provider for simple evaluation metrics and scoring functions.", - ), - InlineProviderSpec( - api=Api.scoring, - provider_type="inline::llm-as-judge", - pip_packages=[], - module="llama_stack.providers.inline.scoring.llm_as_judge", - config_class="llama_stack.providers.inline.scoring.llm_as_judge.LlmAsJudgeScoringConfig", - api_dependencies=[ - Api.datasetio, - Api.datasets, - Api.inference, - ], - description="LLM-as-judge scoring provider that uses language models to evaluate and score responses.", - ), - InlineProviderSpec( - api=Api.scoring, - provider_type="inline::braintrust", - pip_packages=["autoevals"], - module="llama_stack.providers.inline.scoring.braintrust", - config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig", - api_dependencies=[ - Api.datasetio, - Api.datasets, - ], - provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator", - description="Braintrust scoring provider for evaluation and scoring using the Braintrust platform.", - ), - ] diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py deleted file mode 100644 index 39dc7fccd..000000000 --- a/llama_stack/providers/registry/tool_runtime.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- - -from llama_stack.providers.datatypes import ( - Api, - InlineProviderSpec, - ProviderSpec, - RemoteProviderSpec, -) -from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.tool_runtime, - provider_type="inline::rag-runtime", - pip_packages=DEFAULT_VECTOR_IO_DEPS - + [ - "tqdm", - "numpy", - "scikit-learn", - "scipy", - "nltk", - "sentencepiece", - "transformers", - ], - module="llama_stack.providers.inline.tool_runtime.rag", - config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", - api_dependencies=[Api.vector_io, Api.inference, Api.files], - description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.", - ), - RemoteProviderSpec( - api=Api.tool_runtime, - adapter_type="brave-search", - provider_type="remote::brave-search", - module="llama_stack.providers.remote.tool_runtime.brave_search", - config_class="llama_stack.providers.remote.tool_runtime.brave_search.config.BraveSearchToolConfig", - pip_packages=["requests"], - provider_data_validator="llama_stack.providers.remote.tool_runtime.brave_search.BraveSearchToolProviderDataValidator", - description="Brave Search tool for web search capabilities with privacy-focused results.", - ), - RemoteProviderSpec( - api=Api.tool_runtime, - adapter_type="bing-search", - provider_type="remote::bing-search", - module="llama_stack.providers.remote.tool_runtime.bing_search", - config_class="llama_stack.providers.remote.tool_runtime.bing_search.config.BingSearchToolConfig", - pip_packages=["requests"], - provider_data_validator="llama_stack.providers.remote.tool_runtime.bing_search.BingSearchToolProviderDataValidator", - description="Bing Search tool for web search capabilities using Microsoft's search engine.", - ), - RemoteProviderSpec( - api=Api.tool_runtime, - adapter_type="tavily-search", - provider_type="remote::tavily-search", - module="llama_stack.providers.remote.tool_runtime.tavily_search", - config_class="llama_stack.providers.remote.tool_runtime.tavily_search.config.TavilySearchToolConfig", - pip_packages=["requests"], - provider_data_validator="llama_stack.providers.remote.tool_runtime.tavily_search.TavilySearchToolProviderDataValidator", - description="Tavily Search tool for AI-optimized web search with structured results.", - ), - RemoteProviderSpec( - api=Api.tool_runtime, - adapter_type="wolfram-alpha", - provider_type="remote::wolfram-alpha", - module="llama_stack.providers.remote.tool_runtime.wolfram_alpha", - config_class="llama_stack.providers.remote.tool_runtime.wolfram_alpha.config.WolframAlphaToolConfig", - pip_packages=["requests"], - provider_data_validator="llama_stack.providers.remote.tool_runtime.wolfram_alpha.WolframAlphaToolProviderDataValidator", - description="Wolfram Alpha tool for computational knowledge and mathematical calculations.", - ), - RemoteProviderSpec( - api=Api.tool_runtime, - adapter_type="model-context-protocol", - provider_type="remote::model-context-protocol", - module="llama_stack.providers.remote.tool_runtime.model_context_protocol", - config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig", - pip_packages=["mcp>=1.8.1"], - provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator", - description="Model Context Protocol (MCP) tool for standardized tool calling and context 
management.", - ), - ] diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py deleted file mode 100644 index ff3b8486f..000000000 --- a/llama_stack/providers/registry/vector_io.py +++ /dev/null @@ -1,828 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.providers.datatypes import ( - Api, - InlineProviderSpec, - ProviderSpec, - RemoteProviderSpec, -) - -# Common dependencies for all vector IO providers that support document processing -DEFAULT_VECTOR_IO_DEPS = ["chardet", "pypdf"] - - -def available_providers() -> list[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.vector_io, - provider_type="inline::meta-reference", - pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.inline.vector_io.faiss", - config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", - deprecation_warning="Please use the `inline::faiss` provider instead.", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description="Meta's reference implementation of a vector database.", - ), - InlineProviderSpec( - api=Api.vector_io, - provider_type="inline::faiss", - pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.inline.vector_io.faiss", - config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It -allows you to store and query vectors directly in memory. -That means you'll get fast and efficient vector retrieval. - -## Features - -- Lightweight and easy to use -- Fully integrated with Llama Stack -- GPU support -- **Vector search** - FAISS supports pure vector similarity search using embeddings - -## Search Modes - -**Supported:** -- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings - -**Not Supported:** -- **Keyword Search** (`mode="keyword"`): Not supported by FAISS -- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS - -> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality. - -## Usage - -To use Faiss in your Llama Stack project, follow these steps: - -1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Faiss. -3. Start storing and querying vectors. - -## Installation - -You can install Faiss using pip: - -```bash -pip install faiss-cpu -``` -## Documentation -See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for -more details about Faiss in general. -""", - ), - # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a - # source distribution and the wheels are not available for all platforms. 
- InlineProviderSpec( - api=Api.vector_io, - provider_type="inline::sqlite-vec", - pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.inline.vector_io.sqlite_vec", - config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It -allows you to store and query vectors directly within an SQLite database. -That means you're not limited to storing vectors in memory or in a separate service. - -## Features - -- Lightweight and easy to use -- Fully integrated with Llama Stack -- Uses disk-based storage for persistence, allowing for larger vector storage - -### Comparison to Faiss - -The choice between Faiss and sqlite-vec should be made based on the needs of your application, -as they have different strengths. - -#### Choosing the Right Provider - -Scenario | Recommended Tool | Reason --- |-----------------| -- -Online Analytical Processing (OLAP) | Faiss | Fast, in-memory searches -Online Transaction Processing (OLTP) | sqlite-vec | Frequent writes and reads -Frequent writes | sqlite-vec | Efficient disk-based storage and incremental indexing -Large datasets | sqlite-vec | Disk-based storage for larger vector storage -Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration - -#### Empirical Example - -Consider the histogram below in which 10,000 randomly generated strings were inserted -in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. - -```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png -:alt: Comparison of SQLite-Vec and Faiss write times -:width: 400px -``` - -You will notice that the average write time for `sqlite-vec` was 788ms, compared to -47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather -uniformly spread across the [1500, 100000] interval. - -Looking at each individual write in the order that the documents are inserted you'll see the increase in -write time as Faiss reindexes the vectors after each write. -```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png -:alt: Comparison of SQLite-Vec and Faiss write times -:width: 400px -``` - -In comparison, the read times for Faiss were on average 10% faster than sqlite-vec. -The modes of the two distributions highlight the difference more clearly: Faiss -will likely yield faster read performance. - -```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png -:alt: Comparison of SQLite-Vec and Faiss read times -:width: 400px -``` - -## Usage - -To use sqlite-vec in your Llama Stack project, follow these steps: - -1. Install the necessary dependencies. -2. Configure your Llama Stack project to use SQLite-Vec. -3. Start storing and querying vectors. - -The SQLite-vec provider supports three search modes: - -1. **Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings. -2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5. -3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results.
First performs keyword search to get candidate matches, then applies vector similarity search on those candidates. - -Example with hybrid search: -```python -response = await vector_io.query_chunks( - vector_db_id="my_db", - query="your query here", - params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, -) - -# Using RRF ranker -response = await vector_io.query_chunks( - vector_db_id="my_db", - query="your query here", - params={ - "mode": "hybrid", - "max_chunks": 3, - "score_threshold": 0.7, - "ranker": {"type": "rrf", "impact_factor": 60.0}, - }, -) - -# Using weighted ranker -response = await vector_io.query_chunks( - vector_db_id="my_db", - query="your query here", - params={ - "mode": "hybrid", - "max_chunks": 3, - "score_threshold": 0.7, - "ranker": {"type": "weighted", "alpha": 0.7}, # 70% vector, 30% keyword - }, -) -``` - -Example with explicit vector search: -```python -response = await vector_io.query_chunks( - vector_db_id="my_db", - query="your query here", - params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, -) -``` - -Example with keyword search: -```python -response = await vector_io.query_chunks( - vector_db_id="my_db", - query="your query here", - params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, -) -``` - -## Supported Search Modes - -The SQLite vector store supports three search modes: - -1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks -2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks -3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker - -### Hybrid Search - -Hybrid search combines the strengths of both vector and keyword search by: -- Computing vector similarity scores -- Computing keyword match scores -- Using a ranker to combine these scores - -Two ranker types are supported: - -1. **RRF (Reciprocal Rank Fusion)**: - - Combines ranks from both vector and keyword results - - Uses an impact factor (default: 60.0) to control the weight of higher-ranked results - - Good for balancing between vector and keyword results - - The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks - -2. **Weighted**: - - Linearly combines normalized vector and keyword scores - - Uses an alpha parameter (0-1) to control the blend: - - alpha=0: Only use keyword scores - - alpha=1: Only use vector scores - - alpha=0.5: Equal weight to both (default) - -Example using RAGQueryConfig with different search modes: - -```python -from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker - -# Vector search -config = RAGQueryConfig(mode="vector", max_chunks=5) - -# Keyword search -config = RAGQueryConfig(mode="keyword", max_chunks=5) - -# Hybrid search with custom RRF ranker -config = RAGQueryConfig( - mode="hybrid", - max_chunks=5, - ranker=RRFRanker(impact_factor=50.0), # Custom impact factor -) - -# Hybrid search with weighted ranker -config = RAGQueryConfig( - mode="hybrid", - max_chunks=5, - ranker=WeightedRanker(alpha=0.7), # 70% vector, 30% keyword -) - -# Hybrid search with default RRF ranker -config = RAGQueryConfig( - mode="hybrid", max_chunks=5 -) # Will use RRF with impact_factor=60.0 -``` - -Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored. 
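As a reading aid only (this is not the provider's code), the reciprocal rank fusion idea described above reduces to a few lines of Python; the function name and the best-first list inputs are assumptions made for illustration:

```python
def rrf_fuse(vector_ranked: list[str], keyword_ranked: list[str], impact_factor: float = 60.0) -> list[str]:
    """Illustrative sketch of Reciprocal Rank Fusion, not the sqlite-vec provider's implementation.

    Each input list holds chunk IDs ordered best-first; a chunk's fused score is the
    sum of 1 / (impact_factor + rank) over every list in which it appears.
    """
    scores: dict[str, float] = {}
    for ranking in (vector_ranked, keyword_ranked):
        for rank, chunk_id in enumerate(ranking, start=1):
            scores[chunk_id] = scores.get(chunk_id, 0.0) + 1.0 / (impact_factor + rank)
    return sorted(scores, key=lambda chunk_id: scores[chunk_id], reverse=True)
```

Note how a larger `impact_factor` shrinks the gap between adjacent ranks, softening the dominance of top-ranked results; chunks found by both searches naturally accumulate a higher fused score.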
- -## Installation - -You can install SQLite-Vec using pip: - -```bash -pip install sqlite-vec -``` - -## Documentation - -See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general. - -[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759). -""", - ), - InlineProviderSpec( - api=Api.vector_io, - provider_type="inline::sqlite_vec", - pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.inline.vector_io.sqlite_vec", - config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", - deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -Please refer to the sqlite-vec provider documentation. -""", - ), - RemoteProviderSpec( - api=Api.vector_io, - adapter_type="chromadb", - provider_type="remote::chromadb", - pip_packages=["chromadb-client"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.remote.vector_io.chroma", - config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -[Chroma](https://www.trychroma.com/) is an inline and remote vector -database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. -That means you're not limited to storing vectors in memory or in a separate service. - -## Features -Chroma supports: -- Store embeddings and their metadata -- Vector search -- Full-text search -- Document storage -- Metadata filtering -- Multi-modal retrieval - -## Usage - -To use Chroma in your Llama Stack project, follow these steps: - -1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Chroma. -3. Start storing and querying vectors. - -## Installation - -You can install Chroma using pip: - -```bash -pip install chromadb -``` - -## Documentation -See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. -""", - ), - InlineProviderSpec( - api=Api.vector_io, - provider_type="inline::chromadb", - pip_packages=["chromadb"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.inline.vector_io.chroma", - config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -[Chroma](https://www.trychroma.com/) is an inline and remote vector -database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. -That means you're not limited to storing vectors in memory or in a separate service. - -## Features -Chroma supports: -- Store embeddings and their metadata -- Vector search -- Full-text search -- Document storage -- Metadata filtering -- Multi-modal retrieval - -## Usage - -To use Chroma in your Llama Stack project, follow these steps: - -1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Chroma. -3. Start storing and querying vectors.
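For orientation, here is what stand-alone Chroma usage looks like outside of Llama Stack; this is a minimal sketch based on Chroma's client API, and the collection and document names are made up for illustration:

```python
import chromadb

# In-memory client; chromadb.PersistentClient(path="./chroma") persists to disk instead.
client = chromadb.Client()
collection = client.create_collection(name="docs")

# Chroma embeds the documents with its default embedding function.
collection.add(
    ids=["doc-1", "doc-2"],
    documents=["Llama Stack configures providers.", "Chroma stores embeddings and metadata."],
)

results = collection.query(query_texts=["which database stores embeddings?"], n_results=1)
print(results["ids"])
```

Within Llama Stack you would not call Chroma directly like this; the provider wires equivalent operations behind the vector_io API.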
- -## Installation - -You can install Chroma using pip: - -```bash -pip install chromadb -``` - -## Documentation -See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. - -""", - ), - RemoteProviderSpec( - api=Api.vector_io, - adapter_type="pgvector", - provider_type="remote::pgvector", - pip_packages=["psycopg2-binary"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.remote.vector_io.pgvector", - config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It -allows you to store and query vectors directly in memory. -That means you'll get fast and efficient vector retrieval. - -## Features - -- Easy to use -- Fully integrated with Llama Stack - -There are three implementations of search for PGVectorIndex available: - -1. Vector Search: -- How it works: - - Uses PostgreSQL's vector extension (pgvector) to perform similarity search - - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics - - E.g. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance - -- Characteristics: - - Semantic understanding - finds documents similar in meaning even if they don't share keywords - - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions) - - Best for: Finding conceptually related content, handling synonyms, cross-language search - -2. Keyword Search: -- How it works: - - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank - - Converts text to searchable tokens using to_tsvector('english', text). Default language is English. - - E.g. SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score - -- Characteristics: - - Lexical matching - finds exact keyword matches and variations - - Uses GIN (Generalized Inverted Index) for fast text search performance - - Scoring: Uses PostgreSQL's ts_rank function for relevance scoring - - Best for: Exact term matching, proper names, technical terms, Boolean-style queries - -3. Hybrid Search: -- How it works: - - Combines both vector and keyword search results - - Runs both searches independently, then merges results using configurable reranking - -- Two reranking strategies available: - - Reciprocal Rank Fusion (RRF) - (default: 60.0) - - Weighted Average - (default: 0.5) - -- Characteristics: - - Best of both worlds: semantic understanding + exact matching - - Documents appearing in both searches get boosted scores - - Configurable balance between semantic and lexical matching - - Best for: General-purpose search where you want both precision and recall - -4. Database Schema: -The PGVector implementation stores data optimized for all three search types: -CREATE TABLE vector_store_xxx ( - id TEXT PRIMARY KEY, - document JSONB, -- Original document - embedding vector(dimension), -- For vector search - content_text TEXT, -- Raw text content - tokenized_content TSVECTOR -- For keyword search -); - --- Indexes for performance -CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content); -- Keyword search --- Vector index created automatically by pgvector - -## Usage - -To use PGVector in your Llama Stack project, follow these steps: - -1. Install the necessary dependencies. -2. 
Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector). -3. Start storing and querying vectors. - -## This is an example of how you can set up your environment for using PGVector - -1. Export env vars: -```bash -export ENABLE_PGVECTOR=true -export PGVECTOR_HOST=localhost -export PGVECTOR_PORT=5432 -export PGVECTOR_DB=llamastack -export PGVECTOR_USER=llamastack -export PGVECTOR_PASSWORD=llamastack -``` - -2. Create DB: -```bash -psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';" -psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;" -psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;" -``` - -## Installation - -You can install PGVector using docker: - -```bash -docker pull pgvector/pgvector:pg17 -``` -## Documentation -See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. -""", - ), - RemoteProviderSpec( - api=Api.vector_io, - adapter_type="weaviate", - provider_type="remote::weaviate", - pip_packages=["weaviate-client>=4.16.5"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.remote.vector_io.weaviate", - config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig", - provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -[Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack. -It allows you to store and query vectors directly within a Weaviate database. -That means you're not limited to storing vectors in memory or in a separate service. - -## Features -Weaviate supports: -- Store embeddings and their metadata -- Vector search -- Full-text search -- Hybrid search -- Document storage -- Metadata filtering -- Multi-modal retrieval - - -## Usage - -To use Weaviate in your Llama Stack project, follow these steps: - -1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Weaviate. -3. Start storing and querying vectors. - -## Installation - -To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart). - -## Documentation -See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general. -""", - ), - InlineProviderSpec( - api=Api.vector_io, - provider_type="inline::qdrant", - pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.inline.vector_io.qdrant", - config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=r""" -[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It -allows you to store and query vectors directly in memory. -That means you'll get fast and efficient vector retrieval. - -> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in -> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative. 
-> -> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\] - - - -## Features - -- Lightweight and easy to use -- Fully integrated with Llama Stack -- Apache 2.0 license terms -- Store embeddings and their metadata -- Supports search by - [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/) - and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search -- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/) -- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/) -- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/) - -## Usage - -To use Qdrant in your Llama Stack project, follow these steps: - -1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Qdrant. -3. Start storing and querying vectors. - -## Installation - -You can install Qdrant using docker: - -```bash -docker pull qdrant/qdrant -``` -## Documentation -See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general. -""", - ), - RemoteProviderSpec( - api=Api.vector_io, - adapter_type="qdrant", - provider_type="remote::qdrant", - pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.remote.vector_io.qdrant", - config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -Please refer to the inline provider documentation. -""", - ), - RemoteProviderSpec( - api=Api.vector_io, - adapter_type="milvus", - provider_type="remote::milvus", - pip_packages=["pymilvus>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.remote.vector_io.milvus", - config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It -allows you to store and query vectors directly within a Milvus database. -That means you're not limited to storing vectors in memory or in a separate service. - -## Features - -- Easy to use -- Fully integrated with Llama Stack -- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations) - -## Usage - -To use Milvus in your Llama Stack project, follow these steps: - -1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Milvus. -3. Start storing and querying vectors.
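For orientation, stand-alone Milvus-Lite usage (outside Llama Stack) looks roughly like this; the file name, collection name, and the toy 4-dimensional vectors are made up for illustration:

```python
from pymilvus import MilvusClient  # installed via: pip install "pymilvus[milvus-lite]"

# A local file path gives you an embedded Milvus-Lite instance.
client = MilvusClient("./milvus_demo.db")
client.create_collection(collection_name="docs", dimension=4)

client.insert(
    collection_name="docs",
    data=[{"id": 0, "vector": [0.1, 0.2, 0.3, 0.4], "text": "hello milvus"}],
)

hits = client.search(collection_name="docs", data=[[0.1, 0.2, 0.3, 0.4]], limit=1)
print(hits)
```

The inline provider manages an equivalent embedded instance for you via `db_path`, as the configuration section below shows.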
- -## Installation - -If you want to use inline Milvus, you can install: - -```bash -pip install pymilvus[milvus-lite] -``` - -If you want to use remote Milvus, you can install: - -```bash -pip install pymilvus -``` - -## Configuration - -In Llama Stack, Milvus can be configured in two ways: -- **Inline (Local) Configuration** - Uses Milvus-Lite for local storage -- **Remote Configuration** - Connects to a remote Milvus server - -### Inline (Local) Configuration - -The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files: - -```yaml -vector_io: - - provider_id: milvus - provider_type: inline::milvus - config: - db_path: ~/.llama/distributions/together/milvus_store.db -``` - -### Remote Configuration - -Remote configuration is suitable for larger data storage requirements: - -#### Standard Remote Connection - -```yaml -vector_io: - - provider_id: milvus - provider_type: remote::milvus - config: - uri: "http://<host>:<port>" - token: "<user>:<password>" -``` - -#### TLS-Enabled Remote Connection (One-way TLS) - -For connections to Milvus instances with one-way TLS enabled: - -```yaml -vector_io: - - provider_id: milvus - provider_type: remote::milvus - config: - uri: "https://<host>:<port>" - token: "<user>:<password>" - secure: True - server_pem_path: "/path/to/server.pem" -``` - -#### Mutual TLS (mTLS) Remote Connection - -For connections to Milvus instances with mutual TLS (mTLS) enabled: - -```yaml -vector_io: - - provider_id: milvus - provider_type: remote::milvus - config: - uri: "https://<host>:<port>" - token: "<user>:<password>" - secure: True - ca_pem_path: "/path/to/ca.pem" - client_pem_path: "/path/to/client.pem" - client_key_path: "/path/to/client.key" -``` - -#### Key Parameters for TLS Configuration - -- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`. -- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS). -- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS). -- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). -- **`client_key_path`**: Path to the **client private key** file (required for mTLS). - -## Search Modes - -Milvus supports three different search modes for both inline and remote configurations: - -### Vector Search -Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content. - -```python -# Vector search example -search_response = client.vector_stores.search( - vector_store_id=vector_store.id, - query="What is machine learning?", - search_mode="vector", - max_num_results=5, -) -``` - -### Keyword Search -Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches. - -```python -# Keyword search example -search_response = client.vector_stores.search( - vector_store_id=vector_store.id, - query="Python programming language", - search_mode="keyword", - max_num_results=5, -) -``` - -### Hybrid Search -Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
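Hybrid search therefore needs a rule for merging the two result sets. The RRF formula was sketched earlier in the sqlite-vec section; the weighted alternative, used in the examples below, reduces to an alpha-blend of normalized scores. A minimal sketch (assuming scores already normalized to [0, 1]; this is not Milvus's actual reranking code):

```python
def weighted_blend(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    alpha: float = 0.5,
) -> dict[str, float]:
    """Illustrative alpha-blend: alpha=1.0 is vector-only, alpha=0.0 is keyword-only."""
    chunk_ids = set(vector_scores) | set(keyword_scores)
    return {
        chunk_id: alpha * vector_scores.get(chunk_id, 0.0) + (1.0 - alpha) * keyword_scores.get(chunk_id, 0.0)
        for chunk_id in chunk_ids
    }
```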
- -#### Basic Hybrid Search -```python -# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0) -search_response = client.vector_stores.search( - vector_store_id=vector_store.id, - query="neural networks in Python", - search_mode="hybrid", - max_num_results=5, -) -``` - -**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009). - -#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker -RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results. - -```python -# Hybrid search with custom RRF parameters -search_response = client.vector_stores.search( - vector_store_id=vector_store.id, - query="neural networks in Python", - search_mode="hybrid", - max_num_results=5, - ranking_options={ - "ranker": { - "type": "rrf", - "impact_factor": 100.0, # Higher values give more weight to top-ranked results - } - }, -) -``` - -#### Hybrid Search with Weighted Ranker -Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods. - -```python -# Hybrid search with weighted ranker -search_response = client.vector_stores.search( - vector_store_id=vector_store.id, - query="neural networks in Python", - search_mode="hybrid", - max_num_results=5, - ranking_options={ - "ranker": { - "type": "weighted", - "alpha": 0.7, # 70% vector search, 30% keyword search - } - }, -) -``` - -For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md). - -## Documentation -See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. - -For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md). -""", - ), - InlineProviderSpec( - api=Api.vector_io, - provider_type="inline::milvus", - pip_packages=["pymilvus[milvus-lite]>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS, - module="llama_stack.providers.inline.vector_io.milvus", - config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig", - api_dependencies=[Api.inference], - optional_api_dependencies=[Api.files, Api.models], - description=""" -Please refer to the remote provider documentation. -""", - ), - ] diff --git a/llama_stack/providers/remote/datasetio/nvidia/README.md b/llama_stack/providers/remote/datasetio/nvidia/README.md deleted file mode 100644 index da57d5550..000000000 --- a/llama_stack/providers/remote/datasetio/nvidia/README.md +++ /dev/null @@ -1,73 +0,0 @@ -# NVIDIA DatasetIO Provider for LlamaStack - -This provider enables dataset management using NVIDIA's NeMo Customizer service. 
- -## Features - -- Register datasets for fine-tuning LLMs -- Unregister datasets - -## Getting Started - -### Prerequisites - -- LlamaStack with NVIDIA configuration -- Access to Hosted NVIDIA NeMo Microservice -- API key for authentication with the NVIDIA service - -### Setup - -Build the NVIDIA environment: - -```bash -uv run llama stack list-deps nvidia | xargs -L1 uv pip install -``` - -### Basic Usage using the LlamaStack Python Client - -#### Initialize the client - -```python -import os - -os.environ["NVIDIA_API_KEY"] = "your-api-key" -os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" -os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" -os.environ["NVIDIA_PROJECT_ID"] = "test-project" -from llama_stack.core.library_client import LlamaStackAsLibraryClient - -client = LlamaStackAsLibraryClient("nvidia") -client.initialize() -``` - -#### Register a dataset - -```python -client.datasets.register( - purpose="post-training/messages", - dataset_id="my-training-dataset", - source={"type": "uri", "uri": "hf://datasets/default/sample-dataset"}, - metadata={ - "format": "json", - "description": "Dataset for LLM fine-tuning", - "provider": "nvidia", - }, -) -``` - -#### Get a list of all registered datasets - -```python -datasets = client.datasets.list() -for dataset in datasets: - print(f"Dataset ID: {dataset.identifier}") - print(f"Description: {dataset.metadata.get('description', '')}") - print(f"Source: {dataset.source.uri}") - print("---") -``` - -#### Unregister a dataset - -```python -client.datasets.unregister(dataset_id="my-training-dataset") -``` diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py deleted file mode 100644 index f723c92cc..000000000 --- a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -import aiohttp - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.datasets import Dataset - -from .config import NvidiaDatasetIOConfig - - -class NvidiaDatasetIOAdapter: - """Nvidia NeMo DatasetIO API.""" - - def __init__(self, config: NvidiaDatasetIOConfig): - self.config = config - self.headers = {} - - async def _make_request( - self, - method: str, - path: str, - headers: dict[str, Any] | None = None, - params: dict[str, Any] | None = None, - json: dict[str, Any] | None = None, - **kwargs, - ) -> dict[str, Any]: - """Helper method to make HTTP requests to the Customizer API.""" - url = f"{self.config.datasets_url}{path}" - request_headers = self.headers.copy() - - # Set default Content-Type for JSON requests - if json is not None: - request_headers["Content-Type"] = "application/json" - - if headers: - request_headers.update(headers) - - async with aiohttp.ClientSession(headers=request_headers) as session: - async with session.request(method, url, params=params, json=json, **kwargs) as response: - if response.status != 200: - error_data = await response.json() - raise Exception(f"API request failed: {error_data}") - return await response.json() - - async def register_dataset( - self, - dataset_def: Dataset, - ) -> Dataset: - """Register a new dataset. 
- - Args: - dataset_def [Dataset]: The dataset definition. - dataset_id [str]: The ID of the dataset. - source [DataSource]: The source of the dataset. - metadata [Dict[str, Any]]: The metadata of the dataset. - format [str]: The format of the dataset. - description [str]: The description of the dataset. - Returns: - Dataset - """ - # add warnings for unsupported params - request_body = { - "name": dataset_def.identifier, - "namespace": self.config.dataset_namespace, - "files_url": dataset_def.source.uri, - "project": self.config.project_id, - } - if dataset_def.metadata: - request_body["format"] = dataset_def.metadata.get("format") - request_body["description"] = dataset_def.metadata.get("description") - await self._make_request( - "POST", - "/v1/datasets", - json=request_body, - ) - return dataset_def - - async def update_dataset( - self, - dataset_id: str, - dataset_schema: dict[str, ParamType], - url: URL, - provider_dataset_id: str | None = None, - provider_id: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> None: - raise NotImplementedError("Not implemented") - - async def unregister_dataset( - self, - dataset_id: str, - ) -> None: - await self._make_request( - "DELETE", - f"/v1/datasets/{self.config.dataset_namespace}/{dataset_id}", - headers={"Accept": "application/json", "Content-Type": "application/json"}, - ) - - async def iterrows( - self, - dataset_id: str, - start_index: int | None = None, - limit: int | None = None, - ) -> PaginatedResponse: - raise NotImplementedError("Not implemented") - - async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: - raise NotImplementedError("Not implemented") diff --git a/llama_stack/providers/remote/eval/nvidia/eval.py b/llama_stack/providers/remote/eval/nvidia/eval.py deleted file mode 100644 index 8fc7ffdd3..000000000 --- a/llama_stack/providers/remote/eval/nvidia/eval.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -from typing import Any - -import requests - -from llama_stack.apis.agents import Agents -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference import Inference -from llama_stack.apis.scoring import Scoring, ScoringResult -from llama_stack.providers.datatypes import BenchmarksProtocolPrivate -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper - -from .....apis.common.job_types import Job, JobStatus -from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse -from .config import NVIDIAEvalConfig - -DEFAULT_NAMESPACE = "nvidia" - - -class NVIDIAEvalImpl( - Eval, - BenchmarksProtocolPrivate, - ModelRegistryHelper, -): - def __init__( - self, - config: NVIDIAEvalConfig, - datasetio_api: DatasetIO, - datasets_api: Datasets, - scoring_api: Scoring, - inference_api: Inference, - agents_api: Agents, - ) -> None: - self.config = config - self.datasetio_api = datasetio_api - self.datasets_api = datasets_api - self.scoring_api = scoring_api - self.inference_api = inference_api - self.agents_api = agents_api - - ModelRegistryHelper.__init__(self) - - async def initialize(self) -> None: ... - - async def shutdown(self) -> None: ... 
- - async def _evaluator_get(self, path: str): - """Helper for making GET requests to the evaluator service.""" - response = requests.get(url=f"{self.config.evaluator_url}{path}") - response.raise_for_status() - return response.json() - - async def _evaluator_post(self, path: str, data: dict[str, Any]): - """Helper for making POST requests to the evaluator service.""" - response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data) - response.raise_for_status() - return response.json() - - async def _evaluator_delete(self, path: str) -> None: - """Helper for making DELETE requests to the evaluator service.""" - response = requests.delete(url=f"{self.config.evaluator_url}{path}") - response.raise_for_status() - - async def register_benchmark(self, task_def: Benchmark) -> None: - """Register a benchmark as an evaluation configuration.""" - await self._evaluator_post( - "/v1/evaluation/configs", - { - "namespace": DEFAULT_NAMESPACE, - "name": task_def.benchmark_id, - # metadata is copied to request body as-is - **task_def.metadata, - }, - ) - - async def unregister_benchmark(self, benchmark_id: str) -> None: - """Unregister a benchmark evaluation configuration from NeMo Evaluator.""" - await self._evaluator_delete(f"/v1/evaluation/configs/{DEFAULT_NAMESPACE}/{benchmark_id}") - - async def run_eval( - self, - benchmark_id: str, - benchmark_config: BenchmarkConfig, - ) -> Job: - """Run an evaluation job for a benchmark.""" - model = ( - benchmark_config.eval_candidate.model - if benchmark_config.eval_candidate.type == "model" - else benchmark_config.eval_candidate.config.model - ) - nvidia_model = self.get_provider_model_id(model) or model - - result = await self._evaluator_post( - "/v1/evaluation/jobs", - { - "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}", - "target": {"type": "model", "model": nvidia_model}, - }, - ) - - return Job(job_id=result["id"], status=JobStatus.in_progress) - - async def evaluate_rows( - self, - benchmark_id: str, - input_rows: list[dict[str, Any]], - scoring_functions: list[str], - benchmark_config: BenchmarkConfig, - ) -> EvaluateResponse: - raise NotImplementedError() - - async def job_status(self, benchmark_id: str, job_id: str) -> Job: - """Get the status of an evaluation job. - - EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed". - JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed" - """ - result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}") - result_status = result["status"] - - job_status = JobStatus.failed - if result_status in ["created", "pending"]: - job_status = JobStatus.scheduled - elif result_status in ["running"]: - job_status = JobStatus.in_progress - elif result_status in ["completed"]: - job_status = JobStatus.completed - elif result_status in ["cancelled"]: - job_status = JobStatus.cancelled - - return Job(job_id=job_id, status=job_status) - - async def job_cancel(self, benchmark_id: str, job_id: str) -> None: - """Cancel the evaluation job.""" - await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {}) - - async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: - """Returns the results of the evaluation job.""" - - job = await self.job_status(benchmark_id, job_id) - status = job.status - if not status or status != JobStatus.completed: - raise ValueError(f"Job {job_id} not completed. 
Status: {status.value}") - - result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results") - - return EvaluateResponse( - # TODO: these are stored in detailed results on NeMo Evaluator side; can be added - generations=[], - scores={ - benchmark_id: ScoringResult( - score_rows=[], - aggregated_results=result, - ) - }, - ) diff --git a/llama_stack/providers/remote/files/s3/files.py b/llama_stack/providers/remote/files/s3/files.py deleted file mode 100644 index c0e9f81d6..000000000 --- a/llama_stack/providers/remote/files/s3/files.py +++ /dev/null @@ -1,313 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import uuid -from datetime import UTC, datetime -from typing import Annotated, Any - -import boto3 -from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError -from fastapi import Depends, File, Form, Response, UploadFile - -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.common.responses import Order -from llama_stack.apis.files import ( - ExpiresAfter, - Files, - ListOpenAIFileResponse, - OpenAIFileDeleteResponse, - OpenAIFileObject, - OpenAIFilePurpose, -) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.id_generation import generate_object_id -from llama_stack.providers.utils.files.form_data import parse_expires_after -from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType -from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl - -from .config import S3FilesImplConfig - -# TODO: provider data for S3 credentials - - -def _create_s3_client(config: S3FilesImplConfig) -> boto3.client: - try: - s3_config = { - "region_name": config.region, - } - - # endpoint URL if specified (for MinIO, LocalStack, etc.) - if config.endpoint_url: - s3_config["endpoint_url"] = config.endpoint_url - - if config.aws_access_key_id and config.aws_secret_access_key: - s3_config.update( - { - "aws_access_key_id": config.aws_access_key_id, - "aws_secret_access_key": config.aws_secret_access_key, - } - ) - - return boto3.client("s3", **s3_config) - - except (BotoCoreError, NoCredentialsError) as e: - raise RuntimeError(f"Failed to initialize S3 client: {e}") from e - - -async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImplConfig) -> None: - try: - client.head_bucket(Bucket=config.bucket_name) - except ClientError as e: - error_code = e.response["Error"]["Code"] - if error_code == "404": - if not config.auto_create_bucket: - raise RuntimeError( - f"S3 bucket '{config.bucket_name}' does not exist. " - f"Either create the bucket manually or set 'auto_create_bucket: true' in your configuration." 
- ) from e - try: - # For us-east-1, we can't specify LocationConstraint - if config.region == "us-east-1": - client.create_bucket(Bucket=config.bucket_name) - else: - client.create_bucket( - Bucket=config.bucket_name, - CreateBucketConfiguration={"LocationConstraint": config.region}, - ) - except ClientError as create_error: - raise RuntimeError( - f"Failed to create S3 bucket '{config.bucket_name}': {create_error}" - ) from create_error - elif error_code == "403": - raise RuntimeError(f"Access denied to S3 bucket '{config.bucket_name}'") from e - else: - raise RuntimeError(f"Failed to access S3 bucket '{config.bucket_name}': {e}") from e - - -def _make_file_object( - *, - id: str, - filename: str, - purpose: str, - bytes: int, - created_at: int, - expires_at: int, - **kwargs: Any, # here to ignore any additional fields, e.g. extra fields from AuthorizedSqlStore -) -> OpenAIFileObject: - """ - Construct an OpenAIFileObject and normalize expires_at. - - If expires_at is greater than the max we treat it as no-expiration and - return None for expires_at. - - The OpenAI spec says expires_at type is Integer, but the implementation - will return None for no expiration. - """ - obj = OpenAIFileObject( - id=id, - filename=filename, - purpose=OpenAIFilePurpose(purpose), - bytes=bytes, - created_at=created_at, - expires_at=expires_at, - ) - - if obj.expires_at is not None and obj.expires_at > (obj.created_at + ExpiresAfter.MAX): - obj.expires_at = None # type: ignore - - return obj - - -class S3FilesImpl(Files): - """S3-based implementation of the Files API.""" - - def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None: - self._config = config - self.policy = policy - self._client: boto3.client | None = None - self._sql_store: AuthorizedSqlStore | None = None - - def _now(self) -> int: - """Return current UTC timestamp as int seconds.""" - return int(datetime.now(UTC).timestamp()) - - async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]: - where: dict[str, str | dict] = {"id": file_id} - if not return_expired: - where["expires_at"] = {">": self._now()} - if not (row := await self.sql_store.fetch_one("openai_files", where=where)): - raise ResourceNotFoundError(file_id, "File", "files.list()") - return row - - async def _delete_file(self, file_id: str) -> None: - """Delete a file from S3 and the database.""" - try: - self.client.delete_object( - Bucket=self._config.bucket_name, - Key=file_id, - ) - except ClientError as e: - if e.response["Error"]["Code"] != "NoSuchKey": - raise RuntimeError(f"Failed to delete file from S3: {e}") from e - - await self.sql_store.delete("openai_files", where={"id": file_id}) - - async def _delete_if_expired(self, file_id: str) -> None: - """If the file exists and is expired, delete it.""" - if row := await self._get_file(file_id, return_expired=True): - if (expires_at := row.get("expires_at")) and expires_at <= self._now(): - await self._delete_file(file_id) - - async def initialize(self) -> None: - self._client = _create_s3_client(self._config) - await _create_bucket_if_not_exists(self._client, self._config) - - self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store), self.policy) - await self._sql_store.create_table( - "openai_files", - { - "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), - "filename": ColumnType.STRING, - "purpose": ColumnType.STRING, - "bytes": ColumnType.INTEGER, - "created_at": ColumnType.INTEGER, - "expires_at": ColumnType.INTEGER, - # TODO: add 
s3_etag field for integrity checking - }, - ) - - async def shutdown(self) -> None: - pass - - @property - def client(self) -> boto3.client: - assert self._client is not None, "Provider not initialized" - return self._client - - @property - def sql_store(self) -> AuthorizedSqlStore: - assert self._sql_store is not None, "Provider not initialized" - return self._sql_store - - async def openai_upload_file( - self, - file: Annotated[UploadFile, File()], - purpose: Annotated[OpenAIFilePurpose, Form()], - expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None, - ) -> OpenAIFileObject: - file_id = generate_object_id("file", lambda: f"file-{uuid.uuid4().hex}") - - filename = getattr(file, "filename", None) or "uploaded_file" - - created_at = self._now() - - # the default is no expiration. - # to implement no expiration we set an expiration beyond the max. - # we'll hide this fact from users when returning the file object. - expires_at = created_at + ExpiresAfter.MAX * 42 - # the default for BATCH files is 30 days, which happens to be the expiration max. - if purpose == OpenAIFilePurpose.BATCH: - expires_at = created_at + ExpiresAfter.MAX - - if expires_after is not None: - expires_at = created_at + expires_after.seconds - - content = await file.read() - file_size = len(content) - - entry: dict[str, Any] = { - "id": file_id, - "filename": filename, - "purpose": purpose.value, - "bytes": file_size, - "created_at": created_at, - "expires_at": expires_at, - } - - await self.sql_store.insert("openai_files", entry) - - try: - self.client.put_object( - Bucket=self._config.bucket_name, - Key=file_id, - Body=content, - # TODO: enable server-side encryption - ) - except ClientError as e: - await self.sql_store.delete("openai_files", where={"id": file_id}) - - raise RuntimeError(f"Failed to upload file to S3: {e}") from e - - return _make_file_object(**entry) - - async def openai_list_files( - self, - after: str | None = None, - limit: int | None = 10000, - order: Order | None = Order.desc, - purpose: OpenAIFilePurpose | None = None, - ) -> ListOpenAIFileResponse: - # this is purely defensive. it should not happen because the router also defaults to Order.desc. - if not order: - order = Order.desc - - where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}} - if purpose: - where_conditions["purpose"] = purpose.value - - paginated_result = await self.sql_store.fetch_all( - table="openai_files", - where=where_conditions, - order_by=[("created_at", order.value)], - cursor=("id", after) if after else None, - limit=limit, - ) - - files = [_make_file_object(**row) for row in paginated_result.data] - - return ListOpenAIFileResponse( - data=files, - has_more=paginated_result.has_more, - # empty string or None? 
spec says str, ref impl returns str | None, we go with spec - first_id=files[0].id if files else "", - last_id=files[-1].id if files else "", - ) - - async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject: - await self._delete_if_expired(file_id) - row = await self._get_file(file_id) - return _make_file_object(**row) - - async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: - await self._delete_if_expired(file_id) - _ = await self._get_file(file_id) # raises if not found - await self._delete_file(file_id) - return OpenAIFileDeleteResponse(id=file_id, deleted=True) - - async def openai_retrieve_file_content(self, file_id: str) -> Response: - await self._delete_if_expired(file_id) - - row = await self._get_file(file_id) - - try: - response = self.client.get_object( - Bucket=self._config.bucket_name, - Key=row["id"], - ) - # TODO: can we stream this instead of loading it into memory - content = response["Body"].read() - except ClientError as e: - if e.response["Error"]["Code"] == "NoSuchKey": - await self._delete_file(file_id) - raise ResourceNotFoundError(file_id, "File", "files.list()") from e - raise RuntimeError(f"Failed to download file from S3: {e}") from e - - return Response( - content=content, - media_type="application/octet-stream", - headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'}, - ) diff --git a/llama_stack/providers/remote/inference/anthropic/config.py b/llama_stack/providers/remote/inference/anthropic/config.py deleted file mode 100644 index 31e6aa12b..000000000 --- a/llama_stack/providers/remote/inference/anthropic/config.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class AnthropicProviderDataValidator(BaseModel): - anthropic_api_key: str | None = Field( - default=None, - description="API key for Anthropic models", - ) - - -@json_schema_type -class AnthropicConfig(RemoteInferenceProviderConfig): - @classmethod - def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY:=}", **kwargs) -> dict[str, Any]: - return { - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/inference/azure/config.py b/llama_stack/providers/remote/inference/azure/config.py deleted file mode 100644 index 7c31df7a6..000000000 --- a/llama_stack/providers/remote/inference/azure/config.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import os -from typing import Any - -from pydantic import BaseModel, Field, HttpUrl, SecretStr - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class AzureProviderDataValidator(BaseModel): - azure_api_key: SecretStr = Field( - description="Azure API key for Azure", - ) - azure_api_base: HttpUrl = Field( - description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)", - ) - azure_api_version: str | None = Field( - default=None, - description="Azure API version for Azure (e.g., 2024-06-01)", - ) - azure_api_type: str | None = Field( - default="azure", - description="Azure API type for Azure (e.g., azure)", - ) - - -@json_schema_type -class AzureConfig(RemoteInferenceProviderConfig): - api_base: HttpUrl = Field( - description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)", - ) - api_version: str | None = Field( - default_factory=lambda: os.getenv("AZURE_API_VERSION"), - description="Azure API version for Azure (e.g., 2024-12-01-preview)", - ) - api_type: str | None = Field( - default_factory=lambda: os.getenv("AZURE_API_TYPE", "azure"), - description="Azure API type for Azure (e.g., azure)", - ) - - @classmethod - def sample_run_config( - cls, - api_key: str = "${env.AZURE_API_KEY:=}", - api_base: str = "${env.AZURE_API_BASE:=}", - api_version: str = "${env.AZURE_API_VERSION:=}", - api_type: str = "${env.AZURE_API_TYPE:=}", - **kwargs, - ) -> dict[str, Any]: - return { - "api_key": api_key, - "api_base": api_base, - "api_version": api_version, - "api_type": api_type, - } diff --git a/llama_stack/providers/remote/inference/bedrock/__init__.py b/llama_stack/providers/remote/inference/bedrock/__init__.py deleted file mode 100644 index 4d98f4999..000000000 --- a/llama_stack/providers/remote/inference/bedrock/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -from .config import BedrockConfig - - -async def get_adapter_impl(config: BedrockConfig, _deps): - from .bedrock import BedrockInferenceAdapter - - assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}" - - impl = BedrockInferenceAdapter(config) - - await impl.initialize() - - return impl diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py deleted file mode 100644 index d266f9e6f..000000000 --- a/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import json -from collections.abc import AsyncIterator - -from botocore.client import BaseClient - -from llama_stack.apis.inference import ( - ChatCompletionRequest, - Inference, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletionRequestWithExtraBody, - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, -) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, -) -from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig -from llama_stack.providers.utils.bedrock.client import create_bedrock_client -from llama_stack.providers.utils.inference.model_registry import ( - ModelRegistryHelper, -) -from llama_stack.providers.utils.inference.openai_compat import ( - get_sampling_strategy_options, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - chat_completion_request_to_prompt, -) - -from .models import MODEL_ENTRIES - -REGION_PREFIX_MAP = { - "us": "us.", - "eu": "eu.", - "ap": "ap.", -} - - -def _get_region_prefix(region: str | None) -> str: - # AWS requires region prefixes for inference profiles - if region is None: - return "us." # default to US when we don't know - - # Handle case insensitive region matching - region_lower = region.lower() - for prefix in REGION_PREFIX_MAP: - if region_lower.startswith(f"{prefix}-"): - return REGION_PREFIX_MAP[prefix] - - # Fallback to US for anything we don't recognize - return "us." - - -def _to_inference_profile_id(model_id: str, region: str = None) -> str: - # Return ARNs unchanged - if model_id.startswith("arn:"): - return model_id - - # Return inference profile IDs that already have regional prefixes - if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()): - return model_id - - # Default to US East when no region is provided - if region is None: - region = "us-east-1" - - return _get_region_prefix(region) + model_id - - -class BedrockInferenceAdapter( - ModelRegistryHelper, - Inference, -): - def __init__(self, config: BedrockConfig) -> None: - ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) - self._config = config - self._client = None - - @property - def client(self) -> BaseClient: - if self._client is None: - self._client = create_bedrock_client(self._config) - return self._client - - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - if self._client is not None: - self._client.close() - - async def _get_params_for_chat_completion(self, request: ChatCompletionRequest) -> dict: - bedrock_model = request.model - - sampling_params = request.sampling_params - options = get_sampling_strategy_options(sampling_params) - - if sampling_params.max_tokens: - options["max_gen_len"] = sampling_params.max_tokens - if sampling_params.repetition_penalty > 0: - options["repetition_penalty"] = sampling_params.repetition_penalty - - prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model)) - - # Convert foundation model ID to inference profile ID - region_name = self.client.meta.region_name - inference_profile_id = _to_inference_profile_id(bedrock_model, region_name) - - return { - "modelId": inference_profile_id, - "body": json.dumps( - { - "prompt": prompt, - **options, - } - ), - } - - async def openai_embeddings( - self, - params: OpenAIEmbeddingsRequestWithExtraBody, - ) -> OpenAIEmbeddingsResponse: - raise NotImplementedError() - - async def openai_completion( - self, - params: OpenAICompletionRequestWithExtraBody, - ) -> 
OpenAICompletion: - raise NotImplementedError("OpenAI completion not supported by the Bedrock provider") - - async def openai_chat_completion( - self, - params: OpenAIChatCompletionRequestWithExtraBody, - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider") diff --git a/llama_stack/providers/remote/inference/bedrock/config.py b/llama_stack/providers/remote/inference/bedrock/config.py deleted file mode 100644 index 5961a2f15..000000000 --- a/llama_stack/providers/remote/inference/bedrock/config.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig - - -class BedrockConfig(BedrockBaseConfig): - pass diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py deleted file mode 100644 index 17273c122..000000000 --- a/llama_stack/providers/remote/inference/bedrock/models.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.utils.inference.model_registry import ( - build_hf_repo_model_entry, -) - -SAFETY_MODELS_ENTRIES = [] - - -# https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html -MODEL_ENTRIES = [ - build_hf_repo_model_entry( - "meta.llama3-1-8b-instruct-v1:0", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_hf_repo_model_entry( - "meta.llama3-1-70b-instruct-v1:0", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta.llama3-1-405b-instruct-v1:0", - CoreModelId.llama3_1_405b_instruct.value, - ), -] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/llama_stack/providers/remote/inference/cerebras/config.py deleted file mode 100644 index dc9a0f5fc..000000000 --- a/llama_stack/providers/remote/inference/cerebras/config.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
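Since the Bedrock adapter above is being removed, a condensed, self-contained restatement of its inference-profile mapping may help reviewers verify that any replacement covers the same cases; this is a sketch of the deleted `_to_inference_profile_id` / `_get_region_prefix` behavior, not new project code.

```python
REGION_PREFIX_MAP = {"us": "us.", "eu": "eu.", "ap": "ap."}

def to_inference_profile_id(model_id: str, region: str | None = None) -> str:
    """Condensed restatement of the deleted Bedrock mapping logic."""
    if model_id.startswith("arn:"):
        return model_id  # ARNs pass through unchanged
    if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
        return model_id  # already carries a regional prefix
    region = (region or "us-east-1").lower()  # default to US East when unknown
    for prefix, value in REGION_PREFIX_MAP.items():
        if region.startswith(f"{prefix}-"):
            return value + model_id
    return "us." + model_id  # fall back to US for unrecognized regions

assert to_inference_profile_id("meta.llama3-1-8b-instruct-v1:0", "eu-west-1") == "eu.meta.llama3-1-8b-instruct-v1:0"
```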
- -import os -from typing import Any - -from pydantic import Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - -DEFAULT_BASE_URL = "https://api.cerebras.ai" - - -@json_schema_type -class CerebrasImplConfig(RemoteInferenceProviderConfig): - base_url: str = Field( - default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL), - description="Base URL for the Cerebras API", - ) - - @classmethod - def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]: - return { - "base_url": DEFAULT_BASE_URL, - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/inference/databricks/config.py b/llama_stack/providers/remote/inference/databricks/config.py deleted file mode 100644 index 49d19cd35..000000000 --- a/llama_stack/providers/remote/inference/databricks/config.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import Field, SecretStr - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class DatabricksImplConfig(RemoteInferenceProviderConfig): - url: str | None = Field( - default=None, - description="The URL for the Databricks model serving endpoint", - ) - auth_credential: SecretStr | None = Field( - default=None, - alias="api_token", - description="The Databricks API token", - ) - - @classmethod - def sample_run_config( - cls, - url: str = "${env.DATABRICKS_HOST:=}", - api_token: str = "${env.DATABRICKS_TOKEN:=}", - **kwargs: Any, - ) -> dict[str, Any]: - return { - "url": url, - "api_token": api_token, - } diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py deleted file mode 100644 index 44996507f..000000000 --- a/llama_stack/providers/remote/inference/databricks/databricks.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
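The `${env.VAR:=default}` strings returned by these `sample_run_config` methods are environment-variable placeholders that the stack resolves when a run config is loaded; `${env.VAR}` with no `:=` marks a required variable. As a rough illustration of the substitution semantics (the real resolver lives elsewhere in llama_stack and may differ in detail):

```python
import os
import re

# Hedged sketch of how "${env.VAR:=default}" placeholders could be resolved.
_PATTERN = re.compile(r"\$\{env\.([A-Za-z0-9_]+)(?::=([^}]*))?\}")

def resolve_env_placeholders(value: str) -> str:
    def _sub(m: re.Match) -> str:
        name, default = m.group(1), m.group(2)
        if name in os.environ:
            return os.environ[name]
        if default is not None:
            return default  # may be the empty string, as in "${env.X:=}"
        raise ValueError(f"Required environment variable {name} is not set")

    return _PATTERN.sub(_sub, value)

print(resolve_env_placeholders("${env.CEREBRAS_BASE_URL:=https://api.cerebras.ai}"))
```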
- -from collections.abc import Iterable - -from databricks.sdk import WorkspaceClient - -from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - -from .config import DatabricksImplConfig - -logger = get_logger(name=__name__, category="inference::databricks") - - -class DatabricksInferenceAdapter(OpenAIMixin): - config: DatabricksImplConfig - - # source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models - embedding_model_metadata: dict[str, dict[str, int]] = { - "databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192}, - "databricks-bge-large-en": {"embedding_dimension": 1024, "context_length": 512}, - } - - def get_base_url(self) -> str: - return f"{self.config.url}/serving-endpoints" - - async def list_provider_model_ids(self) -> Iterable[str]: - return [ - endpoint.name - for endpoint in WorkspaceClient( - host=self.config.url, token=self.get_api_key() - ).serving_endpoints.list() # TODO: this is not async - ] - - async def openai_completion( - self, - params: OpenAICompletionRequestWithExtraBody, - ) -> OpenAICompletion: - raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/fireworks/config.py b/llama_stack/providers/remote/inference/fireworks/config.py deleted file mode 100644 index 20ba99606..000000000 --- a/llama_stack/providers/remote/inference/fireworks/config.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class FireworksImplConfig(RemoteInferenceProviderConfig): - url: str = Field( - default="https://api.fireworks.ai/inference/v1", - description="The URL for the Fireworks server", - ) - - @classmethod - def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]: - return { - "url": "https://api.fireworks.ai/inference/v1", - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/inference/gemini/config.py b/llama_stack/providers/remote/inference/gemini/config.py deleted file mode 100644 index df5da29a2..000000000 --- a/llama_stack/providers/remote/inference/gemini/config.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
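The deleted Databricks adapter's `list_provider_model_ids` calls the blocking `databricks-sdk` client from an async method, as its own TODO notes. One possible way to honor the async contract would be to push the call onto a worker thread; a sketch only, mirroring the `WorkspaceClient` usage of the removed code:

```python
import asyncio

async def list_provider_model_ids(host: str, token: str) -> list[str]:
    from databricks.sdk import WorkspaceClient

    def _list_blocking() -> list[str]:
        # Same client construction and endpoint listing as the deleted adapter.
        client = WorkspaceClient(host=host, token=token)
        return [endpoint.name for endpoint in client.serving_endpoints.list()]

    # Run the blocking SDK call off the event loop.
    return await asyncio.to_thread(_list_blocking)
```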
- -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class GeminiProviderDataValidator(BaseModel): - gemini_api_key: str | None = Field( - default=None, - description="API key for Gemini models", - ) - - -@json_schema_type -class GeminiConfig(RemoteInferenceProviderConfig): - @classmethod - def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY:=}", **kwargs) -> dict[str, Any]: - return { - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/inference/groq/config.py b/llama_stack/providers/remote/inference/groq/config.py deleted file mode 100644 index c1aedca3e..000000000 --- a/llama_stack/providers/remote/inference/groq/config.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class GroqProviderDataValidator(BaseModel): - groq_api_key: str | None = Field( - default=None, - description="API key for Groq models", - ) - - -@json_schema_type -class GroqConfig(RemoteInferenceProviderConfig): - url: str = Field( - default="https://api.groq.com", - description="The URL for the Groq AI server", - ) - - @classmethod - def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]: - return { - "url": "https://api.groq.com", - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/llama_stack/providers/remote/inference/llama_openai_compat/config.py deleted file mode 100644 index 4b5750ed4..000000000 --- a/llama_stack/providers/remote/inference/llama_openai_compat/config.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class LlamaProviderDataValidator(BaseModel): - llama_api_key: str | None = Field( - default=None, - description="API key for api.llama models", - ) - - -@json_schema_type -class LlamaCompatConfig(RemoteInferenceProviderConfig): - openai_compat_api_base: str = Field( - default="https://api.llama.com/compat/v1/", - description="The URL for the Llama API server", - ) - - @classmethod - def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]: - return { - "openai_compat_api_base": "https://api.llama.com/compat/v1/", - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/inference/nvidia/__init__.py b/llama_stack/providers/remote/inference/nvidia/__init__.py deleted file mode 100644 index b4926f33e..000000000 --- a/llama_stack/providers/remote/inference/nvidia/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.apis.inference import Inference - -from .config import NVIDIAConfig - - -async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference: - # import dynamically so `llama stack list-deps` does not fail due to missing dependencies - from .nvidia import NVIDIAInferenceAdapter - - if not isinstance(config, NVIDIAConfig): - raise RuntimeError(f"Unexpected config type: {type(config)}") - adapter = NVIDIAInferenceAdapter(config=config) - await adapter.initialize() - return adapter - - -__all__ = ["get_adapter_impl", "NVIDIAConfig"] diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py deleted file mode 100644 index 2171877a5..000000000 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os -from typing import Any - -from pydantic import Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class NVIDIAConfig(RemoteInferenceProviderConfig): - """ - Configuration for the NVIDIA NIM inference endpoint. - - Attributes: - url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000 - api_key (str): The access key for the hosted NIM endpoints - - There are two ways to access NVIDIA NIMs - - 0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com - 1. Self-hosted: You can run NVIDIA NIMs on your own infrastructure - - By default the configuration is set to use the hosted APIs. This requires - an API key which can be obtained from https://ngc.nvidia.com/. - - By default the configuration will attempt to read the NVIDIA_API_KEY environment - variable to set the api_key. Please do not put your API key in code. - - If you are using a self-hosted NVIDIA NIM, you can set the url to the - URL of your running NVIDIA NIM and do not need to set the api_key. - """ - - url: str = Field( - default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"), - description="A base url for accessing the NVIDIA NIM", - ) - timeout: int = Field( - default=60, - description="Timeout for the HTTP requests", - ) - append_api_version: bool = Field( - default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false", - description="When set to false, the API version will not be appended to the base_url. By default, it is true.", - ) - - @classmethod - def sample_run_config( - cls, - url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", - api_key: str = "${env.NVIDIA_API_KEY:=}", - append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}", - **kwargs, - ) -> dict[str, Any]: - return { - "url": url, - "api_key": api_key, - "append_api_version": append_api_version, - } diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py deleted file mode 100644 index 5aba6bddc..000000000 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - -from . import NVIDIAConfig -from .utils import _is_nvidia_hosted - -logger = get_logger(name=__name__, category="inference::nvidia") - - -class NVIDIAInferenceAdapter(OpenAIMixin): - config: NVIDIAConfig - - """ - NVIDIA Inference Adapter for Llama Stack. - """ - - # source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html - embedding_model_metadata: dict[str, dict[str, int]] = { - "nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192}, - "nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024}, - "nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096}, - "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024}, - } - - async def initialize(self) -> None: - logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...") - - if _is_nvidia_hosted(self.config): - if not self.config.auth_credential: - raise RuntimeError( - "API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM." - ) - - def get_api_key(self) -> str: - """ - Get the API key for OpenAI mixin. - - :return: The NVIDIA API key - """ - if self.config.auth_credential: - return self.config.auth_credential.get_secret_value() - - if not _is_nvidia_hosted(self.config): - return "NO KEY REQUIRED" - - return None - - def get_base_url(self) -> str: - """ - Get the base URL for OpenAI mixin. - - :return: The NVIDIA API base URL - """ - return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/llama_stack/providers/remote/inference/nvidia/utils.py deleted file mode 100644 index 46ee939d9..000000000 --- a/llama_stack/providers/remote/inference/nvidia/utils.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from . import NVIDIAConfig - - -def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: - return "integrate.api.nvidia.com" in config.url diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py deleted file mode 100644 index 416b847a0..000000000 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
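Two details of the removed NVIDIA adapter are worth keeping in mind for any replacement: the hosted/self-hosted split described in the `NVIDIAConfig` docstring, and the `/v1` suffix handling in `get_base_url()`. A small self-contained restatement (URLs come from the deleted code; the API key value is a placeholder):

```python
def nvidia_base_url(url: str, append_api_version: bool = True) -> str:
    # Mirrors the deleted NVIDIAInferenceAdapter.get_base_url().
    return f"{url}/v1" if append_api_version else url

# Hosted NIM: default URL, API key required (normally via NVIDIA_API_KEY).
hosted = {"url": "https://integrate.api.nvidia.com", "api_key": "nvapi-placeholder"}

# Self-hosted NIM: point at your own deployment; no API key needed.
self_hosted = {"url": "http://localhost:8000"}

assert nvidia_base_url(hosted["url"]) == "https://integrate.api.nvidia.com/v1"
```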
- -from typing import Any - -from pydantic import Field, SecretStr - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig - -DEFAULT_OLLAMA_URL = "http://localhost:11434" - - -class OllamaImplConfig(RemoteInferenceProviderConfig): - auth_credential: SecretStr | None = Field(default=None, exclude=True) - - url: str = DEFAULT_OLLAMA_URL - - @classmethod - def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]: - return { - "url": url, - } diff --git a/llama_stack/providers/remote/inference/openai/config.py b/llama_stack/providers/remote/inference/openai/config.py deleted file mode 100644 index 36c66bd28..000000000 --- a/llama_stack/providers/remote/inference/openai/config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class OpenAIProviderDataValidator(BaseModel): - openai_api_key: str | None = Field( - default=None, - description="API key for OpenAI models", - ) - - -@json_schema_type -class OpenAIConfig(RemoteInferenceProviderConfig): - base_url: str = Field( - default="https://api.openai.com/v1", - description="Base URL for OpenAI API", - ) - - @classmethod - def sample_run_config( - cls, - api_key: str = "${env.OPENAI_API_KEY:=}", - base_url: str = "${env.OPENAI_BASE_URL:=https://api.openai.com/v1}", - **kwargs, - ) -> dict[str, Any]: - return { - "api_key": api_key, - "base_url": base_url, - } diff --git a/llama_stack/providers/remote/inference/passthrough/__init__.py b/llama_stack/providers/remote/inference/passthrough/__init__.py deleted file mode 100644 index 69dd4c461..000000000 --- a/llama_stack/providers/remote/inference/passthrough/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pydantic import BaseModel - -from .config import PassthroughImplConfig - - -class PassthroughProviderDataValidator(BaseModel): - url: str - api_key: str - - -async def get_adapter_impl(config: PassthroughImplConfig, _deps): - from .passthrough import PassthroughInferenceAdapter - - assert isinstance(config, PassthroughImplConfig), f"Unexpected config type: {type(config)}" - impl = PassthroughInferenceAdapter(config) - await impl.initialize() - return impl diff --git a/llama_stack/providers/remote/inference/passthrough/config.py b/llama_stack/providers/remote/inference/passthrough/config.py deleted file mode 100644 index f8e8b8ce5..000000000 --- a/llama_stack/providers/remote/inference/passthrough/config.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
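The passthrough `__init__.py` above follows the same factory convention as the other removed providers (compare the Bedrock and NVIDIA `__init__.py` hunks earlier): an async `get_adapter_impl(config, deps)` that checks the config type, constructs the adapter, and awaits its initialization. The generic shape, with `MyConfig` and `MyAdapter` as hypothetical stand-ins:

```python
class MyConfig:
    pass  # hypothetical stand-in for a concrete provider config

class MyAdapter:
    def __init__(self, config: MyConfig) -> None:
        self.config = config

    async def initialize(self) -> None:
        pass  # open clients, validate credentials, etc.

async def get_adapter_impl(config: MyConfig, _deps) -> MyAdapter:
    assert isinstance(config, MyConfig), f"Unexpected config type: {type(config)}"
    impl = MyAdapter(config)
    await impl.initialize()
    return impl
```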
- -from typing import Any - -from pydantic import Field, SecretStr - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class PassthroughImplConfig(RemoteInferenceProviderConfig): - url: str | None = Field( - default=None, - description="The URL for the passthrough endpoint", - ) - - api_key: SecretStr | None = Field( - default=None, - description="API Key for the passthrough endpoint", - ) - - @classmethod - def sample_run_config( - cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs - ) -> dict[str, Any]: - return { - "url": url, - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py deleted file mode 100644 index 4d4d4f41d..000000000 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from collections.abc import AsyncIterator -from typing import Any - -from llama_stack_client import AsyncLlamaStackClient - -from llama_stack.apis.inference import ( - Inference, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletion, - OpenAICompletionRequestWithExtraBody, - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, -) -from llama_stack.apis.models import Model -from llama_stack.core.library_client import convert_pydantic_to_json_value -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper - -from .config import PassthroughImplConfig - - -class PassthroughInferenceAdapter(Inference): - def __init__(self, config: PassthroughImplConfig) -> None: - ModelRegistryHelper.__init__(self) - self.config = config - - async def unregister_model(self, model_id: str) -> None: - pass - - async def register_model(self, model: Model) -> Model: - return model - - def _get_client(self) -> AsyncLlamaStackClient: - passthrough_url = None - passthrough_api_key = None - provider_data = None - - if self.config.url is not None: - passthrough_url = self.config.url - else: - provider_data = self.get_request_provider_data() - if provider_data is None or not provider_data.passthrough_url: - raise ValueError( - 'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": }' - ) - passthrough_url = provider_data.passthrough_url - - if self.config.api_key is not None: - passthrough_api_key = self.config.api_key.get_secret_value() - else: - provider_data = self.get_request_provider_data() - if provider_data is None or not provider_data.passthrough_api_key: - raise ValueError( - 'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": }' - ) - passthrough_api_key = provider_data.passthrough_api_key - - return AsyncLlamaStackClient( - base_url=passthrough_url, - api_key=passthrough_api_key, - provider_data=provider_data, - ) - - async def openai_embeddings( - self, - params: OpenAIEmbeddingsRequestWithExtraBody, - ) -> OpenAIEmbeddingsResponse: - raise NotImplementedError() - - async def openai_completion( - self, - params: OpenAICompletionRequestWithExtraBody, - ) -> OpenAICompletion: - client =
self._get_client() - model_obj = await self.model_store.get_model(params.model) - - params = params.model_copy() - params.model = model_obj.provider_resource_id - - request_params = params.model_dump(exclude_none=True) - - return await client.inference.openai_completion(**request_params) - - async def openai_chat_completion( - self, - params: OpenAIChatCompletionRequestWithExtraBody, - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - client = self._get_client() - model_obj = await self.model_store.get_model(params.model) - - params = params.model_copy() - params.model = model_obj.provider_resource_id - - request_params = params.model_dump(exclude_none=True) - - return await client.inference.openai_chat_completion(**request_params) - - def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]: - json_params = {} - for key, value in request_params.items(): - json_input = convert_pydantic_to_json_value(value) - if isinstance(json_input, dict): - json_input = {k: v for k, v in json_input.items() if v is not None} - elif isinstance(json_input, list): - json_input = [x for x in json_input if x is not None] - new_input = [] - for x in json_input: - if isinstance(x, dict): - x = {k: v for k, v in x.items() if v is not None} - new_input.append(x) - json_input = new_input - - json_params[key] = json_input - - return json_params diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/llama_stack/providers/remote/inference/runpod/config.py deleted file mode 100644 index 3d16d20fd..000000000 --- a/llama_stack/providers/remote/inference/runpod/config.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import Field, SecretStr - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class RunpodImplConfig(RemoteInferenceProviderConfig): - url: str | None = Field( - default=None, - description="The URL for the Runpod model serving endpoint", - ) - auth_credential: SecretStr | None = Field( - default=None, - alias="api_token", - description="The API token", - ) - - @classmethod - def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: - return { - "url": "${env.RUNPOD_URL:=}", - "api_token": "${env.RUNPOD_API_TOKEN}", - } diff --git a/llama_stack/providers/remote/inference/sambanova/config.py b/llama_stack/providers/remote/inference/sambanova/config.py deleted file mode 100644 index f63210434..000000000 --- a/llama_stack/providers/remote/inference/sambanova/config.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
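When the removed passthrough adapter has no `url`/`api_key` in its config, it expects them per request via the `X-LlamaStack-Provider-Data` header, as its `ValueError` messages above spell out. A sketch of the header a caller would send (endpoint and key are placeholders):

```python
import json

headers = {
    "X-LlamaStack-Provider-Data": json.dumps(
        {
            "passthrough_url": "http://localhost:8321",  # placeholder endpoint
            "passthrough_api_key": "sk-example",  # placeholder key
        }
    )
}
```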
- -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class SambaNovaProviderDataValidator(BaseModel): - sambanova_api_key: str | None = Field( - default=None, - description="Sambanova Cloud API key", - ) - - -@json_schema_type -class SambaNovaImplConfig(RemoteInferenceProviderConfig): - url: str = Field( - default="https://api.sambanova.ai/v1", - description="The URL for the SambaNova AI server", - ) - - @classmethod - def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: - return { - "url": "https://api.sambanova.ai/v1", - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py deleted file mode 100644 index daa4b1670..000000000 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin - -from .config import SambaNovaImplConfig - - -class SambaNovaInferenceAdapter(OpenAIMixin): - config: SambaNovaImplConfig - - provider_data_api_key_field: str = "sambanova_api_key" - download_images: bool = True  # SambaNova does not support image downloads server-side; perform them on the client - """ - SambaNova Inference Adapter for Llama Stack. - """ - - def get_base_url(self) -> str: - """ - Get the base URL for OpenAI mixin. - - :return: The SambaNova base URL - """ - return self.config.url diff --git a/llama_stack/providers/remote/inference/tgi/config.py b/llama_stack/providers/remote/inference/tgi/config.py deleted file mode 100644 index 47952abba..000000000 --- a/llama_stack/providers/remote/inference/tgi/config.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from pydantic import BaseModel, Field, SecretStr - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class TGIImplConfig(RemoteInferenceProviderConfig): - auth_credential: SecretStr | None = Field(default=None, exclude=True) - - url: str = Field( - description="The URL for the TGI serving endpoint", - ) - - @classmethod - def sample_run_config( - cls, - url: str = "${env.TGI_URL:=}", - **kwargs, - ): - return { - "url": url, - } - - -@json_schema_type -class InferenceEndpointImplConfig(BaseModel): - endpoint_name: str = Field( - description="The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce').
Namespace is optional and will default to the user account if not provided.", - ) - api_token: SecretStr | None = Field( - default=None, - description="Your Hugging Face user access token (will default to locally saved token if not provided)", - ) - - @classmethod - def sample_run_config( - cls, - endpoint_name: str = "${env.INFERENCE_ENDPOINT_NAME}", - api_token: str = "${env.HF_API_TOKEN}", - **kwargs, - ): - return { - "endpoint_name": endpoint_name, - "api_token": api_token, - } - - -@json_schema_type -class InferenceAPIImplConfig(BaseModel): - huggingface_repo: str = Field( - description="The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct')", - ) - api_token: SecretStr | None = Field( - default=None, - description="Your Hugging Face user access token (will default to locally saved token if not provided)", - ) - - @classmethod - def sample_run_config( - cls, - repo: str = "${env.INFERENCE_MODEL}", - api_token: str = "${env.HF_API_TOKEN}", - **kwargs, - ): - return { - "huggingface_repo": repo, - "api_token": api_token, - } diff --git a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py deleted file mode 100644 index 47392c8e7..000000000 --- a/llama_stack/providers/remote/inference/together/config.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class TogetherImplConfig(RemoteInferenceProviderConfig): - url: str = Field( - default="https://api.together.xyz/v1", - description="The URL for the Together AI server", - ) - - @classmethod - def sample_run_config(cls, **kwargs) -> dict[str, Any]: - return { - "url": "https://api.together.xyz/v1", - "api_key": "${env.TOGETHER_API_KEY:=}", - } diff --git a/llama_stack/providers/remote/inference/vertexai/config.py b/llama_stack/providers/remote/inference/vertexai/config.py deleted file mode 100644 index 5f2efa894..000000000 --- a/llama_stack/providers/remote/inference/vertexai/config.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any - -from pydantic import BaseModel, Field, SecretStr - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class VertexAIProviderDataValidator(BaseModel): - vertex_project: str | None = Field( - default=None, - description="Google Cloud project ID for Vertex AI", - ) - vertex_location: str | None = Field( - default=None, - description="Google Cloud location for Vertex AI (e.g., us-central1)", - ) - - -@json_schema_type -class VertexAIConfig(RemoteInferenceProviderConfig): - auth_credential: SecretStr | None = Field(default=None, exclude=True) - - project: str = Field( - description="Google Cloud project ID for Vertex AI", - ) - location: str = Field( - default="us-central1", - description="Google Cloud location for Vertex AI", - ) - - @classmethod - def sample_run_config( - cls, - project: str = "${env.VERTEX_AI_PROJECT:=}", - location: str = "${env.VERTEX_AI_LOCATION:=us-central1}", - **kwargs, - ) -> dict[str, Any]: - return { - "project": project, - "location": location, - } diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py deleted file mode 100644 index e362aece6..000000000 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pathlib import Path - -from pydantic import Field, SecretStr, field_validator - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig): - url: str | None = Field( - default=None, - description="The URL for the vLLM model serving endpoint", - ) - max_tokens: int = Field( - default=4096, - description="Maximum number of tokens to generate.", - ) - auth_credential: SecretStr | None = Field( - default=None, - alias="api_token", - description="The API token", - ) - tls_verify: bool | str = Field( - default=True, - description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.", - ) - - @field_validator("tls_verify") - @classmethod - def validate_tls_verify(cls, v): - if isinstance(v, str): - # Otherwise, treat it as a cert path - cert_path = Path(v).expanduser().resolve() - if not cert_path.exists(): - raise ValueError(f"TLS certificate file does not exist: {v}") - if not cert_path.is_file(): - raise ValueError(f"TLS certificate path is not a file: {v}") - return v - return v - - @classmethod - def sample_run_config( - cls, - url: str = "${env.VLLM_URL:=}", - **kwargs, - ): - return { - "url": url, - "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", - "api_token": "${env.VLLM_API_TOKEN:=fake}", - "tls_verify": "${env.VLLM_TLS_VERIFY:=true}", - } diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py deleted file mode 100644 index 8d8df13b4..000000000 --- a/llama_stack/providers/remote/inference/watsonx/config.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
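The removed vLLM config's `tls_verify` field accepts either a boolean or a CA-certificate path; restated standalone for clarity, the validator logic from the deleted code is:

```python
from pathlib import Path

def validate_tls_verify(v: bool | str) -> bool | str:
    # Booleans pass through; strings are treated as CA-certificate paths
    # that must exist and be regular files (as in the deleted validator).
    if isinstance(v, str):
        cert_path = Path(v).expanduser().resolve()
        if not cert_path.exists():
            raise ValueError(f"TLS certificate file does not exist: {v}")
        if not cert_path.is_file():
            raise ValueError(f"TLS certificate path is not a file: {v}")
    return v
```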
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.schema_utils import json_schema_type - - -class WatsonXProviderDataValidator(BaseModel): - watsonx_project_id: str | None = Field( - default=None, - description="IBM WatsonX project ID", - ) - watsonx_api_key: str | None = None - - -@json_schema_type -class WatsonXConfig(RemoteInferenceProviderConfig): - url: str = Field( - default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), - description="A base url for accessing the watsonx.ai", - ) - project_id: str | None = Field( - default=None, - description="The watsonx.ai project ID", - ) - timeout: int = Field( - default=60, - description="Timeout for the HTTP requests", - ) - - @classmethod - def sample_run_config(cls, **kwargs) -> dict[str, Any]: - return { - "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", - "api_key": "${env.WATSONX_API_KEY:=}", - "project_id": "${env.WATSONX_PROJECT_ID:=}", - } diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md deleted file mode 100644 index 789514b1e..000000000 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ /dev/null @@ -1,150 +0,0 @@ -# NVIDIA Post-Training Provider for LlamaStack - -This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service. - -## Features - -- Supervised fine-tuning of Llama models -- LoRA fine-tuning support -- Job management and status tracking - -## Getting Started - -### Prerequisites - -- LlamaStack with NVIDIA configuration -- Access to Hosted NVIDIA NeMo Customizer service -- Dataset registered in the Hosted NVIDIA NeMo Customizer service -- Base model downloaded and available in the Hosted NVIDIA NeMo Customizer service - -### Setup - -Build the NVIDIA environment: - -```bash -uv run llama stack list-deps nvidia | xargs -L1 uv pip install -``` - -### Basic Usage using the LlamaStack Python Client - -### Create Customization Job - -#### Initialize the client - -```python -import os - -os.environ["NVIDIA_API_KEY"] = "your-api-key" -os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" -os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" -os.environ["NVIDIA_PROJECT_ID"] = "test-project" -os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1" - -from llama_stack.core.library_client import LlamaStackAsLibraryClient - -client = LlamaStackAsLibraryClient("nvidia") -client.initialize() -``` - -#### Configure fine-tuning parameters - -```python -from llama_stack_client.types.post_training_supervised_fine_tune_params import ( - TrainingConfig, - TrainingConfigDataConfig, - TrainingConfigOptimizerConfig, -) -from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig -``` - -#### Set up LoRA configuration - -```python -algorithm_config = LoraFinetuningConfig(type="LoRA", adapter_dim=16) -``` - -#### Configure training data - -```python -data_config = TrainingConfigDataConfig( - dataset_id="your-dataset-id", # Use client.datasets.list() to see available datasets - batch_size=16, -) -``` - -#### Configure optimizer - -```python -optimizer_config = TrainingConfigOptimizerConfig( - lr=0.0001, -) -``` - -#### Set up training configuration - -```python 
-training_config = TrainingConfig( - n_epochs=2, - data_config=data_config, - optimizer_config=optimizer_config, -) -``` - -#### Start fine-tuning job - -```python -training_job = client.post_training.supervised_fine_tune( - job_uuid="unique-job-id", - model="meta-llama/Llama-3.1-8B-Instruct", - checkpoint_dir="", - algorithm_config=algorithm_config, - training_config=training_config, - logger_config={}, - hyperparam_search_config={}, -) -``` - -### List all jobs - -```python -jobs = client.post_training.job.list() -``` - -### Check job status - -```python -job_status = client.post_training.job.status(job_uuid="your-job-id") -``` - -### Cancel a job - -```python -client.post_training.job.cancel(job_uuid="your-job-id") -``` - -### Inference with the fine-tuned model - -#### 1. Register the model - -```python -from llama_stack.apis.models import Model, ModelType - -client.models.register( - model_id="test-example-model@v1", - provider_id="nvidia", - provider_model_id="test-example-model@v1", - model_type=ModelType.llm, -) -``` - -#### 2. Inference with the fine-tuned model - -```python -response = client.completions.create( - prompt="Complete the sentence using one word: Roses are red, violets are ", - stream=False, - model="test-example-model@v1", - max_tokens=50, -) -print(response.choices[0].text) -``` diff --git a/llama_stack/providers/remote/post_training/nvidia/post_training.py b/llama_stack/providers/remote/post_training/nvidia/post_training.py deleted file mode 100644 index d839ffd6f..000000000 --- a/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ /dev/null @@ -1,430 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import warnings -from datetime import datetime -from typing import Any, Literal - -import aiohttp -from pydantic import BaseModel, ConfigDict - -from llama_stack.apis.post_training import ( - AlgorithmConfig, - DPOAlignmentConfig, - JobStatus, - PostTrainingJob, - PostTrainingJobArtifactsResponse, - PostTrainingJobStatusResponse, - TrainingConfig, -) -from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig -from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper - -from .models import _MODEL_ENTRIES - -# Map API status to JobStatus enum -STATUS_MAPPING = { - "running": JobStatus.in_progress.value, - "completed": JobStatus.completed.value, - "failed": JobStatus.failed.value, - "cancelled": JobStatus.cancelled.value, - "pending": JobStatus.scheduled.value, - "unknown": JobStatus.scheduled.value, -} - - -class NvidiaPostTrainingJob(PostTrainingJob): - """Parse the response from the Customizer API. - Inherits job_uuid from PostTrainingJob. - Adds status, created_at, updated_at parameters. - Passes through all other parameters from data field in the response. 
- """ - - model_config = ConfigDict(extra="allow") - status: JobStatus - created_at: datetime - updated_at: datetime - - -class ListNvidiaPostTrainingJobs(BaseModel): - data: list[NvidiaPostTrainingJob] - - -class NvidiaPostTrainingJobStatusResponse(PostTrainingJobStatusResponse): - model_config = ConfigDict(extra="allow") - - -class NvidiaPostTrainingAdapter(ModelRegistryHelper): - def __init__(self, config: NvidiaPostTrainingConfig): - self.config = config - self.headers = {} - if config.api_key: - self.headers["Authorization"] = f"Bearer {config.api_key}" - - self.timeout = aiohttp.ClientTimeout(total=config.timeout) - # TODO: filter by available models based on /config endpoint - ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES) - self.session = None - - self.customizer_url = config.customizer_url - if not self.customizer_url: - warnings.warn("Customizer URL is not set, using default value: http://nemo.test", stacklevel=2) - self.customizer_url = "http://nemo.test" - - async def _get_session(self) -> aiohttp.ClientSession: - if self.session is None or self.session.closed: - self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout) - return self.session - - async def _make_request( - self, - method: str, - path: str, - headers: dict[str, Any] | None = None, - params: dict[str, Any] | None = None, - json: dict[str, Any] | None = None, - **kwargs, - ) -> dict[str, Any]: - """Helper method to make HTTP requests to the Customizer API.""" - url = f"{self.customizer_url}{path}" - request_headers = self.headers.copy() - - if headers: - request_headers.update(headers) - - # Add content-type header for JSON requests - if json and "Content-Type" not in request_headers: - request_headers["Content-Type"] = "application/json" - - session = await self._get_session() - for _ in range(self.config.max_retries): - async with session.request(method, url, params=params, json=json, **kwargs) as response: - if response.status >= 400: - error_data = await response.json() - raise Exception(f"API request failed: {error_data}") - return await response.json() - - async def get_training_jobs( - self, - page: int | None = 1, - page_size: int | None = 10, - sort: Literal["created_at", "-created_at"] | None = "created_at", - ) -> ListNvidiaPostTrainingJobs: - """Get all customization jobs. - Updated the base class return type from ListPostTrainingJobsResponse to ListNvidiaPostTrainingJobs. - - Returns a ListNvidiaPostTrainingJobs object with the following fields: - - data: List[NvidiaPostTrainingJob] - List of NvidiaPostTrainingJob objects - - ToDo: Support for schema input for filtering. 
- """ - params = {"page": page, "page_size": page_size, "sort": sort} - - response = await self._make_request("GET", "/v1/customization/jobs", params=params) - - jobs = [] - for job in response.get("data", []): - job_id = job.pop("id") - job_status = job.pop("status", "scheduled").lower() - mapped_status = STATUS_MAPPING.get(job_status, "scheduled") - - # Convert string timestamps to datetime objects - created_at = ( - datetime.fromisoformat(job.pop("created_at")) - if "created_at" in job - else datetime.now(tz=datetime.timezone.utc) - ) - updated_at = ( - datetime.fromisoformat(job.pop("updated_at")) - if "updated_at" in job - else datetime.now(tz=datetime.timezone.utc) - ) - - # Create NvidiaPostTrainingJob instance - jobs.append( - NvidiaPostTrainingJob( - job_uuid=job_id, - status=JobStatus(mapped_status), - created_at=created_at, - updated_at=updated_at, - **job, - ) - ) - - return ListNvidiaPostTrainingJobs(data=jobs) - - async def get_training_job_status(self, job_uuid: str) -> NvidiaPostTrainingJobStatusResponse: - """Get the status of a customization job. - Updated the base class return type from PostTrainingJobResponse to NvidiaPostTrainingJob. - - Returns a NvidiaPostTrainingJob object with the following fields: - - job_uuid: str - Unique identifier for the job - - status: JobStatus - Current status of the job (in_progress, completed, failed, cancelled, scheduled) - - created_at: datetime - The time when the job was created - - updated_at: datetime - The last time the job status was updated - - Additional fields that may be included: - - steps_completed: Optional[int] - Number of training steps completed - - epochs_completed: Optional[int] - Number of epochs completed - - percentage_done: Optional[float] - Percentage of training completed (0-100) - - best_epoch: Optional[int] - The epoch with the best performance - - train_loss: Optional[float] - Training loss of the best checkpoint - - val_loss: Optional[float] - Validation loss of the best checkpoint - - metrics: Optional[Dict] - Additional training metrics - - status_logs: Optional[List] - Detailed logs of status changes - """ - response = await self._make_request( - "GET", - f"/v1/customization/jobs/{job_uuid}/status", - params={"job_id": job_uuid}, - ) - - api_status = response.pop("status").lower() - mapped_status = STATUS_MAPPING.get(api_status, "scheduled") - - return NvidiaPostTrainingJobStatusResponse( - status=JobStatus(mapped_status), - job_uuid=job_uuid, - started_at=datetime.fromisoformat(response.pop("created_at")), - updated_at=datetime.fromisoformat(response.pop("updated_at")), - **response, - ) - - async def cancel_training_job(self, job_uuid: str) -> None: - await self._make_request( - method="POST", path=f"/v1/customization/jobs/{job_uuid}/cancel", params={"job_id": job_uuid} - ) - - async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: - raise NotImplementedError("Job artifacts are not implemented yet") - - async def get_post_training_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: - raise NotImplementedError("Job artifacts are not implemented yet") - - async def supervised_fine_tune( - self, - job_uuid: str, - training_config: dict[str, Any], - hyperparam_search_config: dict[str, Any], - logger_config: dict[str, Any], - model: str, - checkpoint_dir: str | None, - algorithm_config: AlgorithmConfig | None = None, - ) -> NvidiaPostTrainingJob: - """ - Fine-tunes a model on a dataset. 
- Currently only supports LoRA finetuning for a standalone docker container. - Assumptions: - - nemo microservice is running and endpoint is set in config.customizer_url - - dataset is registered separately in nemo datastore - - model checkpoint is downloaded as per nemo customizer requirements - - Parameters: - training_config: TrainingConfig - Configuration for training - model: str - NeMo Customizer configuration name - algorithm_config: Optional[AlgorithmConfig] - Algorithm-specific configuration - checkpoint_dir: Optional[str] - Directory containing model checkpoints, ignored atm - job_uuid: str - Unique identifier for the job, ignored atm - hyperparam_search_config: Dict[str, Any] - Configuration for hyperparameter search, ignored atm - logger_config: Dict[str, Any] - Configuration for logging, ignored atm - - Environment Variables: - - NVIDIA_API_KEY: str - API key for the NVIDIA API - Default: None - - NVIDIA_DATASET_NAMESPACE: str - Namespace of the dataset - Default: "default" - - NVIDIA_CUSTOMIZER_URL: str - URL of the NeMo Customizer API - Default: "http://nemo.test" - - NVIDIA_PROJECT_ID: str - ID of the project - Default: "test-project" - - NVIDIA_OUTPUT_MODEL_DIR: str - Directory to save the output model - Default: "test-example-model@v1" - - Supported models: - - meta/llama-3.1-8b-instruct - - meta/llama-3.2-1b-instruct - - Supported algorithm configs: - - LoRA, SFT - - Supported Parameters: - - TrainingConfig: - - n_epochs: int - Number of epochs to train - Default: 50 - - data_config: DataConfig - Configuration for the dataset - - optimizer_config: OptimizerConfig - Configuration for the optimizer - - dtype: str - Data type for training - not supported (users are informed via warnings) - - efficiency_config: EfficiencyConfig - Configuration for efficiency - not supported - - max_steps_per_epoch: int - Maximum number of steps per epoch - Default: 1000 - ## NeMo customizer specific parameters - - log_every_n_steps: int - Log every n steps - Default: None - - val_check_interval: float - Validation check interval - Default: 0.25 - - sequence_packing_enabled: bool - Sequence packing enabled - Default: False - ## NeMo customizer specific SFT parameters - - hidden_dropout: float - Hidden dropout - Default: None (0.0-1.0) - - attention_dropout: float - Attention dropout - Default: None (0.0-1.0) - - ffn_dropout: float - FFN dropout - Default: None (0.0-1.0) - - - DataConfig: - - dataset_id: str - Dataset ID - - batch_size: int - Batch size - Default: 8 - - - OptimizerConfig: - - lr: float - Learning rate - Default: 0.0001 - ## NeMo customizer specific parameter - - weight_decay: float - Weight decay - Default: 0.01 - - - LoRA config: - ## NeMo customizer specific LoRA parameters - - alpha: int - Scaling factor for the LoRA update - Default: 16 - Note: - - checkpoint_dir, hyperparam_search_config, logger_config are not supported (users are informed via warnings) - - Some parameters from TrainingConfig, DataConfig, OptimizerConfig are not supported (users are informed via warnings) - - Users are informed about unsupported parameters via warnings.
- """ - - # Check for unsupported method parameters - unsupported_method_params = [] - if checkpoint_dir: - unsupported_method_params.append(f"checkpoint_dir={checkpoint_dir}") - if hyperparam_search_config: - unsupported_method_params.append("hyperparam_search_config") - if logger_config: - unsupported_method_params.append("logger_config") - - if unsupported_method_params: - warnings.warn( - f"Parameters: {', '.join(unsupported_method_params)} are not supported and will be ignored", - stacklevel=2, - ) - - # Define all supported parameters - supported_params = { - "training_config": { - "n_epochs", - "data_config", - "optimizer_config", - "log_every_n_steps", - "val_check_interval", - "sequence_packing_enabled", - "hidden_dropout", - "attention_dropout", - "ffn_dropout", - }, - "data_config": {"dataset_id", "batch_size"}, - "optimizer_config": {"lr", "weight_decay"}, - "lora_config": {"type", "alpha"}, - } - - # Validate all parameters at once - warn_unsupported_params(training_config, supported_params["training_config"], "TrainingConfig") - warn_unsupported_params(training_config["data_config"], supported_params["data_config"], "DataConfig") - warn_unsupported_params( - training_config["optimizer_config"], supported_params["optimizer_config"], "OptimizerConfig" - ) - - output_model = self.config.output_model_dir - - # Prepare base job configuration - job_config = { - "config": model, - "dataset": { - "name": training_config["data_config"]["dataset_id"], - "namespace": self.config.dataset_namespace, - }, - "hyperparameters": { - "training_type": "sft", - "finetuning_type": "lora", - **{ - k: v - for k, v in { - "epochs": training_config.get("n_epochs"), - "batch_size": training_config["data_config"].get("batch_size"), - "learning_rate": training_config["optimizer_config"].get("lr"), - "weight_decay": training_config["optimizer_config"].get("weight_decay"), - "log_every_n_steps": training_config.get("log_every_n_steps"), - "val_check_interval": training_config.get("val_check_interval"), - "sequence_packing_enabled": training_config.get("sequence_packing_enabled"), - }.items() - if v is not None - }, - }, - "project": self.config.project_id, - # TODO: ignored ownership, add it later - # "ownership": {"created_by": self.config.user_id, "access_policies": self.config.access_policies}, - "output_model": output_model, - } - - # Handle SFT-specific optional parameters - job_config["hyperparameters"]["sft"] = { - k: v - for k, v in { - "ffn_dropout": training_config.get("ffn_dropout"), - "hidden_dropout": training_config.get("hidden_dropout"), - "attention_dropout": training_config.get("attention_dropout"), - }.items() - if v is not None - } - - # Remove the sft dictionary if it's empty - if not job_config["hyperparameters"]["sft"]: - job_config["hyperparameters"].pop("sft") - - # Handle LoRA-specific configuration - if algorithm_config: - if algorithm_config.type == "LoRA": - warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config") - job_config["hyperparameters"]["lora"] = { - k: v for k, v in {"alpha": algorithm_config.alpha}.items() if v is not None - } - else: - raise NotImplementedError(f"Unsupported algorithm config: {algorithm_config}") - - # Create the customization job - response = await self._make_request( - method="POST", - path="/v1/customization/jobs", - headers={"Accept": "application/json"}, - json=job_config, - ) - - job_uuid = response["id"] - response.pop("status") - created_at = datetime.fromisoformat(response.pop("created_at")) - updated_at = 
datetime.fromisoformat(response.pop("updated_at"))
-
-        return NvidiaPostTrainingJob(
-            job_uuid=job_uuid, status=JobStatus.in_progress, created_at=created_at, updated_at=updated_at, **response
-        )
-
-    async def preference_optimize(
-        self,
-        job_uuid: str,
-        finetuned_model: str,
-        algorithm_config: DPOAlignmentConfig,
-        training_config: TrainingConfig,
-        hyperparam_search_config: dict[str, Any],
-        logger_config: dict[str, Any],
-    ) -> PostTrainingJob:
-        """Optimize a model based on preference data."""
-        raise NotImplementedError("Preference optimization is not implemented yet")
-
-    async def get_training_job_container_logs(self, job_uuid: str) -> PostTrainingJobStatusResponse:
-        raise NotImplementedError("Job logs are not implemented yet")
diff --git a/llama_stack/providers/remote/post_training/nvidia/utils.py b/llama_stack/providers/remote/post_training/nvidia/utils.py
deleted file mode 100644
index 162951ff3..000000000
--- a/llama_stack/providers/remote/post_training/nvidia/utils.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import warnings
-from typing import Any
-
-from pydantic import BaseModel
-
-from llama_stack.apis.post_training import TrainingConfig
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig
-
-from .config import NvidiaPostTrainingConfig
-
-logger = get_logger(name=__name__, category="post_training::nvidia")
-
-
-def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None:
-    keys = set(config_dict.__annotations__.keys()) if isinstance(config_dict, BaseModel) else config_dict.keys()
-    unsupported_params = [k for k in keys if k not in supported_keys]
-    if unsupported_params:
-        warnings.warn(
-            f"Parameters: {unsupported_params} in `{config_name}` not supported and will be ignored.", stacklevel=2
-        )
-
-
-def validate_training_params(
-    training_config: dict[str, Any], supported_keys: set[str], config_name: str = "TrainingConfig"
-) -> None:
-    """
-    Validates training parameters against supported keys.
-
-    Args:
-        training_config: Dictionary containing training configuration parameters
-        supported_keys: Set of supported parameter keys
-        config_name: Name of the configuration for warning messages
-    """
-    sft_lora_fields = set(SFTLoRADefaultConfig.__annotations__.keys())
-    training_config_fields = set(TrainingConfig.__annotations__.keys())
-
-    # A key is unsupported if it is unknown to both configs, or if it appears in
-    # TrainingConfig but not in SFTLoRADefaultConfig. Both cases reduce to: the
-    # key is outside supported_keys and outside SFTLoRADefaultConfig.
-    # (training_config_fields is retained above for documentation purposes.)
-    unsupported_params = [
-        key
-        for key in training_config
-        if isinstance(key, str) and key not in supported_keys.union(sft_lora_fields)
-    ]
-
-    if unsupported_params:
-        warnings.warn(
-            f"Parameters: {unsupported_params} in `{config_name}` are not supported and will be ignored.", stacklevel=2
-        )
-
-
-# TODO: implement health checks once they are enabled for the Customizer service
-async def _get_health(url: str) -> tuple[bool, bool]: ...
-
-
-async def check_health(config: NvidiaPostTrainingConfig) -> None: ...
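To make the deleted `supervised_fine_tune` flow above concrete, here is a minimal sketch of the request it assembles and POSTs to NeMo Customizer's `/v1/customization/jobs` endpoint. The payload shape mirrors the `job_config` dict built in the adapter; the URL, dataset name, and hyperparameter values are the defaults documented in its docstring, and the synchronous `requests` call stands in for the adapter's internal `_make_request` helper. Illustrative only, not part of the diff.

```python
# Sketch: the JSON payload supervised_fine_tune() builds for NeMo Customizer.
# Assumptions: customizer_url/dataset/config names are placeholder defaults
# from the deleted adapter's docstring, not a real deployment.
import requests

customizer_url = "http://nemo.test"  # config.customizer_url

job_config = {
    "config": "meta/llama-3.1-8b-instruct",  # NeMo Customizer configuration name
    "dataset": {"name": "my-dataset", "namespace": "default"},
    "hyperparameters": {
        "training_type": "sft",
        "finetuning_type": "lora",
        "epochs": 50,            # TrainingConfig.n_epochs
        "batch_size": 8,         # DataConfig.batch_size
        "learning_rate": 0.0001, # OptimizerConfig.lr
        "weight_decay": 0.01,    # OptimizerConfig.weight_decay
        "lora": {"alpha": 16},   # LoRA scaling factor
    },
    "project": "test-project",
    "output_model": "test-example-model@v1",
}

response = requests.post(
    f"{customizer_url}/v1/customization/jobs",
    headers={"Accept": "application/json"},
    json=job_config,
    timeout=30,
)
response.raise_for_status()
# The returned "id" is the job_uuid later polled via get_training_job_status()
print(response.json()["id"])
```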
diff --git a/llama_stack/providers/remote/safety/bedrock/bedrock.py b/llama_stack/providers/remote/safety/bedrock/bedrock.py
deleted file mode 100644
index 75f96816a..000000000
--- a/llama_stack/providers/remote/safety/bedrock/bedrock.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-from typing import Any
-
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import (
-    RunShieldResponse,
-    Safety,
-    SafetyViolation,
-    ViolationLevel,
-)
-from llama_stack.apis.shields import Shield
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
-from llama_stack.providers.utils.bedrock.client import create_bedrock_client
-
-from .config import BedrockSafetyConfig
-
-logger = get_logger(name=__name__, category="safety::bedrock")
-
-
-class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):
-    def __init__(self, config: BedrockSafetyConfig) -> None:
-        self.config = config
-        self.registered_shields = []
-
-    async def initialize(self) -> None:
-        try:
-            self.bedrock_runtime_client = create_bedrock_client(self.config)
-            self.bedrock_client = create_bedrock_client(self.config, "bedrock")
-        except Exception as e:
-            raise RuntimeError("Error initializing BedrockSafetyAdapter") from e
-
-    async def shutdown(self) -> None:
-        pass
-
-    async def register_shield(self, shield: Shield) -> None:
-        response = self.bedrock_client.list_guardrails(
-            guardrailIdentifier=shield.provider_resource_id,
-        )
-        if (
-            not response["guardrails"]
-            or len(response["guardrails"]) == 0
-            or response["guardrails"][0]["version"] != shield.params["guardrailVersion"]
-        ):
-            raise ValueError(
-                f"Shield {shield.provider_resource_id} with version {shield.params['guardrailVersion']} not found in Bedrock"
-            )
-
-    async def unregister_shield(self, identifier: str) -> None:
-        pass
-
-    async def run_shield(
-        self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None
-    ) -> RunShieldResponse:
-        shield = await self.shield_store.get_shield(shield_id)
-        if not shield:
-            raise ValueError(f"Shield {shield_id} not found")
-
-        """
-        This is the implementation for the Bedrock guardrails. The input to the guardrails is to be of this format:
-        ```content = [
-            {
-                "text": {
-                    "text": "Is the AB503 Product a better investment than the S&P 500?"
-                }
-            }
-        ]```
-        Incoming messages contain content and role.
For now we will extract the content and - default the "qualifiers": ["query"] - """ - - shield_params = shield.params - logger.debug(f"run_shield::{shield_params}::messages={messages}") - - # - convert the messages into format Bedrock expects - content_messages = [] - for message in messages: - content_messages.append({"text": {"text": message.content}}) - logger.debug(f"run_shield::final:messages::{json.dumps(content_messages, indent=2)}:") - - response = self.bedrock_runtime_client.apply_guardrail( - guardrailIdentifier=shield.provider_resource_id, - guardrailVersion=shield_params["guardrailVersion"], - source="OUTPUT", # or 'INPUT' depending on your use case - content=content_messages, - ) - if response["action"] == "GUARDRAIL_INTERVENED": - user_message = "" - metadata = {} - for output in response["outputs"]: - # guardrails returns a list - however for this implementation we will leverage the last values - user_message = output["text"] - for assessment in response["assessments"]: - # guardrails returns a list - however for this implementation we will leverage the last values - metadata = dict(assessment) - - return RunShieldResponse( - violation=SafetyViolation( - user_message=user_message, - violation_level=ViolationLevel.ERROR, - metadata=metadata, - ) - ) - - return RunShieldResponse() diff --git a/llama_stack/providers/remote/safety/bedrock/config.py b/llama_stack/providers/remote/safety/bedrock/config.py deleted file mode 100644 index 1ca8d95cb..000000000 --- a/llama_stack/providers/remote/safety/bedrock/config.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class BedrockSafetyConfig(BedrockBaseConfig): - pass diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/llama_stack/providers/remote/safety/nvidia/README.md deleted file mode 100644 index e589afe84..000000000 --- a/llama_stack/providers/remote/safety/nvidia/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# NVIDIA Safety Provider for LlamaStack - -This provider enables safety checks and guardrails for LLM interactions using NVIDIA's NeMo Guardrails service. 
-
-## Features
-
-- Run safety checks for messages
-
-## Getting Started
-
-### Prerequisites
-
-- LlamaStack with NVIDIA configuration
-- Access to NVIDIA NeMo Guardrails service
-- A NIM for the safety-check model is deployed
-
-### Setup
-
-Build the NVIDIA environment:
-
-```bash
-uv run llama stack list-deps nvidia | xargs -L1 uv pip install
-```
-
-### Basic Usage with the LlamaStack Python Client
-
-#### Initialize the client
-
-```python
-import os
-
-os.environ["NVIDIA_API_KEY"] = "your-api-key"
-os.environ["NVIDIA_GUARDRAILS_URL"] = "http://guardrails.test"
-
-from llama_stack.core.library_client import LlamaStackAsLibraryClient
-
-client = LlamaStackAsLibraryClient("nvidia")
-client.initialize()
-```
-
-#### Create a safety shield
-
-```python
-from llama_stack.apis.safety import Shield
-from llama_stack.apis.inference import Message
-
-# Create a safety shield
-shield = Shield(
-    shield_id="your-shield-id",
-    provider_resource_id="safety-model-id",  # The model to use for safety checks
-    description="Safety checks for content moderation",
-)
-
-# Register the shield
-await client.safety.register_shield(shield)
-```
-
-#### Run safety checks
-
-```python
-# Messages to check
-messages = [Message(role="user", content="Your message to check")]
-
-# Run safety check
-response = await client.safety.run_shield(
-    shield_id="your-shield-id",
-    messages=messages,
-)
-
-# Check for violations
-if response.violation:
-    print(f"Safety violation detected: {response.violation.user_message}")
-    print(f"Violation level: {response.violation.violation_level}")
-    print(f"Metadata: {response.violation.metadata}")
-else:
-    print("No safety violations detected")
-```
diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py
deleted file mode 100644
index 1c618f4f4..000000000
--- a/llama_stack/providers/remote/safety/nvidia/config.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import os
-from typing import Any
-
-from pydantic import BaseModel, Field
-
-from llama_stack.schema_utils import json_schema_type
-
-
-@json_schema_type
-class NVIDIASafetyConfig(BaseModel):
-    """
-    Configuration for the NVIDIA Guardrail microservice endpoint.
-
-    Attributes:
-        guardrails_service_url (str): A base url for accessing the NVIDIA guardrail endpoint, e.g.
http://0.0.0.0:7331 - config_id (str): The ID of the guardrails configuration to use from the configuration store - (https://developer.nvidia.com/docs/nemo-microservices/guardrails/source/guides/configuration-store-guide.html) - - """ - - guardrails_service_url: str = Field( - default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331"), - description="The url for accessing the Guardrails service", - ) - config_id: str | None = Field( - default_factory=lambda: os.getenv("NVIDIA_GUARDRAILS_CONFIG_ID", "self-check"), - description="Guardrails configuration ID to use from the Guardrails configuration store", - ) - - @classmethod - def sample_run_config(cls, **kwargs) -> dict[str, Any]: - return { - "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}", - "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}", - } diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py deleted file mode 100644 index c0df8f095..000000000 --- a/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -import requests - -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel -from llama_stack.apis.shields import Shield -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate - -from .config import NVIDIASafetyConfig - -logger = get_logger(name=__name__, category="safety::nvidia") - - -class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): - def __init__(self, config: NVIDIASafetyConfig) -> None: - """ - Initialize the NVIDIASafetyAdapter with a given safety configuration. - - Args: - config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID. - """ - self.config = config - - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - pass - - async def register_shield(self, shield: Shield) -> None: - if not shield.provider_resource_id: - raise ValueError("Shield model not provided.") - - async def unregister_shield(self, identifier: str) -> None: - pass - - async def run_shield( - self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None - ) -> RunShieldResponse: - """ - Run a safety shield check against the provided messages. - - Args: - shield_id (str): The unique identifier for the shield to be used. - messages (List[Message]): A list of Message objects representing the conversation history. - params (Optional[dict[str, Any]]): Additional parameters for the shield check. - - Returns: - RunShieldResponse: The response containing safety violation details if any. - - Raises: - ValueError: If the shield with the provided shield_id is not found. 
- """ - shield = await self.shield_store.get_shield(shield_id) - if not shield: - raise ValueError(f"Shield {shield_id} not found") - - self.shield = NeMoGuardrails(self.config, shield.shield_id) - return await self.shield.run(messages) - - async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: - raise NotImplementedError("NVIDIA safety provider currently does not implement run_moderation") - - -class NeMoGuardrails: - """ - A class that encapsulates NVIDIA's guardrails safety logic. - - Sends messages to the guardrails service and interprets the response to determine - if a safety violation has occurred. - """ - - def __init__( - self, - config: NVIDIASafetyConfig, - model: str, - threshold: float = 0.9, - temperature: float = 1.0, - ): - """ - Initialize a NeMoGuardrails instance with the provided parameters. - - Args: - config (NVIDIASafetyConfig): The safety configuration containing the config ID and guardrails URL. - model (str): The identifier or name of the model to be used for safety checks. - threshold (float, optional): The threshold for flagging violations. Defaults to 0.9. - temperature (float, optional): The temperature setting for the underlying model. Must be greater than 0. Defaults to 1.0. - - Raises: - ValueError: If temperature is less than or equal to 0. - AssertionError: If config_id is not provided in the configuration. - """ - self.config_id = config.config_id - self.model = model - assert self.config_id is not None, "Must provide config id" - if temperature <= 0: - raise ValueError("Temperature must be greater than 0") - - self.temperature = temperature - self.threshold = threshold - self.guardrails_service_url = config.guardrails_service_url - - async def _guardrails_post(self, path: str, data: Any | None): - """Helper for making POST requests to the guardrails service.""" - headers = { - "Accept": "application/json", - } - response = requests.post(url=f"{self.guardrails_service_url}{path}", headers=headers, json=data) - response.raise_for_status() - return response.json() - - async def run(self, messages: list[OpenAIMessageParam]) -> RunShieldResponse: - """ - Queries the /v1/guardrails/checks endpoint of the NeMo guardrails deployed API. - - Args: - messages (List[Message]): A list of Message objects to be checked for safety violations. - - Returns: - RunShieldResponse: If the response indicates a violation ("blocked" status), returns a - RunShieldResponse with a SafetyViolation; otherwise, returns a RunShieldResponse with violation set to None. - - Raises: - requests.HTTPError: If the POST request fails. - """ - request_data = { - "model": self.model, - "messages": [{"role": message.role, "content": message.content} for message in messages], - "temperature": self.temperature, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "max_tokens": 160, - "stream": False, - "guardrails": { - "config_id": self.config_id, - }, - } - response = await self._guardrails_post(path="/v1/guardrail/checks", data=request_data) - - if response["status"] == "blocked": - user_message = "Sorry I cannot do this." 
- metadata = response["rails_status"] - - return RunShieldResponse( - violation=SafetyViolation( - user_message=user_message, - violation_level=ViolationLevel.ERROR, - metadata=metadata, - ) - ) - - return RunShieldResponse(violation=None) diff --git a/llama_stack/providers/remote/safety/sambanova/config.py b/llama_stack/providers/remote/safety/sambanova/config.py deleted file mode 100644 index 2cde97098..000000000 --- a/llama_stack/providers/remote/safety/sambanova/config.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, Field, SecretStr - -from llama_stack.schema_utils import json_schema_type - - -class SambaNovaProviderDataValidator(BaseModel): - sambanova_api_key: str | None = Field( - default=None, - description="Sambanova Cloud API key", - ) - - -@json_schema_type -class SambaNovaSafetyConfig(BaseModel): - url: str = Field( - default="https://api.sambanova.ai/v1", - description="The URL for the SambaNova AI server", - ) - api_key: SecretStr | None = Field( - default=None, - description="The SambaNova cloud API Key", - ) - - @classmethod - def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: - return { - "url": "https://api.sambanova.ai/v1", - "api_key": api_key, - } diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py deleted file mode 100644 index 72359badd..000000000 --- a/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -import litellm -import requests - -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( - RunShieldResponse, - Safety, - SafetyViolation, - ViolationLevel, -) -from llama_stack.apis.shields import Shield -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate - -from .config import SambaNovaSafetyConfig - -logger = get_logger(name=__name__, category="safety::sambanova") - -CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" 
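An aside on the `sample_run_config` methods in this and the surrounding deleted config classes: they all emit `${env.NAME:=default}` placeholders (e.g. `${env.SAMBANOVA_API_KEY:=}` above), which llama-stack's config loading substitutes from the environment at startup. The helper below is a hypothetical, minimal resolver written only to illustrate the convention; it is not the project's actual substitution code.

```python
# Illustrative resolver for "${env.NAME:=default}" placeholders (assumed
# semantics: use os.environ[NAME] if set, otherwise the inline default).
import os
import re

_PLACEHOLDER = re.compile(r"\$\{env\.([A-Z0-9_]+):=(.*?)\}")


def resolve_env_placeholders(value: str) -> str:
    """Replace each ${env.NAME:=default} with the env var NAME, or its default."""
    return _PLACEHOLDER.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)


# Resolves to the real key if SAMBANOVA_API_KEY is set, else "" (the default).
print(resolve_env_placeholders("${env.SAMBANOVA_API_KEY:=}"))
# Resolves to the env value if set, else "http://localhost:7331".
print(resolve_env_placeholders("${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}"))
```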
-
-
-class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProviderData):
-    def __init__(self, config: SambaNovaSafetyConfig) -> None:
-        self.config = config
-        self.environment_available_models = []
-
-    async def initialize(self) -> None:
-        pass
-
-    async def shutdown(self) -> None:
-        pass
-
-    def _get_api_key(self) -> str:
-        config_api_key = self.config.api_key if self.config.api_key else None
-        if config_api_key:
-            return config_api_key.get_secret_value()
-        else:
-            provider_data = self.get_request_provider_data()
-            if provider_data is None or not provider_data.sambanova_api_key:
-                raise ValueError(
-                    'Pass Sambanova API Key in the header X-LlamaStack-Provider-Data as {"sambanova_api_key": "<your-api-key>"}'
-                )
-            return provider_data.sambanova_api_key
-
-    async def register_shield(self, shield: Shield) -> None:
-        list_models_url = self.config.url + "/models"
-        if len(self.environment_available_models) == 0:
-            try:
-                response = requests.get(list_models_url)
-                response.raise_for_status()
-            except requests.exceptions.RequestException as e:
-                raise RuntimeError(f"Request to {list_models_url} failed") from e
-            self.environment_available_models = [model.get("id") for model in response.json().get("data", [])]
-        if (
-            "guard" not in shield.provider_resource_id.lower()
-            or shield.provider_resource_id.split("sambanova/")[-1] not in self.environment_available_models
-        ):
-            logger.warning(f"Shield {shield.provider_resource_id} not available in {list_models_url}")
-
-    async def unregister_shield(self, identifier: str) -> None:
-        pass
-
-    async def run_shield(
-        self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None
-    ) -> RunShieldResponse:
-        shield = await self.shield_store.get_shield(shield_id)
-        if not shield:
-            raise ValueError(f"Shield {shield_id} not found")
-
-        shield_params = shield.params
-        logger.debug(f"run_shield::{shield_params}::messages={messages}")
-
-        response = litellm.completion(model=shield.provider_resource_id, messages=messages, api_key=self._get_api_key())
-        shield_message = response.choices[0].message.content
-
-        if "unsafe" in shield_message.lower():
-            user_message = CANNED_RESPONSE_TEXT
-            violation_type = shield_message.split("\n")[-1]
-            metadata = {"violation_type": violation_type}
-
-            return RunShieldResponse(
-                violation=SafetyViolation(
-                    user_message=user_message,
-                    violation_level=ViolationLevel.ERROR,
-                    metadata=metadata,
-                )
-            )
-
-        return RunShieldResponse()
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
deleted file mode 100644
index b8c5e77fd..000000000
--- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
- -from typing import Any - -from pydantic import BaseModel - - -class MCPProviderDataValidator(BaseModel): - # mcp_endpoint => dict of headers to send - mcp_headers: dict[str, dict[str, str]] | None = None - - -class MCPProviderConfig(BaseModel): - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return {} diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py deleted file mode 100644 index 578bb6d34..000000000 --- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any -from urllib.parse import urlparse - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.datatypes import Api -from llama_stack.apis.tools import ( - ListToolDefsResponse, - ToolGroup, - ToolInvocationResult, - ToolRuntime, -) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate -from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools - -from .config import MCPProviderConfig - -logger = get_logger(__name__, category="tools") - - -class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): - def __init__(self, config: MCPProviderConfig, _deps: dict[Api, Any]): - self.config = config - - async def initialize(self): - pass - - async def register_toolgroup(self, toolgroup: ToolGroup) -> None: - pass - - async def unregister_toolgroup(self, toolgroup_id: str) -> None: - return - - async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None - ) -> ListToolDefsResponse: - # this endpoint should be retrieved by getting the tool group right? 
- if mcp_endpoint is None: - raise ValueError("mcp_endpoint is required") - headers = await self.get_headers_from_request(mcp_endpoint.uri) - return await list_mcp_tools(mcp_endpoint.uri, headers) - - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: - tool = await self.tool_store.get_tool(tool_name) - if tool.metadata is None or tool.metadata.get("endpoint") is None: - raise ValueError(f"Tool {tool_name} does not have metadata") - endpoint = tool.metadata.get("endpoint") - if urlparse(endpoint).scheme not in ("http", "https"): - raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL") - - headers = await self.get_headers_from_request(endpoint) - return await invoke_mcp_tool(endpoint, headers, tool_name, kwargs) - - async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]: - def canonicalize_uri(uri: str) -> str: - return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}" - - headers = {} - - provider_data = self.get_request_provider_data() - if provider_data and provider_data.mcp_headers: - for uri, values in provider_data.mcp_headers.items(): - if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri): - continue - headers.update(values) - return headers diff --git a/llama_stack/providers/remote/vector_io/chroma/__init__.py b/llama_stack/providers/remote/vector_io/chroma/__init__.py deleted file mode 100644 index e4b77c68d..000000000 --- a/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.providers.datatypes import Api, ProviderSpec - -from .config import ChromaVectorIOConfig - - -async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): - from .chroma import ChromaVectorIOAdapter - - impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/llama_stack/providers/remote/vector_io/chroma/config.py deleted file mode 100644 index 209ba90bb..000000000 --- a/llama_stack/providers/remote/vector_io/chroma/config.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class ChromaVectorIOConfig(BaseModel): - url: str | None - persistence: KVStoreReference = Field(description="Config for KV store backend") - - @classmethod - def sample_run_config(cls, __distro_dir__: str, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]: - return { - "url": url, - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::chroma_remote", - ).model_dump(exclude_none=True), - } diff --git a/llama_stack/providers/remote/vector_io/milvus/__init__.py b/llama_stack/providers/remote/vector_io/milvus/__init__.py deleted file mode 100644 index 526075bb2..000000000 --- a/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.providers.datatypes import Api, ProviderSpec - -from .config import MilvusVectorIOConfig - - -async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]): - from .milvus import MilvusVectorIOAdapter - - assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py deleted file mode 100644 index 8ff9e1328..000000000 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, ConfigDict, Field - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class MilvusVectorIOConfig(BaseModel): - uri: str = Field(description="The URI of the Milvus server") - token: str | None = Field(description="The token of the Milvus server") - consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") - persistence: KVStoreReference = Field(description="Config for KV store backend") - - # This configuration allows additional fields to be passed through to the underlying Milvus client. - # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. 
- model_config = ConfigDict(extra="allow") - - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return { - "uri": "${env.MILVUS_ENDPOINT}", - "token": "${env.MILVUS_TOKEN}", - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::milvus_remote", - ).model_dump(exclude_none=True), - } diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/llama_stack/providers/remote/vector_io/pgvector/__init__.py deleted file mode 100644 index 8086b7650..000000000 --- a/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.providers.datatypes import Api, ProviderSpec - -from .config import PGVectorVectorIOConfig - - -async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): - from .pgvector import PGVectorVectorIOAdapter - - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py deleted file mode 100644 index d81e524e4..000000000 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class PGVectorVectorIOConfig(BaseModel): - host: str | None = Field(default="localhost") - port: int | None = Field(default=5432) - db: str | None = Field(default="postgres") - user: str | None = Field(default="postgres") - password: str | None = Field(default="mysecretpassword") - persistence: KVStoreReference | None = Field( - description="Config for KV store backend (SQLite only for now)", default=None - ) - - @classmethod - def sample_run_config( - cls, - __distro_dir__: str, - host: str = "${env.PGVECTOR_HOST:=localhost}", - port: int = "${env.PGVECTOR_PORT:=5432}", - db: str = "${env.PGVECTOR_DB}", - user: str = "${env.PGVECTOR_USER}", - password: str = "${env.PGVECTOR_PASSWORD}", - **kwargs: Any, - ) -> dict[str, Any]: - return { - "host": host, - "port": port, - "db": db, - "user": user, - "password": password, - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::pgvector", - ).model_dump(exclude_none=True), - } diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/llama_stack/providers/remote/vector_io/qdrant/__init__.py deleted file mode 100644 index e9527f101..000000000 --- a/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from llama_stack.providers.datatypes import Api, ProviderSpec - -from .config import QdrantVectorIOConfig - - -async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): - from .qdrant import QdrantVectorIOAdapter - - impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py deleted file mode 100644 index 01fbcc5cb..000000000 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class QdrantVectorIOConfig(BaseModel): - location: str | None = None - url: str | None = None - port: int | None = 6333 - grpc_port: int = 6334 - prefer_grpc: bool = False - https: bool | None = None - api_key: str | None = None - prefix: str | None = None - timeout: int | None = None - host: str | None = None - persistence: KVStoreReference - - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return { - "api_key": "${env.QDRANT_API_KEY:=}", - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::qdrant_remote", - ).model_dump(exclude_none=True), - } diff --git a/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/llama_stack/providers/remote/vector_io/weaviate/__init__.py deleted file mode 100644 index 12e11d013..000000000 --- a/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.providers.datatypes import Api, ProviderSpec - -from .config import WeaviateVectorIOConfig - - -async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): - from .weaviate import WeaviateVectorIOAdapter - - impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) - await impl.initialize() - return impl diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py deleted file mode 100644 index 66dbf1fed..000000000 --- a/llama_stack/providers/remote/vector_io/weaviate/config.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any - -from pydantic import BaseModel, Field - -from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class WeaviateVectorIOConfig(BaseModel): - weaviate_api_key: str | None = Field(description="The API key for the Weaviate instance", default=None) - weaviate_cluster_url: str | None = Field(description="The URL of the Weaviate cluster", default="localhost:8080") - persistence: KVStoreReference | None = Field( - description="Config for KV store backend (SQLite only for now)", default=None - ) - - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return { - "weaviate_api_key": None, - "weaviate_cluster_url": "${env.WEAVIATE_CLUSTER_URL:=localhost:8080}", - "persistence": KVStoreReference( - backend="kv_default", - namespace="vector_io::weaviate", - ).model_dump(exclude_none=True), - } diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py deleted file mode 100644 index 7e465a14c..000000000 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ /dev/null @@ -1,1401 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import json -import time -import uuid -import warnings -from collections.abc import AsyncGenerator, AsyncIterator, Awaitable, Iterable -from typing import ( - Any, -) - -from openai import AsyncStream -from openai.types.chat import ( - ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessage, -) -from openai.types.chat import ( - ChatCompletionChunk as OpenAIChatCompletionChunk, -) -from openai.types.chat import ( - ChatCompletionContentPartImageParam as OpenAIChatCompletionContentPartImageParam, -) -from openai.types.chat import ( - ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam, -) -from openai.types.chat import ( - ChatCompletionContentPartTextParam as OpenAIChatCompletionContentPartTextParam, -) - -try: - from openai.types.chat import ( - ChatCompletionMessageFunctionToolCall as OpenAIChatCompletionMessageFunctionToolCall, - ) -except ImportError: - from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, - ) -from openai.types.chat import ( - ChatCompletionMessageParam as OpenAIChatCompletionMessage, -) -from openai.types.chat import ( - ChatCompletionMessageToolCall, -) -from openai.types.chat import ( - ChatCompletionSystemMessageParam as OpenAIChatCompletionSystemMessage, -) -from openai.types.chat import ( - ChatCompletionToolMessageParam as OpenAIChatCompletionToolMessage, -) -from openai.types.chat import ( - ChatCompletionUserMessageParam as OpenAIChatCompletionUserMessage, -) -from openai.types.chat.chat_completion import ( - Choice as OpenAIChoice, -) -from openai.types.chat.chat_completion import ( - ChoiceLogprobs as OpenAIChoiceLogprobs, # same as chat_completion_chunk ChoiceLogprobs -) -from openai.types.chat.chat_completion_chunk import ( - Choice as OpenAIChatCompletionChunkChoice, -) -from openai.types.chat.chat_completion_chunk import ( - ChoiceDelta as OpenAIChoiceDelta, -) -from openai.types.chat.chat_completion_chunk import ( - ChoiceDeltaToolCall as OpenAIChoiceDeltaToolCall, -) -from openai.types.chat.chat_completion_chunk import 
( - ChoiceDeltaToolCallFunction as OpenAIChoiceDeltaToolCallFunction, -) -from openai.types.chat.chat_completion_content_part_image_param import ( - ImageURL as OpenAIImageURL, -) -from openai.types.chat.chat_completion_message_tool_call import ( - Function as OpenAIFunction, -) -from pydantic import BaseModel - -from llama_stack.apis.common.content_types import ( - URL, - ImageContentItem, - InterleavedContent, - TextContentItem, - TextDelta, - ToolCallDelta, - ToolCallParseStatus, - _URLOrData, -) -from llama_stack.apis.inference import ( - ChatCompletionRequest, - ChatCompletionResponse, - ChatCompletionResponseEvent, - ChatCompletionResponseEventType, - ChatCompletionResponseStreamChunk, - CompletionMessage, - CompletionResponse, - CompletionResponseStreamChunk, - GreedySamplingStrategy, - JsonSchemaResponseFormat, - Message, - OpenAIChatCompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, - SamplingParams, - SystemMessage, - TokenLogProbs, - ToolChoice, - ToolConfig, - ToolResponseMessage, - TopKSamplingStrategy, - TopPSamplingStrategy, - UserMessage, -) -from llama_stack.apis.inference import ( - OpenAIChoice as OpenAIChatCompletionChoice, -) -from llama_stack.log import get_logger -from llama_stack.models.llama.datatypes import ( - BuiltinTool, - StopReason, - ToolCall, - ToolDefinition, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - convert_image_content_to_url, - decode_assistant_message, -) - -logger = get_logger(name=__name__, category="providers::utils") - - -class OpenAICompatCompletionChoiceDelta(BaseModel): - content: str - - -class OpenAICompatLogprobs(BaseModel): - text_offset: list[int] | None = None - - token_logprobs: list[float] | None = None - - tokens: list[str] | None = None - - top_logprobs: list[dict[str, float]] | None = None - - -class OpenAICompatCompletionChoice(BaseModel): - finish_reason: str | None = None - text: str | None = None - delta: OpenAICompatCompletionChoiceDelta | None = None - logprobs: OpenAICompatLogprobs | None = None - - -class OpenAICompatCompletionResponse(BaseModel): - choices: list[OpenAICompatCompletionChoice] - - -def get_sampling_strategy_options(params: SamplingParams) -> dict: - options = {} - if isinstance(params.strategy, GreedySamplingStrategy): - options["temperature"] = 0.0 - elif isinstance(params.strategy, TopPSamplingStrategy): - options["temperature"] = params.strategy.temperature - options["top_p"] = params.strategy.top_p - elif isinstance(params.strategy, TopKSamplingStrategy): - options["top_k"] = params.strategy.top_k - else: - raise ValueError(f"Unsupported sampling strategy: {params.strategy}") - - return options - - -def get_sampling_options(params: SamplingParams | None) -> dict: - if not params: - return {} - - options = {} - if params: - options.update(get_sampling_strategy_options(params)) - if params.max_tokens: - options["max_tokens"] = params.max_tokens - - if params.repetition_penalty is not None and params.repetition_penalty != 1.0: - options["repeat_penalty"] = params.repetition_penalty - - if params.stop is not None: - options["stop"] = params.stop - - return options - - -def text_from_choice(choice) -> str: - if hasattr(choice, "delta") and choice.delta: - return choice.delta.content - - if hasattr(choice, "message"): - return choice.message.content - - return choice.text - - -def get_stop_reason(finish_reason: str) -> StopReason: - if finish_reason in ["stop", "eos"]: - return StopReason.end_of_turn - elif finish_reason == "eom": - return StopReason.end_of_message - 
elif finish_reason == "length": - return StopReason.out_of_tokens - - return StopReason.out_of_tokens - - -def convert_openai_completion_logprobs( - logprobs: OpenAICompatLogprobs | None, -) -> list[TokenLogProbs] | None: - if not logprobs: - return None - if hasattr(logprobs, "top_logprobs"): - return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs] - - # Together supports logprobs with top_k=1 only. This means for each token position, - # they return only the logprobs for the selected token (vs. the top n most likely tokens). - # Here we construct the response by matching the selected token with the logprobs. - if logprobs.tokens and logprobs.token_logprobs: - return [ - TokenLogProbs(logprobs_by_token={token: token_lp}) - for token, token_lp in zip(logprobs.tokens, logprobs.token_logprobs, strict=False) - ] - return None - - -def convert_openai_completion_logprobs_stream(text: str, logprobs: float | OpenAICompatLogprobs | None): - if logprobs is None: - return None - if isinstance(logprobs, float): - # Adapt response from Together CompletionChoicesChunk - return [TokenLogProbs(logprobs_by_token={text: logprobs})] - if hasattr(logprobs, "top_logprobs"): - return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs] - return None - - -def process_completion_response( - response: OpenAICompatCompletionResponse, -) -> CompletionResponse: - choice = response.choices[0] - # drop suffix if present and return stop reason as end of turn - if choice.text.endswith("<|eot_id|>"): - return CompletionResponse( - stop_reason=StopReason.end_of_turn, - content=choice.text[: -len("<|eot_id|>")], - logprobs=convert_openai_completion_logprobs(choice.logprobs), - ) - # drop suffix if present and return stop reason as end of message - if choice.text.endswith("<|eom_id|>"): - return CompletionResponse( - stop_reason=StopReason.end_of_message, - content=choice.text[: -len("<|eom_id|>")], - logprobs=convert_openai_completion_logprobs(choice.logprobs), - ) - return CompletionResponse( - stop_reason=get_stop_reason(choice.finish_reason), - content=choice.text, - logprobs=convert_openai_completion_logprobs(choice.logprobs), - ) - - -def process_chat_completion_response( - response: OpenAICompatCompletionResponse, - request: ChatCompletionRequest, -) -> ChatCompletionResponse: - choice = response.choices[0] - if choice.finish_reason == "tool_calls": - if not choice.message or not choice.message.tool_calls: - raise ValueError("Tool calls are not present in the response") - - tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls] - if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls): - # If we couldn't parse a tool call, jsonify the tool calls and return them - return ChatCompletionResponse( - completion_message=CompletionMessage( - stop_reason=StopReason.end_of_turn, - content=json.dumps(tool_calls, default=lambda x: x.model_dump()), - ), - logprobs=None, - ) - else: - # Otherwise, return tool calls as normal - return ChatCompletionResponse( - completion_message=CompletionMessage( - tool_calls=tool_calls, - stop_reason=StopReason.end_of_turn, - # Content is not optional - content="", - ), - logprobs=None, - ) - - # TODO: This does not work well with tool calls for vLLM remote provider - # Ref: https://github.com/meta-llama/llama-stack/issues/1058 - raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason)) - - # NOTE: If we do not set tools in chat-completion request, we should not 
- # expect the ToolCall in the response. Instead, we should return the raw - # response from the model. - if raw_message.tool_calls: - if not request.tools: - raw_message.tool_calls = [] - raw_message.content = text_from_choice(choice) - else: - # only return tool_calls if provided in the request - new_tool_calls = [] - request_tools = {t.tool_name: t for t in request.tools} - for t in raw_message.tool_calls: - if t.tool_name in request_tools: - new_tool_calls.append(t) - else: - logger.warning(f"Tool {t.tool_name} not found in request tools") - - if len(new_tool_calls) < len(raw_message.tool_calls): - raw_message.tool_calls = new_tool_calls - raw_message.content = text_from_choice(choice) - - return ChatCompletionResponse( - completion_message=CompletionMessage( - content=raw_message.content, - stop_reason=raw_message.stop_reason, - tool_calls=raw_message.tool_calls, - ), - logprobs=None, - ) - - -async def process_completion_stream_response( - stream: AsyncGenerator[OpenAICompatCompletionResponse, None], -) -> AsyncGenerator[CompletionResponseStreamChunk, None]: - stop_reason = None - - async for chunk in stream: - choice = chunk.choices[0] - finish_reason = choice.finish_reason - - text = text_from_choice(choice) - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - yield CompletionResponseStreamChunk( - delta=text, - stop_reason=stop_reason, - logprobs=convert_openai_completion_logprobs_stream(text, choice.logprobs), - ) - if finish_reason: - if finish_reason in ["stop", "eos", "eos_token"]: - stop_reason = StopReason.end_of_turn - elif finish_reason == "length": - stop_reason = StopReason.out_of_tokens - break - - yield CompletionResponseStreamChunk( - delta="", - stop_reason=stop_reason, - ) - - -async def process_chat_completion_stream_response( - stream: AsyncGenerator[OpenAICompatCompletionResponse, None], - request: ChatCompletionRequest, -) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None]: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta=TextDelta(text=""), - ) - ) - - buffer = "" - ipython = False - stop_reason = None - - async for chunk in stream: - choice = chunk.choices[0] - finish_reason = choice.finish_reason - - if finish_reason: - if stop_reason is None and finish_reason in ["stop", "eos", "eos_token"]: - stop_reason = StopReason.end_of_turn - elif stop_reason is None and finish_reason == "length": - stop_reason = StopReason.out_of_tokens - break - - text = text_from_choice(choice) - if not text: - # Sometimes you get empty chunks from providers - continue - - # check if its a tool call ( aka starts with <|python_tag|> ) - if not ipython and text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - buffer += text - continue - - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - - if ipython: - buffer += text - delta = ToolCallDelta( - tool_call=text, - parse_status=ToolCallParseStatus.in_progress, - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - 
event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - else: - buffer += text - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=TextDelta(text=text), - stop_reason=stop_reason, - ) - ) - - # parse tool calls and report errors - message = decode_assistant_message(buffer, stop_reason) - - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call="", - parse_status=ToolCallParseStatus.failed, - ), - stop_reason=stop_reason, - ) - ) - - request_tools = {t.tool_name: t for t in request.tools} - for tool_call in message.tool_calls: - if tool_call.tool_name in request_tools: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call=tool_call, - parse_status=ToolCallParseStatus.succeeded, - ), - stop_reason=stop_reason, - ) - ) - else: - logger.warning(f"Tool {tool_call.tool_name} not found in request tools") - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - # Parsing tool call failed due to tool call not being found in request tools, - # We still add the raw message text inside tool_call for responding back to the user - tool_call=buffer, - parse_status=ToolCallParseStatus.failed, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta=TextDelta(text=""), - stop_reason=stop_reason, - ) - ) - - -async def convert_message_to_openai_dict(message: Message, download: bool = False) -> dict: - async def _convert_content(content) -> dict: - if isinstance(content, ImageContentItem): - return { - "type": "image_url", - "image_url": { - "url": await convert_image_content_to_url(content, download=download), - }, - } - else: - text = content.text if isinstance(content, TextContentItem) else content - assert isinstance(text, str) - return {"type": "text", "text": text} - - if isinstance(message.content, list): - content = [await _convert_content(c) for c in message.content] - else: - content = [await _convert_content(message.content)] - - result = { - "role": message.role, - "content": content, - } - - if hasattr(message, "tool_calls") and message.tool_calls: - result["tool_calls"] = [] - for tc in message.tool_calls: - # The tool.tool_name can be a str or a BuiltinTool enum. If - # it's the latter, convert to a string. - tool_name = tc.tool_name - if isinstance(tool_name, BuiltinTool): - tool_name = tool_name.value - - result["tool_calls"].append( - { - "id": tc.call_id, - "type": "function", - "function": { - "name": tool_name, - "arguments": tc.arguments, - }, - } - ) - return result - - -class UnparseableToolCall(BaseModel): - """ - A ToolCall with arguments that are not valid JSON. - Mirrors the ToolCall schema, but with arguments as a string. - """ - - call_id: str = "" - tool_name: str = "" - arguments: str = "" - - -async def convert_message_to_openai_dict_new( - message: Message | dict, - download_images: bool = False, -) -> OpenAIChatCompletionMessage: - """ - Convert a Message to an OpenAI API-compatible dictionary. 
- """ - # users can supply a dict instead of a Message object, we'll - # convert it to a Message object and proceed with some type safety. - if isinstance(message, dict): - if "role" not in message: - raise ValueError("role is required in message") - if message["role"] == "user": - message = UserMessage(**message) - elif message["role"] == "assistant": - message = CompletionMessage(**message) - elif message["role"] == "tool": - message = ToolResponseMessage(**message) - elif message["role"] == "system": - message = SystemMessage(**message) - else: - raise ValueError(f"Unsupported message role: {message['role']}") - - # Map Llama Stack spec to OpenAI spec - - # str -> str - # {"type": "text", "text": ...} -> {"type": "text", "text": ...} - # {"type": "image", "image": {"url": {"uri": ...}}} -> {"type": "image_url", "image_url": {"url": ...}} - # {"type": "image", "image": {"data": ...}} -> {"type": "image_url", "image_url": {"url": "data:image/?;base64,..."}} - # List[...] -> List[...] - async def _convert_message_content( - content: InterleavedContent, - ) -> str | Iterable[OpenAIChatCompletionContentPartParam]: - async def impl( - content_: InterleavedContent, - ) -> str | OpenAIChatCompletionContentPartParam | list[OpenAIChatCompletionContentPartParam]: - # Llama Stack and OpenAI spec match for str and text input - if isinstance(content_, str): - return content_ - elif isinstance(content_, TextContentItem): - return OpenAIChatCompletionContentPartTextParam( - type="text", - text=content_.text, - ) - elif isinstance(content_, ImageContentItem): - return OpenAIChatCompletionContentPartImageParam( - type="image_url", - image_url=OpenAIImageURL( - url=await convert_image_content_to_url(content_, download=download_images) - ), - ) - elif isinstance(content_, list): - return [await impl(item) for item in content_] - else: - raise ValueError(f"Unsupported content type: {type(content_)}") - - ret = await impl(content) - - # OpenAI*Message expects a str or list - if isinstance(ret, str) or isinstance(ret, list): - return ret - else: - return [ret] - - out: OpenAIChatCompletionMessage = None - if isinstance(message, UserMessage): - out = OpenAIChatCompletionUserMessage( - role="user", - content=await _convert_message_content(message.content), - ) - elif isinstance(message, CompletionMessage): - tool_calls = [ - OpenAIChatCompletionMessageFunctionToolCall( - id=tool.call_id, - function=OpenAIFunction( - name=(tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value), - arguments=tool.arguments, # Already a JSON string, don't double-encode - ), - type="function", - ) - for tool in message.tool_calls - ] - params = {} - if tool_calls: - params["tool_calls"] = tool_calls - out = OpenAIChatCompletionAssistantMessage( - role="assistant", - content=await _convert_message_content(message.content), - **params, - ) - elif isinstance(message, ToolResponseMessage): - out = OpenAIChatCompletionToolMessage( - role="tool", - tool_call_id=message.call_id, - content=await _convert_message_content(message.content), - ) - elif isinstance(message, SystemMessage): - out = OpenAIChatCompletionSystemMessage( - role="system", - content=await _convert_message_content(message.content), - ) - else: - raise ValueError(f"Unsupported message type: {type(message)}") - - return out - - -def convert_tool_call( - tool_call: ChatCompletionMessageToolCall, -) -> ToolCall | UnparseableToolCall: - """ - Convert a ChatCompletionMessageToolCall tool call to either a - ToolCall or UnparseableToolCall. 
Returns an UnparseableToolCall - if the tool call is not valid ToolCall. - """ - try: - valid_tool_call = ToolCall( - call_id=tool_call.id, - tool_name=tool_call.function.name, - arguments=tool_call.function.arguments, - ) - except Exception: - return UnparseableToolCall( - call_id=tool_call.id or "", - tool_name=tool_call.function.name or "", - arguments=tool_call.function.arguments or "", - ) - - return valid_tool_call - - -PYTHON_TYPE_TO_LITELLM_TYPE = { - "int": "integer", - "float": "number", - "bool": "boolean", - "str": "string", -} - - -def to_openai_param_type(param_type: str) -> dict: - """ - Convert Python type hints to OpenAI parameter type format. - - Examples: - 'str' -> {'type': 'string'} - 'int' -> {'type': 'integer'} - 'list[str]' -> {'type': 'array', 'items': {'type': 'string'}} - 'list[int]' -> {'type': 'array', 'items': {'type': 'integer'}} - """ - # Handle basic types first - basic_types = { - "str": "string", - "int": "integer", - "float": "number", - "bool": "boolean", - } - - if param_type in basic_types: - return {"type": basic_types[param_type]} - - # Handle list/array types - if param_type.startswith("list[") and param_type.endswith("]"): - inner_type = param_type[5:-1] - if inner_type in basic_types: - return { - "type": "array", - "items": {"type": basic_types.get(inner_type, inner_type)}, - } - - return {"type": param_type} - - -def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict: - """ - Convert a ToolDefinition to an OpenAI API-compatible dictionary. - - ToolDefinition: - tool_name: str | BuiltinTool - description: Optional[str] - input_schema: Optional[Dict[str, Any]] # JSON Schema - output_schema: Optional[Dict[str, Any]] # JSON Schema (not used by OpenAI) - - OpenAI spec - - - { - "type": "function", - "function": { - "name": tool_name, - "description": description, - "parameters": {}, - }, - } - - NOTE: OpenAI does not support output_schema, so it is dropped here. - """ - out = { - "type": "function", - "function": {}, - } - function = out["function"] - - if isinstance(tool.tool_name, BuiltinTool): - function["name"] = tool.tool_name.value - else: - function["name"] = tool.tool_name - - if tool.description: - function["description"] = tool.description - - if tool.input_schema: - # Pass through the entire JSON Schema as-is - function["parameters"] = tool.input_schema - - # NOTE: OpenAI does not support output_schema, so we drop it here - # It's stored in LlamaStack for validation and other provider usage - - return out - - -def _convert_stop_reason_to_openai_finish_reason(stop_reason: StopReason) -> str: - """ - Convert a StopReason to an OpenAI chat completion finish_reason. - """ - return { - StopReason.end_of_turn: "stop", - StopReason.end_of_message: "tool_calls", - StopReason.out_of_tokens: "length", - }.get(stop_reason, "stop") - - -def _convert_openai_finish_reason(finish_reason: str) -> StopReason: - """ - Convert an OpenAI chat completion finish_reason to a StopReason. - - finish_reason: Literal["stop", "length", "tool_calls", ...] - - stop: model hit a natural stop point or a provided stop sequence - - length: maximum number of tokens specified in the request was reached - - tool_calls: model called a tool - - -> - - class StopReason(Enum): - end_of_turn = "end_of_turn" - end_of_message = "end_of_message" - out_of_tokens = "out_of_tokens" - """ - - # TODO(mf): are end_of_turn and end_of_message semantics correct? 
- return { - "stop": StopReason.end_of_turn, - "length": StopReason.out_of_tokens, - "tool_calls": StopReason.end_of_message, - }.get(finish_reason, StopReason.end_of_turn) - - -def _convert_openai_request_tool_config(tool_choice: str | dict[str, Any] | None = None) -> ToolConfig: - tool_config = ToolConfig() - if tool_choice: - try: - tool_choice = ToolChoice(tool_choice) - except ValueError: - pass - tool_config.tool_choice = tool_choice - return tool_config - - -def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]: - lls_tools = [] - if not tools: - return lls_tools - - for tool in tools: - tool_fn = tool.get("function", {}) - tool_name = tool_fn.get("name", None) - tool_desc = tool_fn.get("description", None) - tool_params = tool_fn.get("parameters", None) - - lls_tool = ToolDefinition( - tool_name=tool_name, - description=tool_desc, - input_schema=tool_params, # Pass through entire JSON Schema - ) - lls_tools.append(lls_tool) - return lls_tools - - -def _convert_openai_request_response_format( - response_format: OpenAIResponseFormatParam = None, -): - if not response_format: - return None - # response_format can be a dict or a pydantic model - response_format = dict(response_format) - if response_format.get("type", "") == "json_schema": - return JsonSchemaResponseFormat( - type="json_schema", - json_schema=response_format.get("json_schema", {}).get("schema", ""), - ) - return None - - -def _convert_openai_tool_calls( - tool_calls: list[OpenAIChatCompletionMessageFunctionToolCall], -) -> list[ToolCall]: - """ - Convert an OpenAI ChatCompletionMessageToolCall list into a list of ToolCall. - - OpenAI ChatCompletionMessageToolCall: - id: str - function: Function - type: Literal["function"] - - OpenAI Function: - arguments: str - name: str - - -> - - ToolCall: - call_id: str - tool_name: str - arguments: Dict[str, ...] - """ - if not tool_calls: - return [] # CompletionMessage tool_calls is not optional - - return [ - ToolCall( - call_id=call.id, - tool_name=call.function.name, - arguments=call.function.arguments, - ) - for call in tool_calls - ] - - -def _convert_openai_logprobs( - logprobs: OpenAIChoiceLogprobs, -) -> list[TokenLogProbs] | None: - """ - Convert an OpenAI ChoiceLogprobs into a list of TokenLogProbs. 
- - OpenAI ChoiceLogprobs: - content: Optional[List[ChatCompletionTokenLogprob]] - - OpenAI ChatCompletionTokenLogprob: - token: str - logprob: float - top_logprobs: List[TopLogprob] - - OpenAI TopLogprob: - token: str - logprob: float - - -> - - TokenLogProbs: - logprobs_by_token: Dict[str, float] - - token, logprob - - """ - if not logprobs or not logprobs.content: - return None - - return [ - TokenLogProbs(logprobs_by_token={logprobs.token: logprobs.logprob for logprobs in content.top_logprobs}) - for content in logprobs.content - ] - - -def _convert_openai_sampling_params( - max_tokens: int | None = None, - temperature: float | None = None, - top_p: float | None = None, -) -> SamplingParams: - sampling_params = SamplingParams() - - if max_tokens: - sampling_params.max_tokens = max_tokens - - # Map an explicit temperature of 0 to greedy sampling - if temperature == 0: - strategy = GreedySamplingStrategy() - else: - # OpenAI defaults to 1.0 for temperature and top_p if unset - if temperature is None: - temperature = 1.0 - if top_p is None: - top_p = 1.0 - strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p) - - sampling_params.strategy = strategy - return sampling_params - - -def openai_messages_to_messages( - messages: list[OpenAIMessageParam], -) -> list[Message]: - """ - Convert a list of OpenAIChatCompletionMessage into a list of Message. - """ - converted_messages = [] - for message in messages: - if message.role == "system": - converted_message = SystemMessage(content=openai_content_to_content(message.content)) - elif message.role == "user": - converted_message = UserMessage(content=openai_content_to_content(message.content)) - elif message.role == "assistant": - converted_message = CompletionMessage( - content=openai_content_to_content(message.content), - tool_calls=_convert_openai_tool_calls(message.tool_calls), - stop_reason=StopReason.end_of_turn, - ) - elif message.role == "tool": - converted_message = ToolResponseMessage( - role="tool", - call_id=message.tool_call_id, - content=openai_content_to_content(message.content), - ) - else: - raise ValueError(f"Unknown role {message.role}") - converted_messages.append(converted_message) - return converted_messages - - -def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None): - if content is None: - return "" - if isinstance(content, str): - return content - elif isinstance(content, list): - return [openai_content_to_content(c) for c in content] - elif hasattr(content, "type"): - if content.type == "text": - return TextContentItem(type="text", text=content.text) - elif content.type == "image_url": - return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url))) - else: - raise ValueError(f"Unknown content type: {content.type}") - else: - raise ValueError(f"Unknown content type: {content}") - - -def convert_openai_chat_completion_choice( - choice: OpenAIChoice, -) -> ChatCompletionResponse: - """ - Convert an OpenAI Choice into a ChatCompletionResponse. 
- - OpenAI Choice: - message: ChatCompletionMessage - finish_reason: str - logprobs: Optional[ChoiceLogprobs] - - OpenAI ChatCompletionMessage: - role: Literal["assistant"] - content: Optional[str] - tool_calls: Optional[List[ChatCompletionMessageToolCall]] - - -> - - ChatCompletionResponse: - completion_message: CompletionMessage - logprobs: Optional[List[TokenLogProbs]] - - CompletionMessage: - role: Literal["assistant"] - content: str | ImageMedia | List[str | ImageMedia] - stop_reason: StopReason - tool_calls: List[ToolCall] - - class StopReason(Enum): - end_of_turn = "end_of_turn" - end_of_message = "end_of_message" - out_of_tokens = "out_of_tokens" - """ - assert hasattr(choice, "message") and choice.message, "error in server response: message not found" - assert hasattr(choice, "finish_reason") and choice.finish_reason, ( - "error in server response: finish_reason not found" - ) - - return ChatCompletionResponse( - completion_message=CompletionMessage( - content=choice.message.content or "", # CompletionMessage content is not optional - stop_reason=_convert_openai_finish_reason(choice.finish_reason), - tool_calls=_convert_openai_tool_calls(choice.message.tool_calls), - ), - logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)), - ) - - -async def convert_openai_chat_completion_stream( - stream: AsyncStream[OpenAIChatCompletionChunk], - enable_incremental_tool_calls: bool, -) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None]: - """ - Convert a stream of OpenAI chat completion chunks into a stream - of ChatCompletionResponseStreamChunk. - """ - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta=TextDelta(text=""), - ) - ) - event_type = ChatCompletionResponseEventType.progress - - stop_reason = None - tool_call_idx_to_buffer = {} - - async for chunk in stream: - choice = chunk.choices[0] # assuming only one choice per chunk - - # we assume there's only one finish_reason in the stream - stop_reason = _convert_openai_finish_reason(choice.finish_reason) or stop_reason - logprobs = getattr(choice, "logprobs", None) - - # if there's a tool call, emit an event for each tool in the list - # if tool call and content, emit both separately - if choice.delta.tool_calls: - # the call may have content and a tool call. 
ChatCompletionResponseEvent - # does not support both, so we emit the content first - if choice.delta.content: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=TextDelta(text=choice.delta.content), - logprobs=_convert_openai_logprobs(logprobs), - ) - ) - - # it is possible to have parallel tool calls in stream, but - # ChatCompletionResponseEvent only supports one per stream - if len(choice.delta.tool_calls) > 1: - warnings.warn( - "multiple tool calls found in a single delta, using the first, ignoring the rest", - stacklevel=2, - ) - - if not enable_incremental_tool_calls: - for tool_call in choice.delta.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=ToolCallDelta( - tool_call=_convert_openai_tool_calls([tool_call])[0], - parse_status=ToolCallParseStatus.succeeded, - ), - logprobs=_convert_openai_logprobs(logprobs), - ) - ) - else: - for tool_call in choice.delta.tool_calls: - idx = tool_call.index if hasattr(tool_call, "index") else 0 - - if idx not in tool_call_idx_to_buffer: - tool_call_idx_to_buffer[idx] = { - "call_id": tool_call.id, - "name": None, - "arguments": "", - "content": "", - } - - buffer = tool_call_idx_to_buffer[idx] - - if tool_call.function: - if tool_call.function.name: - buffer["name"] = tool_call.function.name - delta = f"{buffer['name']}(" - buffer["content"] += delta - - if tool_call.function.arguments: - delta = tool_call.function.arguments - buffer["arguments"] += delta - buffer["content"] += delta - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=ToolCallDelta( - tool_call=delta, - parse_status=ToolCallParseStatus.in_progress, - ), - logprobs=_convert_openai_logprobs(logprobs), - ) - ) - elif choice.delta.content: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=TextDelta(text=choice.delta.content or ""), - logprobs=_convert_openai_logprobs(logprobs), - ) - ) - - for idx, buffer in tool_call_idx_to_buffer.items(): - logger.debug(f"toolcall_buffer[{idx}]: {buffer}") - if buffer["name"]: - delta = ")" - buffer["content"] += delta - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=ToolCallDelta( - tool_call=delta, - parse_status=ToolCallParseStatus.in_progress, - ), - logprobs=None, - ) - ) - - try: - tool_call = ToolCall( - call_id=buffer["call_id"], - tool_name=buffer["name"], - arguments=buffer["arguments"], - ) - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call=tool_call, - parse_status=ToolCallParseStatus.succeeded, - ), - stop_reason=stop_reason, - ) - ) - except json.JSONDecodeError as e: - print(f"Failed to parse arguments: {e}") - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call=buffer["content"], - parse_status=ToolCallParseStatus.failed, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta=TextDelta(text=""), - stop_reason=stop_reason, - ) - ) - - -async def prepare_openai_completion_params(**params): - async def _prepare_value(value: Any) -> Any: - new_value = 
value - if isinstance(value, list): - new_value = [await _prepare_value(v) for v in value] - elif isinstance(value, dict): - new_value = {k: await _prepare_value(v) for k, v in value.items()} - elif isinstance(value, BaseModel): - new_value = value.model_dump(exclude_none=True) - return new_value - - completion_params = {} - for k, v in params.items(): - if v is not None: - completion_params[k] = await _prepare_value(v) - return completion_params - - -class OpenAIChatCompletionToLlamaStackMixin: - async def openai_chat_completion( - self, - model: str, - messages: list[OpenAIMessageParam], - frequency_penalty: float | None = None, - function_call: str | dict[str, Any] | None = None, - functions: list[dict[str, Any]] | None = None, - logit_bias: dict[str, float] | None = None, - logprobs: bool | None = None, - max_completion_tokens: int | None = None, - max_tokens: int | None = None, - n: int | None = None, - parallel_tool_calls: bool | None = None, - presence_penalty: float | None = None, - response_format: OpenAIResponseFormatParam | None = None, - seed: int | None = None, - stop: str | list[str] | None = None, - stream: bool | None = None, - stream_options: dict[str, Any] | None = None, - temperature: float | None = None, - tool_choice: str | dict[str, Any] | None = None, - tools: list[dict[str, Any]] | None = None, - top_logprobs: int | None = None, - top_p: float | None = None, - user: str | None = None, - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - messages = openai_messages_to_messages(messages) - response_format = _convert_openai_request_response_format(response_format) - sampling_params = _convert_openai_sampling_params( - max_tokens=max_tokens, - temperature=temperature, - top_p=top_p, - ) - tool_config = _convert_openai_request_tool_config(tool_choice) - - tools = _convert_openai_request_tools(tools) - if tool_config.tool_choice == ToolChoice.none: - tools = [] - - outstanding_responses = [] - # "n" is the number of completions to generate per prompt - n = n or 1 - for _i in range(0, n): - response = self.chat_completion( - model_id=model, - messages=messages, - sampling_params=sampling_params, - response_format=response_format, - stream=stream, - tool_config=tool_config, - tools=tools, - ) - outstanding_responses.append(response) - - if stream: - return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) - - return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response( - self, model, outstanding_responses - ) - - async def _process_stream_response( - self, - model: str, - outstanding_responses: list[Awaitable[AsyncIterator[ChatCompletionResponseStreamChunk]]], - ): - id = f"chatcmpl-{uuid.uuid4()}" - for i, outstanding_response in enumerate(outstanding_responses): - response = await outstanding_response - async for chunk in response: - event = chunk.event - finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason) - - if isinstance(event.delta, TextDelta): - text_delta = event.delta.text - delta = OpenAIChoiceDelta(content=text_delta) - yield OpenAIChatCompletionChunk( - id=id, - choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], - created=int(time.time()), - model=model, - object="chat.completion.chunk", - ) - elif isinstance(event.delta, ToolCallDelta): - if event.delta.parse_status == ToolCallParseStatus.succeeded: - tool_call = event.delta.tool_call - - # First chunk includes full structure - openai_tool_call = 
OpenAIChoiceDeltaToolCall( - index=0, - id=tool_call.call_id, - function=OpenAIChoiceDeltaToolCallFunction( - name=tool_call.tool_name, - arguments="", - ), - ) - delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call]) - yield OpenAIChatCompletionChunk( - id=id, - choices=[ - OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) - ], - created=int(time.time()), - model=model, - object="chat.completion.chunk", - ) - # arguments - openai_tool_call = OpenAIChoiceDeltaToolCall( - index=0, - function=OpenAIChoiceDeltaToolCallFunction( - arguments=tool_call.arguments, - ), - ) - delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call]) - yield OpenAIChatCompletionChunk( - id=id, - choices=[ - OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) - ], - created=int(time.time()), - model=model, - object="chat.completion.chunk", - ) - - async def _process_non_stream_response( - self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]] - ) -> OpenAIChatCompletion: - choices = [] - for outstanding_response in outstanding_responses: - response = await outstanding_response - completion_message = response.completion_message - message = await convert_message_to_openai_dict_new(completion_message) - finish_reason = _convert_stop_reason_to_openai_finish_reason(completion_message.stop_reason) - - choice = OpenAIChatCompletionChoice( - index=len(choices), - message=message, - finish_reason=finish_reason, - ) - choices.append(choice) - - return OpenAIChatCompletion( - id=f"chatcmpl-{uuid.uuid4()}", - choices=choices, - created=int(time.time()), - model=model, - object="chat.completion", - ) - - -def prepare_openai_embeddings_params( - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, -): - if model is None: - raise ValueError("Model must be provided for embeddings") - - input_list = [input] if isinstance(input, str) else input - - params: dict[str, Any] = { - "model": model, - "input": input_list, - } - - if encoding_format is not None: - params["encoding_format"] = encoding_format - if dimensions is not None: - params["dimensions"] = dimensions - if user is not None: - params["user"] = user - - return params diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py deleted file mode 100644 index d06b7454d..000000000 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ /dev/null @@ -1,495 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
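The `convert_openai_chat_completion_stream` helper removed above reassembles tool calls from incremental deltas: it buffers the function name and argument fragments per `tool_call.index`, and only parses the arguments once the stream ends. A minimal standalone sketch of that accumulation pattern, with illustrative payload shapes rather than the module's actual types:

```python
import json
from dataclasses import dataclass


@dataclass
class ToolCallBuffer:
    """Accumulates streamed tool-call fragments for one stream index."""

    call_id: str | None = None
    name: str | None = None
    arguments: str = ""


def accumulate_tool_call_deltas(deltas: list[dict]) -> dict[int, ToolCallBuffer]:
    """Merge per-chunk tool-call deltas, keyed by their stream index.

    Each delta is assumed to look like an OpenAI chunk payload:
    {"index": 0, "id": "...", "function": {"name": "...", "arguments": "..."}}
    where id/name arrive once and arguments arrive in fragments.
    """
    buffers: dict[int, ToolCallBuffer] = {}
    for delta in deltas:
        buf = buffers.setdefault(delta.get("index", 0), ToolCallBuffer())
        if delta.get("id"):
            buf.call_id = delta["id"]
        fn = delta.get("function") or {}
        if fn.get("name"):
            buf.name = fn["name"]
        if fn.get("arguments"):
            buf.arguments += fn["arguments"]
    return buffers


# Fragments as they might arrive over three chunks:
chunks = [
    {"index": 0, "id": "call_1", "function": {"name": "get_weather", "arguments": ""}},
    {"index": 0, "function": {"arguments": '{"city": "Par'}},
    {"index": 0, "function": {"arguments": 'is"}'}},
]
for idx, buf in accumulate_tool_call_deltas(chunks).items():
    args = json.loads(buf.arguments)  # raises if the model emitted invalid JSON
    print(idx, buf.name, args)  # -> 0 get_weather {'city': 'Paris'}
```

Deferring the JSON parse until the stream completes mirrors the removed code's behavior of emitting a `failed` ToolCallDelta only after the full argument buffer turns out not to be valid JSON.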
- -import asyncio -import base64 -import io -import json -import re -from typing import Any - -import httpx -from PIL import Image as PIL_Image - -from llama_stack.apis.common.content_types import ( - ImageContentItem, - InterleavedContent, - InterleavedContentItem, - TextContentItem, -) -from llama_stack.apis.inference import ( - ChatCompletionRequest, - CompletionRequest, - Message, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIFile, - ResponseFormat, - ResponseFormatType, - SystemMessage, - SystemMessageBehavior, - ToolChoice, - ToolDefinition, - UserMessage, -) -from llama_stack.log import get_logger -from llama_stack.models.llama.datatypes import ( - RawContent, - RawContentItem, - RawMediaItem, - RawMessage, - RawTextItem, - Role, - StopReason, - ToolPromptFormat, -) -from llama_stack.models.llama.llama3.chat_format import ChatFormat -from llama_stack.models.llama.llama3.prompt_templates import ( - BuiltinToolGenerator, - FunctionTagCustomToolGenerator, - JsonCustomToolGenerator, - PythonListCustomToolGenerator, - SystemDefaultGenerator, -) -from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( - PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4, -) -from llama_stack.models.llama.sku_list import resolve_model -from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal -from llama_stack.providers.utils.inference import supported_inference_models - -log = get_logger(name=__name__, category="providers::utils") - - -class ChatCompletionRequestWithRawContent(ChatCompletionRequest): - messages: list[RawMessage] - - -class CompletionRequestWithRawContent(CompletionRequest): - content: RawContent - - -def decode_assistant_message(content: str, stop_reason: StopReason) -> RawMessage: - formatter = ChatFormat(Tokenizer.get_instance()) - return formatter.decode_assistant_message_from_content(content, stop_reason) - - -def interleaved_content_as_str( - content: Any, - sep: str = " ", -) -> str: - if content is None: - return "" - - def _process(c) -> str: - if isinstance(c, str): - return c - elif isinstance(c, TextContentItem) or isinstance(c, OpenAIChatCompletionContentPartTextParam): - return c.text - elif isinstance(c, ImageContentItem) or isinstance(c, OpenAIChatCompletionContentPartImageParam): - return "" - elif isinstance(c, OpenAIFile): - return "" - else: - raise ValueError(f"Unsupported content type: {type(c)}") - - if isinstance(content, list): - return sep.join(_process(c) for c in content) - else: - return _process(content) - - -async def convert_request_to_raw( - request: ChatCompletionRequest | CompletionRequest, -) -> ChatCompletionRequestWithRawContent | CompletionRequestWithRawContent: - if isinstance(request, ChatCompletionRequest): - messages = [] - for m in request.messages: - content = await interleaved_content_convert_to_raw(m.content) - d = m.model_dump() - d["content"] = content - messages.append(RawMessage(**d)) - - d = request.model_dump() - d["messages"] = messages - request = ChatCompletionRequestWithRawContent(**d) - else: - d = request.model_dump() - d["content"] = await interleaved_content_convert_to_raw(request.content) - request = CompletionRequestWithRawContent(**d) - - return request - - -async def interleaved_content_convert_to_raw( - content: InterleavedContent, -) -> RawContent: - """Download content from URLs / files etc. 
so plain bytes can be sent to the model""" - - async def _localize_single(c: str | InterleavedContentItem) -> str | RawContentItem: - if isinstance(c, str): - return RawTextItem(text=c) - elif isinstance(c, TextContentItem): - return RawTextItem(text=c.text) - elif isinstance(c, ImageContentItem): - image = c.image - if image.url: - # Load image bytes from URL - if image.url.uri.startswith("data"): - match = re.match(r"data:image/(\w+);base64,(.+)", image.url.uri) - if not match: - raise ValueError(f"Invalid data URL format, {image.url.uri[:40]}...") - _, image_data = match.groups() - data = base64.b64decode(image_data) - elif image.url.uri.startswith("file://"): - path = image.url.uri[len("file://") :] - with open(path, "rb") as f: - data = f.read() # type: ignore - elif image.url.uri.startswith("http"): - async with httpx.AsyncClient() as client: - response = await client.get(image.url.uri) - data = response.content - else: - raise ValueError("Unsupported URL type") - elif image.data: - # data is a base64 encoded string, decode it to bytes for RawMediaItem - data = base64.b64decode(image.data) - else: - raise ValueError("No data or URL provided") - - return RawMediaItem(data=data) - else: - raise ValueError(f"Unsupported content type: {type(c)}") - - if isinstance(content, list): - return await asyncio.gather(*(_localize_single(c) for c in content)) - else: - return await _localize_single(content) - - -def content_has_media(content: InterleavedContent): - def _has_media_content(c): - return isinstance(c, ImageContentItem) - - if isinstance(content, list): - return any(_has_media_content(c) for c in content) - else: - return _has_media_content(content) - - -def messages_have_media(messages: list[Message]): - return any(content_has_media(m.content) for m in messages) - - -def request_has_media(request: ChatCompletionRequest | CompletionRequest): - if isinstance(request, ChatCompletionRequest): - return messages_have_media(request.messages) - else: - return content_has_media(request.content) - - -async def localize_image_content(uri: str) -> tuple[bytes, str] | None: - if uri.startswith("http"): - async with httpx.AsyncClient() as client: - r = await client.get(uri) - content = r.content - content_type = r.headers.get("content-type") - if content_type: - format = content_type.split("/")[-1] - else: - format = "png" - - return content, format - elif uri.startswith("data"): - # data:image/{format};base64,{data} - match = re.match(r"data:image/(\w+);base64,(.+)", uri) - if not match: - raise ValueError(f"Invalid data URL format, {uri[:40]}...") - fmt, image_data = match.groups() - content = base64.b64decode(image_data) - return content, fmt - else: - return None - - -async def convert_image_content_to_url( - media: ImageContentItem, download: bool = False, include_format: bool = True -) -> str: - image = media.image - if image.url and (not download or image.url.uri.startswith("data")): - return image.url.uri - - if image.data: - # data is a base64 encoded string, decode it to bytes first - # TODO(mf): do this more efficiently, decode less - content = base64.b64decode(image.data) - pil_image = PIL_Image.open(io.BytesIO(content)) - format = pil_image.format - else: - localize_result = await localize_image_content(image.url.uri) - if localize_result is None: - raise ValueError(f"Failed to localize image content from {image.url.uri}") - content, format = localize_result - - if include_format: - return f"data:image/{format};base64," + base64.b64encode(content).decode("utf-8") - else: - return 
base64.b64encode(content).decode("utf-8")
-
-
-def augment_content_with_response_format_prompt(response_format, content):
-    if fmt_prompt := response_format_prompt(response_format):
-        if isinstance(content, list):
-            return content + [TextContentItem(text=fmt_prompt)]
-        elif isinstance(content, str):
-            return [TextContentItem(text=content), TextContentItem(text=fmt_prompt)]
-        else:
-            return [content, TextContentItem(text=fmt_prompt)]
-
-    return content
-
-
-async def chat_completion_request_to_prompt(request: ChatCompletionRequest, llama_model: str) -> str:
-    messages = chat_completion_request_to_messages(request, llama_model)
-    request.messages = messages
-    request = await convert_request_to_raw(request)
-
-    formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
-    model_input = formatter.encode_dialog_prompt(
-        request.messages,
-        tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model),
-    )
-    return formatter.tokenizer.decode(model_input.tokens)
-
-
-async def chat_completion_request_to_model_input_info(
-    request: ChatCompletionRequest, llama_model: str
-) -> tuple[str, int]:
-    messages = chat_completion_request_to_messages(request, llama_model)
-    request.messages = messages
-    request = await convert_request_to_raw(request)
-
-    formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
-    model_input = formatter.encode_dialog_prompt(
-        request.messages,
-        tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model),
-    )
-    return (
-        formatter.tokenizer.decode(model_input.tokens),
-        len(model_input.tokens),
-    )
-
-
-def chat_completion_request_to_messages(
-    request: ChatCompletionRequest,
-    llama_model: str,
-) -> list[Message]:
-    """Reads a chat completion request and augments the messages to handle tools.
-    E.g., for llama_3_1, add a system message with the appropriate tools, or
-    add a user message for custom tools, etc.
-    """
-    assert llama_model is not None, "llama_model is required"
-    model = resolve_model(llama_model)
-    if model is None:
-        log.error(f"Could not resolve model {llama_model}")
-        return request.messages
-
-    allowed_models = supported_inference_models()
-    descriptors = [m.descriptor() for m in allowed_models]
-    if model.descriptor() not in descriptors:
-        log.error(f"Unsupported inference model {model.descriptor()}")
-        return request.messages
-
-    if model.model_family == ModelFamily.llama3_1 or (
-        model.model_family == ModelFamily.llama3_2 and is_multimodal(model.core_model_id)
-    ):
-        # llama3.1 and llama3.2 multimodal models follow the same tool prompt format
-        messages = augment_messages_for_tools_llama_3_1(request)
-    elif model.model_family in (
-        ModelFamily.llama3_2,
-        ModelFamily.llama3_3,
-    ):
-        # llama3.2, llama3.3 follow the same tool prompt format
-        messages = augment_messages_for_tools_llama(request, PythonListCustomToolGenerator)
-    elif model.model_family == ModelFamily.llama4:
-        messages = augment_messages_for_tools_llama(request, PythonListCustomToolGeneratorLlama4)
-    else:
-        messages = request.messages
-
-    if fmt_prompt := response_format_prompt(request.response_format):
-        messages.append(UserMessage(content=fmt_prompt))
-
-    return messages
-
-
-def response_format_prompt(fmt: ResponseFormat | None):
-    if not fmt:
-        return None
-
-    if fmt.type == ResponseFormatType.json_schema.value:
-        return f"Please respond in JSON format with the schema: {json.dumps(fmt.json_schema)}"
-    elif fmt.type == ResponseFormatType.grammar.value:
-        raise NotImplementedError("Grammar response format not supported yet")
-    else:
-        raise ValueError(f"Unknown response format {fmt.type}")
-
-
-def augment_messages_for_tools_llama_3_1(
-    request: ChatCompletionRequest,
-) -> list[Message]:
-    existing_messages = request.messages
-    existing_system_message = None
-    if existing_messages[0].role == Role.system.value:
-        existing_system_message = existing_messages.pop(0)
-
-    assert existing_messages[0].role != Role.system.value, "Should only have 1 system message"
-
-    messages = []
-
-    default_gen = SystemDefaultGenerator()
-    default_template = default_gen.gen()
-
-    sys_content = ""
-
-    tool_template = None
-    if request.tools:
-        tool_gen = BuiltinToolGenerator()
-        tool_template = tool_gen.gen(request.tools)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    sys_content += default_template.render()
-
-    if existing_system_message:
-        # TODO: this fn is needed in many places
-        def _process(c):
-            if isinstance(c, str):
-                return c
-            else:
-                return ""
-
-        sys_content += "\n"
-
-        if isinstance(existing_system_message.content, str):
-            sys_content += _process(existing_system_message.content)
-        elif isinstance(existing_system_message.content, list):
-            sys_content += "\n".join([_process(c) for c in existing_system_message.content])
-
-    tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools)
-    if tool_choice_prompt:
-        sys_content += "\n" + tool_choice_prompt
-
-    messages.append(SystemMessage(content=sys_content))
-
-    has_custom_tools = request.tools is not None and any(isinstance(dfn.tool_name, str) for dfn in request.tools)
-    if has_custom_tools:
-        fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.json
-        if fmt == ToolPromptFormat.json:
-            tool_gen = JsonCustomToolGenerator()
-        elif fmt == ToolPromptFormat.function_tag:
-            tool_gen = FunctionTagCustomToolGenerator()
-        else:
-            raise ValueError(f"Unsupported ToolPromptFormat {fmt}")
-
-        custom_tools = [t for t in request.tools if isinstance(t.tool_name, str)]
-        custom_template = tool_gen.gen(custom_tools)
-        messages.append(UserMessage(content=custom_template.render()))
-
-    # Add back existing messages from the request
-    messages += existing_messages
-
-    return messages
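Both augmentation paths here hinge on the same classification rule: a tool with a plain-string name is a custom tool, while a `BuiltinTool` enum value marks a builtin. A minimal standalone sketch of that split; the `BuiltinTool`/`ToolDefinition` types below are simplified stand-ins, not the llama_stack classes:

```python
from dataclasses import dataclass
from enum import Enum


class BuiltinTool(Enum):
    code_interpreter = "code_interpreter"


@dataclass
class ToolDefinition:
    tool_name: str | BuiltinTool


def split_tools(tools: list[ToolDefinition]) -> tuple[list[ToolDefinition], list[ToolDefinition]]:
    """Custom tools carry a plain-string name; builtin tools use the enum."""
    custom = [t for t in tools if isinstance(t.tool_name, str)]
    builtin = [t for t in tools if isinstance(t.tool_name, BuiltinTool)]
    return custom, builtin


custom, builtin = split_tools(
    [ToolDefinition("get_weather"), ToolDefinition(BuiltinTool.code_interpreter)]
)
assert [t.tool_name for t in custom] == ["get_weather"]
assert [t.tool_name for t in builtin] == [BuiltinTool.code_interpreter]
```

This split is also why the function below can reject non-`python_list` prompt formats only when custom tools are present: builtin tools never reach the custom-tool template.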
-
-
-def augment_messages_for_tools_llama(
-    request: ChatCompletionRequest,
-    custom_tool_prompt_generator,
-) -> list[Message]:
-    existing_messages = request.messages
-    existing_system_message = None
-    if existing_messages[0].role == Role.system.value:
-        existing_system_message = existing_messages.pop(0)
-
-    assert existing_messages[0].role != Role.system.value, "Should only have 1 system message"
-
-    sys_content = ""
-    custom_tools, builtin_tools = [], []
-    for t in request.tools:
-        if isinstance(t.tool_name, str):
-            custom_tools.append(t)
-        else:
-            builtin_tools.append(t)
-
-    if builtin_tools:
-        tool_gen = BuiltinToolGenerator()
-        tool_template = tool_gen.gen(builtin_tools)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    custom_tools = [dfn for dfn in request.tools if isinstance(dfn.tool_name, str)]
-    if custom_tools:
-        fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.python_list
-        if fmt != ToolPromptFormat.python_list:
-            raise ValueError(f"Unsupported ToolPromptFormat {request.tool_config.tool_prompt_format}")
-
-        system_prompt = None
-        if existing_system_message and request.tool_config.system_message_behavior == SystemMessageBehavior.replace:
-            system_prompt = existing_system_message.content
-
-        tool_template = custom_tool_prompt_generator().gen(custom_tools, system_prompt)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    if existing_system_message and (
-        request.tool_config.system_message_behavior == SystemMessageBehavior.append or not custom_tools
-    ):
-        sys_content += interleaved_content_as_str(existing_system_message.content, sep="\n")
-
-    tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools)
-    if tool_choice_prompt:
-        sys_content += "\n" + tool_choice_prompt
-
-    messages = [SystemMessage(content=sys_content.strip("\n")), *existing_messages]
-    return messages
-
-
-def _get_tool_choice_prompt(tool_choice: ToolChoice | str, tools: list[ToolDefinition]) -> str:
-    if tool_choice == ToolChoice.auto:
-        return ""
-    elif tool_choice == ToolChoice.required:
-        return "You MUST use one of the provided functions/tools to answer the user query."
-    elif tool_choice == ToolChoice.none:
-        # tools are already not passed in
-        return ""
-    else:
-        # specific tool
-        return f"You MUST use the tool `{tool_choice}` to answer the user query."
-
-
-def get_default_tool_prompt_format(model: str) -> ToolPromptFormat:
-    llama_model = resolve_model(model)
-    if llama_model is None:
-        log.warning(f"Could not resolve model {model}, defaulting to json tool prompt format")
-        return ToolPromptFormat.json
-
-    if llama_model.model_family == ModelFamily.llama3_1 or (
-        llama_model.model_family == ModelFamily.llama3_2 and is_multimodal(llama_model.core_model_id)
-    ):
-        # llama3.1 and llama3.2 multimodal models follow the same tool prompt format
-        return ToolPromptFormat.json
-    elif llama_model.model_family in (
-        ModelFamily.llama3_2,
-        ModelFamily.llama3_3,
-        ModelFamily.llama4,
-    ):
-        # llama3.2, llama3.3, and llama4 models follow the same tool prompt format
-        return ToolPromptFormat.python_list
-    else:
-        return ToolPromptFormat.json
diff --git a/llama_stack/providers/utils/kvstore/__init__.py b/llama_stack/providers/utils/kvstore/__init__.py
deleted file mode 100644
index 470a75d2d..000000000
--- a/llama_stack/providers/utils/kvstore/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
- -from .kvstore import * # noqa: F401, F403 diff --git a/llama_stack/providers/utils/kvstore/api.py b/llama_stack/providers/utils/kvstore/api.py deleted file mode 100644 index d17dc66e1..000000000 --- a/llama_stack/providers/utils/kvstore/api.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from datetime import datetime -from typing import Protocol - - -class KVStore(Protocol): - # TODO: make the value type bytes instead of str - async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: ... - - async def get(self, key: str) -> str | None: ... - - async def delete(self, key: str) -> None: ... - - async def values_in_range(self, start_key: str, end_key: str) -> list[str]: ... - - async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ... diff --git a/llama_stack/providers/utils/kvstore/kvstore.py b/llama_stack/providers/utils/kvstore/kvstore.py deleted file mode 100644 index eee51e5d9..000000000 --- a/llama_stack/providers/utils/kvstore/kvstore.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from __future__ import annotations - -from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType - -from .api import KVStore -from .config import KVStoreConfig - - -def kvstore_dependencies(): - """ - Returns all possible kvstore dependencies for registry/provider specifications. - - NOTE: For specific kvstore implementations, use config.pip_packages instead. - This function returns the union of all dependencies for cases where the specific - kvstore type is not known at declaration time (e.g., provider registries). 
- """ - return ["aiosqlite", "psycopg2-binary", "redis", "pymongo"] - - -class InmemoryKVStoreImpl(KVStore): - def __init__(self): - self._store = {} - - async def initialize(self) -> None: - pass - - async def get(self, key: str) -> str | None: - return self._store.get(key) - - async def set(self, key: str, value: str) -> None: - self._store[key] = value - - async def values_in_range(self, start_key: str, end_key: str) -> list[str]: - return [self._store[key] for key in self._store.keys() if key >= start_key and key < end_key] - - async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: - """Get all keys in the given range.""" - return [key for key in self._store.keys() if key >= start_key and key < end_key] - - async def delete(self, key: str) -> None: - del self._store[key] - - -_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {} - - -def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None: - """Register the set of available KV store backends for reference resolution.""" - global _KVSTORE_BACKENDS - - _KVSTORE_BACKENDS.clear() - for name, cfg in backends.items(): - _KVSTORE_BACKENDS[name] = cfg - - -async def kvstore_impl(reference: KVStoreReference) -> KVStore: - backend_name = reference.backend - - backend_config = _KVSTORE_BACKENDS.get(backend_name) - if backend_config is None: - raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}") - - config = backend_config.model_copy() - config.namespace = reference.namespace - - if config.type == StorageBackendType.KV_REDIS.value: - from .redis import RedisKVStoreImpl - - impl = RedisKVStoreImpl(config) - elif config.type == StorageBackendType.KV_SQLITE.value: - from .sqlite import SqliteKVStoreImpl - - impl = SqliteKVStoreImpl(config) - elif config.type == StorageBackendType.KV_POSTGRES.value: - from .postgres import PostgresKVStoreImpl - - impl = PostgresKVStoreImpl(config) - elif config.type == StorageBackendType.KV_MONGODB.value: - from .mongodb import MongoDBKVStoreImpl - - impl = MongoDBKVStoreImpl(config) - else: - raise ValueError(f"Unknown kvstore type {config.type}") - - await impl.initialize() - return impl diff --git a/llama_stack/providers/utils/kvstore/postgres/postgres.py b/llama_stack/providers/utils/kvstore/postgres/postgres.py deleted file mode 100644 index 56d6dbb48..000000000 --- a/llama_stack/providers/utils/kvstore/postgres/postgres.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from datetime import datetime - -import psycopg2 -from psycopg2.extras import DictCursor - -from llama_stack.log import get_logger - -from ..api import KVStore -from ..config import PostgresKVStoreConfig - -log = get_logger(name=__name__, category="providers::utils") - - -class PostgresKVStoreImpl(KVStore): - def __init__(self, config: PostgresKVStoreConfig): - self.config = config - self.conn = None - self.cursor = None - - async def initialize(self) -> None: - try: - self.conn = psycopg2.connect( - host=self.config.host, - port=self.config.port, - database=self.config.db, - user=self.config.user, - password=self.config.password, - sslmode=self.config.ssl_mode, - sslrootcert=self.config.ca_cert_path, - ) - self.conn.autocommit = True - self.cursor = self.conn.cursor(cursor_factory=DictCursor) - - # Create table if it doesn't exist - self.cursor.execute( - f""" - CREATE TABLE IF NOT EXISTS {self.config.table_name} ( - key TEXT PRIMARY KEY, - value TEXT, - expiration TIMESTAMP - ) - """ - ) - except Exception as e: - log.exception("Could not connect to PostgreSQL database server") - raise RuntimeError("Could not connect to PostgreSQL database server") from e - - def _namespaced_key(self, key: str) -> str: - if not self.config.namespace: - return key - return f"{self.config.namespace}:{key}" - - async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: - key = self._namespaced_key(key) - self.cursor.execute( - f""" - INSERT INTO {self.config.table_name} (key, value, expiration) - VALUES (%s, %s, %s) - ON CONFLICT (key) DO UPDATE - SET value = EXCLUDED.value, expiration = EXCLUDED.expiration - """, - (key, value, expiration), - ) - - async def get(self, key: str) -> str | None: - key = self._namespaced_key(key) - self.cursor.execute( - f""" - SELECT value FROM {self.config.table_name} - WHERE key = %s - AND (expiration IS NULL OR expiration > NOW()) - """, - (key,), - ) - result = self.cursor.fetchone() - return result[0] if result else None - - async def delete(self, key: str) -> None: - key = self._namespaced_key(key) - self.cursor.execute( - f"DELETE FROM {self.config.table_name} WHERE key = %s", - (key,), - ) - - async def values_in_range(self, start_key: str, end_key: str) -> list[str]: - start_key = self._namespaced_key(start_key) - end_key = self._namespaced_key(end_key) - - self.cursor.execute( - f""" - SELECT value FROM {self.config.table_name} - WHERE key >= %s AND key < %s - AND (expiration IS NULL OR expiration > NOW()) - ORDER BY key - """, - (start_key, end_key), - ) - return [row[0] for row in self.cursor.fetchall()] - - async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: - start_key = self._namespaced_key(start_key) - end_key = self._namespaced_key(end_key) - - self.cursor.execute( - f"SELECT key FROM {self.config.table_name} WHERE key >= %s AND key < %s", - (start_key, end_key), - ) - return [row[0] for row in self.cursor.fetchall()] diff --git a/llama_stack/providers/utils/kvstore/redis/redis.py b/llama_stack/providers/utils/kvstore/redis/redis.py deleted file mode 100644 index 3d2d956c3..000000000 --- a/llama_stack/providers/utils/kvstore/redis/redis.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
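The Postgres backend above implements `set()` as a single `INSERT ... ON CONFLICT` statement, so every write is an atomic insert-or-update. A minimal sketch of the same upsert pattern; the DSN and table name are placeholders, not the provider's configuration:

```python
import psycopg2

conn = psycopg2.connect("dbname=llamastack user=postgres")  # assumed DSN
conn.autocommit = True
with conn.cursor() as cur:
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS kvstore (
            key TEXT PRIMARY KEY,
            value TEXT,
            expiration TIMESTAMP
        )
        """
    )
    # The second execute with the same key overwrites the row instead of failing.
    for value in ("v1", "v2"):
        cur.execute(
            """
            INSERT INTO kvstore (key, value, expiration)
            VALUES (%s, %s, NULL)
            ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value
            """,
            ("ns:config", value),
        )
    cur.execute("SELECT value FROM kvstore WHERE key = %s", ("ns:config",))
    print(cur.fetchone()[0])  # -> v2
```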
- -from datetime import datetime - -from redis.asyncio import Redis - -from ..api import KVStore -from ..config import RedisKVStoreConfig - - -class RedisKVStoreImpl(KVStore): - def __init__(self, config: RedisKVStoreConfig): - self.config = config - - async def initialize(self) -> None: - self.redis = Redis.from_url(self.config.url) - - def _namespaced_key(self, key: str) -> str: - if not self.config.namespace: - return key - return f"{self.config.namespace}:{key}" - - async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: - key = self._namespaced_key(key) - await self.redis.set(key, value) - if expiration: - await self.redis.expireat(key, expiration) - - async def get(self, key: str) -> str | None: - key = self._namespaced_key(key) - value = await self.redis.get(key) - if value is None: - return None - await self.redis.ttl(key) - return value - - async def delete(self, key: str) -> None: - key = self._namespaced_key(key) - await self.redis.delete(key) - - async def values_in_range(self, start_key: str, end_key: str) -> list[str]: - start_key = self._namespaced_key(start_key) - end_key = self._namespaced_key(end_key) - cursor = 0 - pattern = start_key + "*" # Match all keys starting with start_key prefix - matching_keys = [] - while True: - cursor, keys = await self.redis.scan(cursor, match=pattern, count=1000) - - for key in keys: - key_str = key.decode("utf-8") if isinstance(key, bytes) else key - if start_key <= key_str <= end_key: - matching_keys.append(key) - - if cursor == 0: - break - - # Then fetch all values in a single MGET call - if matching_keys: - values = await self.redis.mget(matching_keys) - return [ - value.decode("utf-8") if isinstance(value, bytes) else value for value in values if value is not None - ] - - return [] - - async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: - """Get all keys in the given range.""" - matching_keys = await self.redis.zrangebylex(self.namespace, f"[{start_key}", f"[{end_key}") - if not matching_keys: - return [] - return [k.decode("utf-8") for k in matching_keys] diff --git a/llama_stack/providers/utils/kvstore/sqlite/config.py b/llama_stack/providers/utils/kvstore/sqlite/config.py deleted file mode 100644 index 6a8b0a7cf..000000000 --- a/llama_stack/providers/utils/kvstore/sqlite/config.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pydantic import BaseModel, Field - -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class SqliteControlPlaneConfig(BaseModel): - db_path: str = Field( - description="File path for the sqlite database", - ) - table_name: str = Field( - default="llamastack_control_plane", - description="Table into which all the keys will be placed", - ) diff --git a/llama_stack/providers/utils/memory/__init__.py b/llama_stack/providers/utils/memory/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/utils/memory/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
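The Redis backend's `values_in_range` walks the keyspace with cursor-based `SCAN` rather than `KEYS`, so a large database is never blocked by one O(N) command. Note that its `keys_in_range` referenced `self.namespace`, an attribute the class never defines; the sketch below (redis-py asyncio API, local server assumed) instead reuses the same SCAN strategy for keys:

```python
import asyncio

from redis.asyncio import Redis


async def keys_in_range(redis: Redis, start_key: str, end_key: str) -> list[str]:
    cursor, found = 0, []
    while True:
        # SCAN returns (next_cursor, batch); cursor 0 means the scan is complete.
        cursor, keys = await redis.scan(cursor, match=start_key + "*", count=1000)
        for key in keys:
            key_str = key.decode("utf-8") if isinstance(key, bytes) else key
            if start_key <= key_str < end_key:  # half-open range, like the other backends
                found.append(key_str)
        if cursor == 0:
            return sorted(found)


async def main() -> None:
    redis = Redis.from_url("redis://localhost:6379")
    await redis.mset({"ns:a": "1", "ns:b": "2", "other:c": "3"})
    print(await keys_in_range(redis, "ns:", "ns:\xff"))  # -> ['ns:a', 'ns:b']
    await redis.aclose()  # redis-py >= 5; use close() on 4.x


asyncio.run(main())
```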
diff --git a/llama_stack/providers/utils/responses/__init__.py b/llama_stack/providers/utils/responses/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/utils/responses/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py deleted file mode 100644 index d5c243252..000000000 --- a/llama_stack/providers/utils/responses/responses_store.py +++ /dev/null @@ -1,354 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import asyncio -from typing import Any - -from llama_stack.apis.agents import ( - Order, -) -from llama_stack.apis.agents.openai_responses import ( - ListOpenAIResponseInputItem, - ListOpenAIResponseObject, - OpenAIDeleteResponseObject, - OpenAIResponseInput, - OpenAIResponseObject, - OpenAIResponseObjectWithInput, -) -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType -from llama_stack.log import get_logger - -from ..sqlstore.api import ColumnDefinition, ColumnType -from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl - -logger = get_logger(name=__name__, category="openai_responses") - - -class _OpenAIResponseObjectWithInputAndMessages(OpenAIResponseObjectWithInput): - """Internal class for storing responses with chat completion messages. - - This extends the public OpenAIResponseObjectWithInput with messages field - for internal storage. The messages field is not exposed in the public API. - - The messages field is optional for backward compatibility with responses - stored before this feature was added. - """ - - messages: list[OpenAIMessageParam] | None = None - - -class ResponsesStore: - def __init__( - self, - reference: ResponsesStoreReference | SqlStoreReference, - policy: list[AccessRule], - ): - if isinstance(reference, ResponsesStoreReference): - self.reference = reference - else: - self.reference = ResponsesStoreReference(**reference.model_dump()) - - self.policy = policy - self.sql_store = None - self.enable_write_queue = True - - # Async write queue and worker control - self._queue: ( - asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None - ) = None - self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = self.reference.max_write_queue_size - self._num_writers: int = max(1, self.reference.num_writers) - - async def initialize(self): - """Create the necessary tables if they don't exist.""" - base_store = sqlstore_impl(self.reference) - self.sql_store = AuthorizedSqlStore(base_store, self.policy) - - backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend) - if backend_config is None: - raise ValueError( - f"Unregistered SQL backend '{self.reference.backend}'. 
Registered backends: {sorted(_SQLSTORE_BACKENDS)}" - ) - if backend_config.type == StorageBackendType.SQL_SQLITE: - self.enable_write_queue = False - await self.sql_store.create_table( - "openai_responses", - { - "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), - "created_at": ColumnType.INTEGER, - "response_object": ColumnType.JSON, - "model": ColumnType.STRING, - }, - ) - - await self.sql_store.create_table( - "conversation_messages", - { - "conversation_id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), - "messages": ColumnType.JSON, - }, - ) - - if self.enable_write_queue: - self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) - for _ in range(self._num_writers): - self._worker_tasks.append(asyncio.create_task(self._worker_loop())) - else: - logger.debug("Write queue disabled for SQLite to avoid concurrency issues") - - async def shutdown(self) -> None: - if not self._worker_tasks: - return - if self._queue is not None: - await self._queue.join() - for t in self._worker_tasks: - if not t.done(): - t.cancel() - for t in self._worker_tasks: - try: - await t - except asyncio.CancelledError: - pass - self._worker_tasks.clear() - - async def flush(self) -> None: - """Wait for all queued writes to complete. Useful for testing.""" - if self.enable_write_queue and self._queue is not None: - await self._queue.join() - - async def store_response_object( - self, - response_object: OpenAIResponseObject, - input: list[OpenAIResponseInput], - messages: list[OpenAIMessageParam], - ) -> None: - if self.enable_write_queue: - if self._queue is None: - raise ValueError("Responses store is not initialized") - try: - self._queue.put_nowait((response_object, input, messages)) - except asyncio.QueueFull: - logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '')}") - await self._queue.put((response_object, input, messages)) - else: - await self._write_response_object(response_object, input, messages) - - async def _worker_loop(self) -> None: - assert self._queue is not None - while True: - try: - item = await self._queue.get() - except asyncio.CancelledError: - break - response_object, input, messages = item - try: - await self._write_response_object(response_object, input, messages) - except Exception as e: # noqa: BLE001 - logger.error(f"Error writing response object: {e}") - finally: - self._queue.task_done() - - async def _write_response_object( - self, - response_object: OpenAIResponseObject, - input: list[OpenAIResponseInput], - messages: list[OpenAIMessageParam], - ) -> None: - if self.sql_store is None: - raise ValueError("Responses store is not initialized") - - data = response_object.model_dump() - data["input"] = [input_item.model_dump() for input_item in input] - data["messages"] = [msg.model_dump() for msg in messages] - - await self.sql_store.insert( - "openai_responses", - { - "id": data["id"], - "created_at": data["created_at"], - "model": data["model"], - "response_object": data, - }, - ) - - async def list_responses( - self, - after: str | None = None, - limit: int | None = 50, - model: str | None = None, - order: Order | None = Order.desc, - ) -> ListOpenAIResponseObject: - """ - List responses from the database. - - :param after: The ID of the last response to return. - :param limit: The maximum number of responses to return. - :param model: The model to filter by. - :param order: The order to sort the responses by. 
- """ - if not self.sql_store: - raise ValueError("Responses store is not initialized") - - if not order: - order = Order.desc - - where_conditions = {} - if model: - where_conditions["model"] = model - - paginated_result = await self.sql_store.fetch_all( - table="openai_responses", - where=where_conditions if where_conditions else None, - order_by=[("created_at", order.value)], - cursor=("id", after) if after else None, - limit=limit, - ) - - data = [OpenAIResponseObjectWithInput(**row["response_object"]) for row in paginated_result.data] - return ListOpenAIResponseObject( - data=data, - has_more=paginated_result.has_more, - first_id=data[0].id if data else "", - last_id=data[-1].id if data else "", - ) - - async def get_response_object(self, response_id: str) -> _OpenAIResponseObjectWithInputAndMessages: - """ - Get a response object with automatic access control checking. - """ - if not self.sql_store: - raise ValueError("Responses store is not initialized") - - row = await self.sql_store.fetch_one( - "openai_responses", - where={"id": response_id}, - ) - - if not row: - # SecureSqlStore will return None if record doesn't exist OR access is denied - # This provides security by not revealing whether the record exists - raise ValueError(f"Response with id {response_id} not found") from None - - return _OpenAIResponseObjectWithInputAndMessages(**row["response_object"]) - - async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject: - if not self.sql_store: - raise ValueError("Responses store is not initialized") - - row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}) - if not row: - raise ValueError(f"Response with id {response_id} not found") - await self.sql_store.delete("openai_responses", where={"id": response_id}) - return OpenAIDeleteResponseObject(id=response_id) - - async def list_response_input_items( - self, - response_id: str, - after: str | None = None, - before: str | None = None, - include: list[str] | None = None, - limit: int | None = 20, - order: Order | None = Order.desc, - ) -> ListOpenAIResponseInputItem: - """ - List input items for a given response. - - :param response_id: The ID of the response to retrieve input items for. - :param after: An item ID to list items after, used for pagination. - :param before: An item ID to list items before, used for pagination. - :param include: Additional fields to include in the response. - :param limit: A limit on the number of objects to be returned. - :param order: The order to return the input items in. 
- """ - if include: - raise NotImplementedError("Include is not supported yet") - if before and after: - raise ValueError("Cannot specify both 'before' and 'after' parameters") - - response_with_input_and_messages = await self.get_response_object(response_id) - items = response_with_input_and_messages.input - - if order == Order.desc: - items = list(reversed(items)) - - start_index = 0 - end_index = len(items) - - if after or before: - for i, item in enumerate(items): - item_id = getattr(item, "id", None) - if after and item_id == after: - start_index = i + 1 - if before and item_id == before: - end_index = i - break - - if after and start_index == 0: - raise ValueError(f"Input item with id '{after}' not found for response '{response_id}'") - if before and end_index == len(items): - raise ValueError(f"Input item with id '{before}' not found for response '{response_id}'") - - items = items[start_index:end_index] - - # Apply limit - if limit is not None: - items = items[:limit] - - return ListOpenAIResponseInputItem(data=items) - - async def store_conversation_messages(self, conversation_id: str, messages: list[OpenAIMessageParam]) -> None: - """Store messages for a conversation. - - :param conversation_id: The conversation identifier. - :param messages: List of OpenAI message parameters to store. - """ - if not self.sql_store: - raise ValueError("Responses store is not initialized") - - # Serialize messages to dict format for JSON storage - messages_data = [msg.model_dump() for msg in messages] - - # Upsert: try insert first, update if exists - try: - await self.sql_store.insert( - table="conversation_messages", - data={"conversation_id": conversation_id, "messages": messages_data}, - ) - except Exception: - # If insert fails due to ID conflict, update existing record - await self.sql_store.update( - table="conversation_messages", - data={"messages": messages_data}, - where={"conversation_id": conversation_id}, - ) - - logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}") - - async def get_conversation_messages(self, conversation_id: str) -> list[OpenAIMessageParam] | None: - """Get stored messages for a conversation. - - :param conversation_id: The conversation identifier. - :returns: List of OpenAI message parameters, or None if no messages stored. - """ - if not self.sql_store: - raise ValueError("Responses store is not initialized") - - record = await self.sql_store.fetch_one( - table="conversation_messages", - where={"conversation_id": conversation_id}, - ) - - if record is None: - return None - - # Deserialize messages from JSON storage - from pydantic import TypeAdapter - - adapter = TypeAdapter(list[OpenAIMessageParam]) - return adapter.validate_python(record["messages"]) diff --git a/llama_stack/providers/utils/scoring/__init__.py b/llama_stack/providers/utils/scoring/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/utils/scoring/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/providers/utils/sqlstore/__init__.py b/llama_stack/providers/utils/sqlstore/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/utils/sqlstore/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/providers/utils/sqlstore/api.py b/llama_stack/providers/utils/sqlstore/api.py deleted file mode 100644 index a61fd1090..000000000 --- a/llama_stack/providers/utils/sqlstore/api.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from collections.abc import Mapping, Sequence -from enum import Enum -from typing import Any, Literal, Protocol - -from pydantic import BaseModel - -from llama_stack.apis.common.responses import PaginatedResponse - - -class ColumnType(Enum): - INTEGER = "INTEGER" - STRING = "STRING" - TEXT = "TEXT" - FLOAT = "FLOAT" - BOOLEAN = "BOOLEAN" - JSON = "JSON" - DATETIME = "DATETIME" - - -class ColumnDefinition(BaseModel): - type: ColumnType - primary_key: bool = False - nullable: bool = True - default: Any = None - - -class SqlStore(Protocol): - """ - A protocol for a SQL store. - """ - - async def create_table(self, table: str, schema: Mapping[str, ColumnType | ColumnDefinition]) -> None: - """ - Create a table. - """ - pass - - async def insert(self, table: str, data: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> None: - """ - Insert a row or batch of rows into a table. - """ - pass - - async def fetch_all( - self, - table: str, - where: Mapping[str, Any] | None = None, - where_sql: str | None = None, - limit: int | None = None, - order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, - cursor: tuple[str, str] | None = None, - ) -> PaginatedResponse: - """ - Fetch all rows from a table with optional cursor-based pagination. - - :param table: The table name - :param where: Simple key-value WHERE conditions - :param where_sql: Raw SQL WHERE clause for complex queries - :param limit: Maximum number of records to return - :param order_by: List of (column, order) tuples for sorting - :param cursor: Tuple of (key_column, cursor_id) for pagination (None for first page) - Requires order_by with exactly one column when used - :return: PaginatedResult with data and has_more flag - - Note: Cursor pagination only supports single-column ordering for simplicity. - Multi-column ordering is allowed without cursor but will raise an error with cursor. - """ - pass - - async def fetch_one( - self, - table: str, - where: Mapping[str, Any] | None = None, - where_sql: str | None = None, - order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, - ) -> dict[str, Any] | None: - """ - Fetch one row from a table. - """ - pass - - async def update( - self, - table: str, - data: Mapping[str, Any], - where: Mapping[str, Any], - ) -> None: - """ - Update a row in a table. - """ - pass - - async def delete( - self, - table: str, - where: Mapping[str, Any], - ) -> None: - """ - Delete a row from a table. - """ - pass - - async def add_column_if_not_exists( - self, - table: str, - column_name: str, - column_type: ColumnType, - nullable: bool = True, - ) -> None: - """ - Add a column to an existing table if the column doesn't already exist. - - This is useful for table migrations when adding new functionality. - If the table doesn't exist, this method should do nothing. - If the column already exists, this method should do nothing. 
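Editor's note: the fetch_all contract above describes keyset (cursor) pagination; the (key_column, cursor_id) cursor only works with a single order_by column because the WHERE clause must compare rows against the cursor row's sort key. A minimal illustration of the idea with the stdlib sqlite3 module; table and column names are illustrative:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE responses (id TEXT PRIMARY KEY, created_at INTEGER)")
conn.executemany(
    "INSERT INTO responses VALUES (?, ?)",
    [(f"resp_{i}", i) for i in range(10)],
)

def fetch_page(after_id: str | None, limit: int) -> list[tuple[str, int]]:
    # Keyset pagination: resume strictly after the cursor row's sort key.
    if after_id is None:
        sql, args = "SELECT id, created_at FROM responses ORDER BY created_at DESC LIMIT ?", (limit,)
    else:
        sql = (
            "SELECT id, created_at FROM responses "
            "WHERE created_at < (SELECT created_at FROM responses WHERE id = ?) "
            "ORDER BY created_at DESC LIMIT ?"
        )
        args = (after_id, limit)
    return conn.execute(sql, args).fetchall()

page1 = fetch_page(None, 3)          # resp_9, resp_8, resp_7
page2 = fetch_page(page1[-1][0], 3)  # resp_6, resp_5, resp_4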
- - :param table: Table name - :param column_name: Name of the column to add - :param column_type: Type of the column to add - :param nullable: Whether the column should be nullable (default: True) - """ - pass diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py deleted file mode 100644 index 31801c4ca..000000000 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Annotated, cast - -from pydantic import Field - -from llama_stack.core.storage.datatypes import ( - PostgresSqlStoreConfig, - SqliteSqlStoreConfig, - SqlStoreReference, - StorageBackendConfig, - StorageBackendType, -) - -from .api import SqlStore - -sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"] - -_SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {} - - -SqlStoreConfig = Annotated[ - SqliteSqlStoreConfig | PostgresSqlStoreConfig, - Field(discriminator="type"), -] - - -def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: - """Get pip packages for SQL store config, handling both dict and object cases.""" - if isinstance(store_config, dict): - store_type = store_config.get("type") - if store_type == StorageBackendType.SQL_SQLITE.value: - return SqliteSqlStoreConfig.pip_packages() - elif store_type == StorageBackendType.SQL_POSTGRES.value: - return PostgresSqlStoreConfig.pip_packages() - else: - raise ValueError(f"Unknown SQL store type: {store_type}") - else: - return store_config.pip_packages() - - -def sqlstore_impl(reference: SqlStoreReference) -> SqlStore: - backend_name = reference.backend - - backend_config = _SQLSTORE_BACKENDS.get(backend_name) - if backend_config is None: - raise ValueError( - f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}" - ) - - if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): - from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl - - config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() - return SqlAlchemySqlStoreImpl(config) - else: - raise ValueError(f"Unknown sqlstore type {backend_config.type}") - - -def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None: - """Register the set of available SQL store backends for reference resolution.""" - global _SQLSTORE_BACKENDS - - _SQLSTORE_BACKENDS.clear() - for name, cfg in backends.items(): - _SQLSTORE_BACKENDS[name] = cfg diff --git a/llama_stack/providers/utils/telemetry/__init__.py b/llama_stack/providers/utils/telemetry/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/utils/telemetry/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/providers/utils/telemetry/trace_protocol.py b/llama_stack/providers/utils/telemetry/trace_protocol.py deleted file mode 100644 index e9320b7a8..000000000 --- a/llama_stack/providers/utils/telemetry/trace_protocol.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
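Editor's note: sqlstore_impl resolves references against a module-level registry that register_sqlstore_backends replaces wholesale at startup, failing fast with the sorted backend names on a miss. The registry pattern in isolation, with illustrative config dataclasses standing in for the real ones:

from dataclasses import dataclass

@dataclass
class SqliteConfig:
    db_path: str

@dataclass
class PostgresConfig:
    dsn: str

_BACKENDS: dict[str, SqliteConfig | PostgresConfig] = {}

def register_backends(backends: dict[str, SqliteConfig | PostgresConfig]) -> None:
    # Replace, don't merge: resolution should reflect exactly the current config.
    _BACKENDS.clear()
    _BACKENDS.update(backends)

def resolve(name: str) -> SqliteConfig | PostgresConfig:
    config = _BACKENDS.get(name)
    if config is None:
        raise ValueError(f"Unknown backend '{name}'. Registered backends: {sorted(_BACKENDS)}")
    return config

register_backends({"default": SqliteConfig(db_path="/tmp/store.db")})
assert isinstance(resolve("default"), SqliteConfig)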
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import asyncio -import inspect -import json -from collections.abc import AsyncGenerator, Callable -from functools import wraps -from typing import Any - -from pydantic import BaseModel - -from llama_stack.models.llama.datatypes import Primitive - - -def serialize_value(value: Any) -> Primitive: - return str(_prepare_for_json(value)) - - -def _prepare_for_json(value: Any) -> str: - """Serialize a single value into JSON-compatible format.""" - if value is None: - return "" - elif isinstance(value, str | int | float | bool): - return value - elif hasattr(value, "_name_"): - return value._name_ - elif isinstance(value, BaseModel): - return json.loads(value.model_dump_json()) - elif isinstance(value, list | tuple | set): - return [_prepare_for_json(item) for item in value] - elif isinstance(value, dict): - return {str(k): _prepare_for_json(v) for k, v in value.items()} - else: - try: - json.dumps(value) - return value - except Exception: - return str(value) - - -def trace_protocol[T](cls: type[T]) -> type[T]: - """ - A class decorator that automatically traces all methods in a protocol/base class - and its inheriting classes. - """ - - def trace_method(method: Callable) -> Callable: - is_async = asyncio.iscoroutinefunction(method) - is_async_gen = inspect.isasyncgenfunction(method) - - def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple: - class_name = self.__class__.__name__ - method_name = method.__name__ - span_type = "async_generator" if is_async_gen else "async" if is_async else "sync" - sig = inspect.signature(method) - param_names = list(sig.parameters.keys())[1:] # Skip 'self' - combined_args = {} - for i, arg in enumerate(args): - param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}" - combined_args[param_name] = serialize_value(arg) - for k, v in kwargs.items(): - combined_args[str(k)] = serialize_value(v) - - span_attributes = { - "__autotraced__": True, - "__class__": class_name, - "__method__": method_name, - "__type__": span_type, - "__args__": json.dumps(combined_args), - } - - return class_name, method_name, span_attributes - - @wraps(method) - async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator: - from llama_stack.providers.utils.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as span: - count = 0 - try: - async for item in method(self, *args, **kwargs): - yield item - count += 1 - finally: - span.set_attribute("chunk_count", count) - - @wraps(method) - async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - from llama_stack.providers.utils.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as span: - try: - result = await method(self, *args, **kwargs) - span.set_attribute("output", serialize_value(result)) - return result - except Exception as e: - span.set_attribute("error", str(e)) - raise - - @wraps(method) - def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - from llama_stack.providers.utils.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as 
span: - try: - result = method(self, *args, **kwargs) - span.set_attribute("output", serialize_value(result)) - return result - except Exception as e: - span.set_attribute("error", str(e)) - raise - - if is_async_gen: - return async_gen_wrapper - elif is_async: - return async_wrapper - else: - return sync_wrapper - - original_init_subclass = getattr(cls, "__init_subclass__", None) - - def __init_subclass__(cls_child, **kwargs): # noqa: N807 - if original_init_subclass: - original_init_subclass(**kwargs) - - for name, method in vars(cls_child).items(): - if inspect.isfunction(method) and not name.startswith("_"): - setattr(cls_child, name, trace_method(method)) # noqa: B010 - - cls.__init_subclass__ = classmethod(__init_subclass__) - - return cls diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py deleted file mode 100644 index 62cceb13e..000000000 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ /dev/null @@ -1,384 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import asyncio -import contextvars -import logging # allow-direct-logging -import queue -import secrets -import sys -import threading -import time -from collections.abc import Callable -from datetime import UTC, datetime -from functools import wraps -from typing import Any - -from llama_stack.apis.telemetry import ( - Event, - LogSeverity, - Span, - SpanEndPayload, - SpanStartPayload, - SpanStatus, - StructuredLogEvent, - Telemetry, - UnstructuredLogEvent, -) -from llama_stack.log import get_logger -from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value - -logger = get_logger(__name__, category="core") - -# Fallback logger that does NOT propagate to TelemetryHandler to avoid recursion -_fallback_logger = logging.getLogger("llama_stack.telemetry.background") -if not _fallback_logger.handlers: - _fallback_logger.propagate = False - _fallback_logger.setLevel(logging.ERROR) - _fallback_handler = logging.StreamHandler(sys.stderr) - _fallback_handler.setLevel(logging.ERROR) - _fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")) - _fallback_logger.addHandler(_fallback_handler) - - -INVALID_SPAN_ID = 0x0000000000000000 -INVALID_TRACE_ID = 0x00000000000000000000000000000000 - -ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] -# The logical root span may not be visible to this process if a parent context -# is passed in. The local root span is the first local span in a trace. 
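Editor's note: trace_protocol picks a wrapper per method by introspection, so async generators, coroutines, and plain functions each get a variant whose span covers the full execution (or iteration). A stripped-down sketch of that dispatch; record is a hypothetical stand-in for opening a span:

import asyncio
import inspect
from functools import wraps

def record(name: str, kind: str) -> None:
    # Hypothetical stand-in for starting a tracing span.
    print(f"enter {name} [{kind}]")

def traced(method):
    if inspect.isasyncgenfunction(method):
        @wraps(method)
        async def agen_wrapper(self, *args, **kwargs):
            record(method.__name__, "async_generator")
            async for item in method(self, *args, **kwargs):
                yield item
        return agen_wrapper
    if asyncio.iscoroutinefunction(method):
        @wraps(method)
        async def async_wrapper(self, *args, **kwargs):
            record(method.__name__, "async")
            return await method(self, *args, **kwargs)
        return async_wrapper
    @wraps(method)
    def sync_wrapper(self, *args, **kwargs):
        record(method.__name__, "sync")
        return method(self, *args, **kwargs)
    return sync_wrapper

class Api:
    @traced
    def ping(self):
        return "pong"

assert Api().ping() == "pong"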
-LOCAL_ROOT_SPAN_MARKER = "__local_root_span__" - - -def trace_id_to_str(trace_id: int) -> str: - """Convenience trace ID formatting method - Args: - trace_id: Trace ID int - - Returns: - The trace ID as 32-byte hexadecimal string - """ - return format(trace_id, "032x") - - -def span_id_to_str(span_id: int) -> str: - """Convenience span ID formatting method - Args: - span_id: Span ID int - - Returns: - The span ID as 16-byte hexadecimal string - """ - return format(span_id, "016x") - - -def generate_span_id() -> str: - span_id = secrets.randbits(64) - while span_id == INVALID_SPAN_ID: - span_id = secrets.randbits(64) - return span_id_to_str(span_id) - - -def generate_trace_id() -> str: - trace_id = secrets.randbits(128) - while trace_id == INVALID_TRACE_ID: - trace_id = secrets.randbits(128) - return trace_id_to_str(trace_id) - - -CURRENT_TRACE_CONTEXT = contextvars.ContextVar("trace_context", default=None) -BACKGROUND_LOGGER = None - -LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0 - - -class BackgroundLogger: - def __init__(self, api: Telemetry, capacity: int = 100000): - self.api = api - self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity) - self.worker_thread = threading.Thread(target=self._worker, daemon=True) - self.worker_thread.start() - self._last_queue_full_log_time: float = 0.0 - self._dropped_since_last_notice: int = 0 - - def log_event(self, event): - try: - self.log_queue.put_nowait(event) - except queue.Full: - # Aggregate drops and emit at most once per interval via fallback logger - self._dropped_since_last_notice += 1 - current_time = time.time() - if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS: - _fallback_logger.error( - "Log queue is full; dropped %d events since last notice", - self._dropped_since_last_notice, - ) - self._last_queue_full_log_time = current_time - self._dropped_since_last_notice = 0 - - def _worker(self): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - loop.run_until_complete(self._process_logs()) - - async def _process_logs(self): - while True: - try: - event = self.log_queue.get() - await self.api.log_event(event) - except Exception: - import traceback - - traceback.print_exc() - print("Error processing log event") - finally: - self.log_queue.task_done() - - def __del__(self): - self.log_queue.join() - - -def enqueue_event(event: Event) -> None: - """Enqueue a telemetry event to the background logger if available. - - This provides a non-blocking path for routers and other hot paths to - submit telemetry without awaiting the Telemetry API, reducing contention - with the main event loop. 
- """ - global BACKGROUND_LOGGER - if BACKGROUND_LOGGER is None: - raise RuntimeError("Telemetry API not initialized") - BACKGROUND_LOGGER.log_event(event) - - -class TraceContext: - spans: list[Span] = [] - - def __init__(self, logger: BackgroundLogger, trace_id: str): - self.logger = logger - self.trace_id = trace_id - - def push_span(self, name: str, attributes: dict[str, Any] = None) -> Span: - current_span = self.get_current_span() - span = Span( - span_id=generate_span_id(), - trace_id=self.trace_id, - name=name, - start_time=datetime.now(UTC), - parent_span_id=current_span.span_id if current_span else None, - attributes=attributes, - ) - - self.logger.log_event( - StructuredLogEvent( - trace_id=span.trace_id, - span_id=span.span_id, - timestamp=span.start_time, - attributes=span.attributes, - payload=SpanStartPayload( - name=span.name, - parent_span_id=span.parent_span_id, - ), - ) - ) - - self.spans.append(span) - return span - - def pop_span(self, status: SpanStatus = SpanStatus.OK): - span = self.spans.pop() - if span is not None: - self.logger.log_event( - StructuredLogEvent( - trace_id=span.trace_id, - span_id=span.span_id, - timestamp=span.start_time, - attributes=span.attributes, - payload=SpanEndPayload( - status=status, - ), - ) - ) - - def get_current_span(self): - return self.spans[-1] if self.spans else None - - -def setup_logger(api: Telemetry, level: int = logging.INFO): - global BACKGROUND_LOGGER - - if BACKGROUND_LOGGER is None: - BACKGROUND_LOGGER = BackgroundLogger(api) - root_logger = logging.getLogger() - root_logger.setLevel(level) - root_logger.addHandler(TelemetryHandler()) - - -async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceContext: - global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER - - if BACKGROUND_LOGGER is None: - logger.debug("No Telemetry implementation set. Skipping trace initialization...") - return - - trace_id = generate_trace_id() - context = TraceContext(BACKGROUND_LOGGER, trace_id) - # Mark this span as the root for the trace for now. The processing of - # traceparent context if supplied comes later and will result in the - # ROOT_SPAN_MARKERS being removed. Also mark this is the 'local' root, - # i.e. the root of the spans originating in this process as this is - # needed to ensure that we insert this 'local' root span's id into - # the trace record in sqlite store. 
- attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {}) - context.push_span(name, attributes) - - CURRENT_TRACE_CONTEXT.set(context) - return context - - -async def end_trace(status: SpanStatus = SpanStatus.OK): - global CURRENT_TRACE_CONTEXT - - context = CURRENT_TRACE_CONTEXT.get() - if context is None: - logger.debug("No trace context to end") - return - - context.pop_span(status) - CURRENT_TRACE_CONTEXT.set(None) - - -def severity(levelname: str) -> LogSeverity: - if levelname == "DEBUG": - return LogSeverity.DEBUG - elif levelname == "INFO": - return LogSeverity.INFO - elif levelname == "WARNING": - return LogSeverity.WARN - elif levelname == "ERROR": - return LogSeverity.ERROR - elif levelname == "CRITICAL": - return LogSeverity.CRITICAL - else: - raise ValueError(f"Unknown log level: {levelname}") - - -# TODO: ideally, the actual emitting should be done inside a separate daemon -# process completely isolated from the server -class TelemetryHandler(logging.Handler): - def emit(self, record: logging.LogRecord): - # horrendous hack to avoid logging from asyncio and getting into an infinite loop - if record.module in ("asyncio", "selector_events"): - return - - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if context is None: - return - - span = context.get_current_span() - if span is None: - return - - enqueue_event( - UnstructuredLogEvent( - trace_id=span.trace_id, - span_id=span.span_id, - timestamp=datetime.now(UTC), - message=self.format(record), - severity=severity(record.levelname), - ) - ) - - def close(self): - pass - - -class SpanContextManager: - def __init__(self, name: str, attributes: dict[str, Any] = None): - self.name = name - self.attributes = attributes - self.span = None - - def __enter__(self): - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if not context: - logger.debug("No trace context to push span") - return self - - self.span = context.push_span(self.name, self.attributes) - return self - - def __exit__(self, exc_type, exc_value, traceback): - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if not context: - logger.debug("No trace context to pop span") - return - - context.pop_span() - - def set_attribute(self, key: str, value: Any): - if self.span: - if self.span.attributes is None: - self.span.attributes = {} - self.span.attributes[key] = serialize_value(value) - - async def __aenter__(self): - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if not context: - logger.debug("No trace context to push span") - return self - - self.span = context.push_span(self.name, self.attributes) - return self - - async def __aexit__(self, exc_type, exc_value, traceback): - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if not context: - logger.debug("No trace context to pop span") - return - - context.pop_span() - - def __call__(self, func: Callable): - @wraps(func) - def sync_wrapper(*args, **kwargs): - with self: - return func(*args, **kwargs) - - @wraps(func) - async def async_wrapper(*args, **kwargs): - async with self: - return await func(*args, **kwargs) - - @wraps(func) - def wrapper(*args, **kwargs): - if asyncio.iscoroutinefunction(func): - return async_wrapper(*args, **kwargs) - else: - return sync_wrapper(*args, **kwargs) - - return wrapper - - -def span(name: str, attributes: dict[str, Any] = None): - return SpanContextManager(name, attributes) - - -def get_current_span() -> Span | None: - 
global CURRENT_TRACE_CONTEXT - if CURRENT_TRACE_CONTEXT is None: - logger.debug("No trace context to get current span") - return None - - context = CURRENT_TRACE_CONTEXT.get() - if context: - return context.get_current_span() - return None diff --git a/llama_stack/providers/utils/tools/__init__.py b/llama_stack/providers/utils/tools/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/utils/tools/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/providers/utils/tools/mcp.py b/llama_stack/providers/utils/tools/mcp.py deleted file mode 100644 index 48f07cb19..000000000 --- a/llama_stack/providers/utils/tools/mcp.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from collections.abc import AsyncGenerator -from contextlib import asynccontextmanager -from enum import Enum -from typing import Any, cast - -import httpx -from mcp import ClientSession, McpError -from mcp import types as mcp_types -from mcp.client.sse import sse_client -from mcp.client.streamable_http import streamablehttp_client - -from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem -from llama_stack.apis.tools import ( - ListToolDefsResponse, - ToolDef, - ToolInvocationResult, -) -from llama_stack.core.datatypes import AuthenticationRequiredError -from llama_stack.log import get_logger -from llama_stack.providers.utils.tools.ttl_dict import TTLDict - -logger = get_logger(__name__, category="tools") - -protocol_cache = TTLDict(ttl_seconds=3600) - - -class MCPProtol(Enum): - UNKNOWN = 0 - STREAMABLE_HTTP = 1 - SSE = 2 - - -@asynccontextmanager -async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerator[ClientSession, Any]: - # we use a ttl'd dict to cache the happy path protocol for each endpoint - # but, we always fall back to trying the other protocol if we cannot initialize the session - connection_strategies = [MCPProtol.STREAMABLE_HTTP, MCPProtol.SSE] - mcp_protocol = protocol_cache.get(endpoint, default=MCPProtol.UNKNOWN) - if mcp_protocol == MCPProtol.SSE: - connection_strategies = [MCPProtol.SSE, MCPProtol.STREAMABLE_HTTP] - - for i, strategy in enumerate(connection_strategies): - try: - client = streamablehttp_client - if strategy == MCPProtol.SSE: - client = sse_client - async with client(endpoint, headers=headers) as client_streams: - async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session: - await session.initialize() - protocol_cache[endpoint] = strategy - yield session - return - except* httpx.HTTPStatusError as eg: - for exc in eg.exceptions: - # mypy does not currently narrow the type of `eg.exceptions` based on the `except*` filter, - # so we explicitly cast each item to httpx.HTTPStatusError. This is safe because - # `except* httpx.HTTPStatusError` guarantees all exceptions in `eg.exceptions` are of that type. 
- err = cast(httpx.HTTPStatusError, exc) - if err.response.status_code == 401: - raise AuthenticationRequiredError(exc) from exc - if i == len(connection_strategies) - 1: - raise - except* httpx.ConnectError as eg: - # Connection refused, server down, network unreachable - if i == len(connection_strategies) - 1: - error_msg = f"Failed to connect to MCP server at {endpoint}: Connection refused" - logger.error(f"MCP connection error: {error_msg}") - raise ConnectionError(error_msg) from eg - else: - logger.warning( - f"failed to connect to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}" - ) - except* httpx.TimeoutException as eg: - # Request timeout, server too slow - if i == len(connection_strategies) - 1: - error_msg = f"MCP server at {endpoint} timed out" - logger.error(f"MCP timeout error: {error_msg}") - raise TimeoutError(error_msg) from eg - else: - logger.warning( - f"MCP server at {endpoint} timed out via {strategy.name}, falling back to {connection_strategies[i + 1].name}" - ) - except* httpx.RequestError as eg: - # DNS resolution failures, network errors, invalid URLs - if i == len(connection_strategies) - 1: - # Get the first exception's message for the error string - exc_msg = str(eg.exceptions[0]) if eg.exceptions else "Unknown error" - error_msg = f"Network error connecting to MCP server at {endpoint}: {exc_msg}" - logger.error(f"MCP network error: {error_msg}") - raise ConnectionError(error_msg) from eg - else: - logger.warning( - f"network error connecting to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}" - ) - except* McpError: - if i < len(connection_strategies) - 1: - logger.warning( - f"failed to connect via {strategy.name}, falling back to {connection_strategies[i + 1].name}" - ) - else: - raise - - -async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse: - tools = [] - async with client_wrapper(endpoint, headers) as session: - tools_result = await session.list_tools() - for tool in tools_result.tools: - tools.append( - ToolDef( - name=tool.name, - description=tool.description, - input_schema=tool.inputSchema, - output_schema=getattr(tool, "outputSchema", None), - metadata={ - "endpoint": endpoint, - }, - ) - ) - return ListToolDefsResponse(data=tools) - - -async def invoke_mcp_tool( - endpoint: str, headers: dict[str, str], tool_name: str, kwargs: dict[str, Any] -) -> ToolInvocationResult: - async with client_wrapper(endpoint, headers) as session: - result = await session.call_tool(tool_name, kwargs) - - content: list[InterleavedContentItem] = [] - for item in result.content: - if isinstance(item, mcp_types.TextContent): - content.append(TextContentItem(text=item.text)) - elif isinstance(item, mcp_types.ImageContent): - content.append(ImageContentItem(image=item.data)) - elif isinstance(item, mcp_types.EmbeddedResource): - logger.warning(f"EmbeddedResource is not supported: {item}") - else: - raise ValueError(f"Unknown content type: {type(item)}") - return ToolInvocationResult( - content=content, - error_code=1 if result.isError else 0, - ) diff --git a/llama_stack/providers/utils/vector_io/__init__.py b/llama_stack/providers/utils/vector_io/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/utils/vector_io/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
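Editor's note: client_wrapper's fallback logic generalizes to: try strategies in a preferred order, remember the one that worked per endpoint, and surface the last error only when every strategy fails. A reduced sketch of that control flow; the connect functions here are hypothetical:

from collections.abc import Callable

_protocol_cache: dict[str, str] = {}  # endpoint -> last strategy that worked

def connect_with_fallback(endpoint: str, strategies: dict[str, Callable[[str], object]]) -> object:
    # Start with the cached winner, if any, then try the rest in declared order.
    order = list(strategies)
    cached = _protocol_cache.get(endpoint)
    if cached in order:
        order.remove(cached)
        order.insert(0, cached)
    last_error: Exception | None = None
    for name in order:
        try:
            session = strategies[name](endpoint)
        except ConnectionError as e:
            last_error = e
            continue
        _protocol_cache[endpoint] = name
        return session
    raise ConnectionError(f"all strategies failed for {endpoint}") from last_error

def streamable_http(endpoint: str) -> object:
    raise ConnectionError("no streamable-http support")  # force the fallback

def sse(endpoint: str) -> object:
    return f"sse-session:{endpoint}"

session = connect_with_fallback("http://mcp.example", {"streamable_http": streamable_http, "sse": sse})
assert session == "sse-session:http://mcp.example" and _protocol_cache["http://mcp.example"] == "sse"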
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py deleted file mode 100644 index 8444d2a34..000000000 --- a/llama_stack/schema_utils.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from collections.abc import Callable -from dataclasses import dataclass -from typing import Any, TypeVar - -from .strong_typing.schema import json_schema_type, register_schema # noqa: F401 - - -class ExtraBodyField[T]: - """ - Marker annotation for parameters that arrive via extra_body in the client SDK. - - These parameters: - - Will NOT appear in the generated client SDK method signature - - WILL be documented in OpenAPI spec under x-llama-stack-extra-body-params - - MUST be passed via the extra_body parameter in client SDK calls - - WILL be available in server-side method signature with proper typing - - Example: - ```python - async def create_openai_response( - self, - input: str, - model: str, - shields: Annotated[ - list[str] | None, ExtraBodyField("List of shields to apply") - ] = None, - ) -> ResponseObject: - # shields is available here with proper typing - if shields: - print(f"Using shields: {shields}") - ``` - - Client usage: - ```python - client.responses.create( - input="hello", model="llama-3", extra_body={"shields": ["shield-1"]} - ) - ``` - """ - - def __init__(self, description: str | None = None): - self.description = description - - -@dataclass -class WebMethod: - level: str | None = None - route: str | None = None - public: bool = False - request_examples: list[Any] | None = None - response_examples: list[Any] | None = None - method: str | None = None - raw_bytes_request_body: bool | None = False - # A descriptive name of the corresponding span created by tracing - descriptive_name: str | None = None - required_scope: str | None = None - deprecated: bool | None = False - require_authentication: bool | None = True - - -CallableT = TypeVar("CallableT", bound=Callable[..., Any]) - - -def webmethod( - route: str | None = None, - method: str | None = None, - level: str | None = None, - public: bool | None = False, - request_examples: list[Any] | None = None, - response_examples: list[Any] | None = None, - raw_bytes_request_body: bool | None = False, - descriptive_name: str | None = None, - required_scope: str | None = None, - deprecated: bool | None = False, - require_authentication: bool | None = True, -) -> Callable[[CallableT], CallableT]: - """ - Decorator that supplies additional metadata to an endpoint operation function. - - :param route: The URL path pattern associated with this operation which path parameters are substituted into. - :param public: True if the operation can be invoked without prior authentication. - :param request_examples: Sample requests that the operation might take. Pass a list of objects, not JSON. - :param response_examples: Sample responses that the operation might produce. Pass a list of objects, not JSON. - :param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer'). - :param require_authentication: Whether this endpoint requires authentication (default True). 
- """ - - def wrap(func: CallableT) -> CallableT: - webmethod_obj = WebMethod( - route=route, - method=method, - level=level, - public=public or False, - request_examples=request_examples, - response_examples=response_examples, - raw_bytes_request_body=raw_bytes_request_body, - descriptive_name=descriptive_name, - required_scope=required_scope, - deprecated=deprecated, - require_authentication=require_authentication if require_authentication is not None else True, - ) - - # Store all webmethods in a list to support multiple decorators - if not hasattr(func, "__webmethods__"): - func.__webmethods__ = [] # type: ignore - func.__webmethods__.append(webmethod_obj) # type: ignore - - # Keep the last one as __webmethod__ for backwards compatibility - func.__webmethod__ = webmethod_obj # type: ignore - return func - - return wrap diff --git a/llama_stack/strong_typing/__init__.py b/llama_stack/strong_typing/__init__.py deleted file mode 100644 index d832dcf6f..000000000 --- a/llama_stack/strong_typing/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -Provides auxiliary services for working with Python type annotations, converting typed data to and from JSON, -and generating a JSON schema for a complex type. -""" - -__version__ = "0.3.4" -__author__ = "Levente Hunyadi" -__copyright__ = "Copyright 2021-2024, Levente Hunyadi" -__license__ = "MIT" -__maintainer__ = "Levente Hunyadi" -__status__ = "Production" diff --git a/llama_stack/strong_typing/auxiliary.py b/llama_stack/strong_typing/auxiliary.py deleted file mode 100644 index 965ffa079..000000000 --- a/llama_stack/strong_typing/auxiliary.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import dataclasses -import sys -from dataclasses import is_dataclass -from typing import Callable, Dict, Optional, Type, TypeVar, Union, overload - -if sys.version_info >= (3, 9): - from typing import Annotated as Annotated -else: - from typing_extensions import Annotated as Annotated - -if sys.version_info >= (3, 10): - from typing import TypeAlias as TypeAlias -else: - from typing_extensions import TypeAlias as TypeAlias - -if sys.version_info >= (3, 11): - from typing import dataclass_transform as dataclass_transform -else: - from typing_extensions import dataclass_transform as dataclass_transform - -T = TypeVar("T") - - -def _compact_dataclass_repr(obj: object) -> str: - """ - Compact data-class representation where positional arguments are used instead of keyword arguments. - - :param obj: A data-class object. - :returns: A string that matches the pattern `Class(arg1, arg2, ...)`. - """ - - if is_dataclass(obj): - arglist = ", ".join(repr(getattr(obj, field.name)) for field in dataclasses.fields(obj)) - return f"{obj.__class__.__name__}({arglist})" - else: - return obj.__class__.__name__ - - -class CompactDataClass: - "A data class whose repr() uses positional rather than keyword arguments." 
- - def __repr__(self) -> str: - return _compact_dataclass_repr(self) - - -@overload -def typeannotation(cls: Type[T], /) -> Type[T]: ... - - -@overload -def typeannotation(cls: None, *, eq: bool = True, order: bool = False) -> Callable[[Type[T]], Type[T]]: ... - - -@dataclass_transform(eq_default=True, order_default=False) -def typeannotation( - cls: Optional[Type[T]] = None, *, eq: bool = True, order: bool = False -) -> Union[Type[T], Callable[[Type[T]], Type[T]]]: - """ - Returns the same class as was passed in, with dunder methods added based on the fields defined in the class. - - :param cls: The data-class type to transform into a type annotation. - :param eq: Whether to generate functions to support equality comparison. - :param order: Whether to generate functions to support ordering. - :returns: A data-class type, or a wrapper for data-class types. - """ - - def wrap(cls: Type[T]) -> Type[T]: - # mypy fails to equate bound-y functions (first argument interpreted as - # the bound object) with class methods, hence the `ignore` directive. - cls.__repr__ = _compact_dataclass_repr # type: ignore[method-assign] - if not dataclasses.is_dataclass(cls): - cls = dataclasses.dataclass( # type: ignore[call-overload] - cls, - init=True, - repr=False, - eq=eq, - order=order, - unsafe_hash=False, - frozen=True, - ) - return cls - - # see if decorator is used as @typeannotation or @typeannotation() - if cls is None: - # called with parentheses - return wrap - else: - # called without parentheses - return wrap(cls) - - -@typeannotation -class Alias: - "Alternative name of a property, typically used in JSON serialization." - - name: str - - -@typeannotation -class Signed: - "Signedness of an integer type." - - is_signed: bool - - -@typeannotation -class Storage: - "Number of bytes the binary representation of an integer type takes, e.g. 4 bytes for an int32." - - bytes: int - - -@typeannotation -class IntegerRange: - "Minimum and maximum value of an integer. The range is inclusive." - - minimum: int - maximum: int - - -@typeannotation -class Precision: - "Precision of a floating-point value." - - significant_digits: int - decimal_digits: int = 0 - - @property - def integer_digits(self) -> int: - return self.significant_digits - self.decimal_digits - - -@typeannotation -class TimePrecision: - """ - Precision of a timestamp or time interval. - - :param decimal_digits: Number of fractional digits retained in the sub-seconds field for a timestamp. - """ - - decimal_digits: int = 0 - - -@typeannotation -class Length: - "Exact length of a string." - - value: int - - -@typeannotation -class MinLength: - "Minimum length of a string." - - value: int - - -@typeannotation -class MaxLength: - "Maximum length of a string." - - value: int - - -@typeannotation -class SpecialConversion: - "Indicates that the annotated type is subject to custom conversion rules." 
- - -int8: TypeAlias = Annotated[int, Signed(True), Storage(1), IntegerRange(-128, 127)] -int16: TypeAlias = Annotated[int, Signed(True), Storage(2), IntegerRange(-32768, 32767)] -int32: TypeAlias = Annotated[ - int, - Signed(True), - Storage(4), - IntegerRange(-2147483648, 2147483647), -] -int64: TypeAlias = Annotated[ - int, - Signed(True), - Storage(8), - IntegerRange(-9223372036854775808, 9223372036854775807), -] - -uint8: TypeAlias = Annotated[int, Signed(False), Storage(1), IntegerRange(0, 255)] -uint16: TypeAlias = Annotated[int, Signed(False), Storage(2), IntegerRange(0, 65535)] -uint32: TypeAlias = Annotated[ - int, - Signed(False), - Storage(4), - IntegerRange(0, 4294967295), -] -uint64: TypeAlias = Annotated[ - int, - Signed(False), - Storage(8), - IntegerRange(0, 18446744073709551615), -] - -float32: TypeAlias = Annotated[float, Storage(4)] -float64: TypeAlias = Annotated[float, Storage(8)] - -# maps globals of type Annotated[T, ...] defined in this module to their string names -_auxiliary_types: Dict[object, str] = {} -module = sys.modules[__name__] -for var in dir(module): - typ = getattr(module, var) - if getattr(typ, "__metadata__", None) is not None: - # type is Annotated[T, ...] - _auxiliary_types[typ] = var - - -def get_auxiliary_format(data_type: object) -> Optional[str]: - "Returns the JSON format string corresponding to an auxiliary type." - - return _auxiliary_types.get(data_type) diff --git a/llama_stack/strong_typing/classdef.py b/llama_stack/strong_typing/classdef.py deleted file mode 100644 index 5ead886d4..000000000 --- a/llama_stack/strong_typing/classdef.py +++ /dev/null @@ -1,440 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
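Editor's note: the int8/uint64/float32 aliases carry their constraints as Annotated metadata, and _auxiliary_types is built by scanning module globals for a __metadata__ attribute. Reading that metadata back out is a single get_args call, as this small sketch shows:

from dataclasses import dataclass
from typing import Annotated, get_args

@dataclass(frozen=True)
class IntegerRange:
    minimum: int
    maximum: int

int8 = Annotated[int, IntegerRange(-128, 127)]

# get_args on an Annotated alias yields (base_type, *metadata).
base, *meta = get_args(int8)
assert base is int
assert meta == [IntegerRange(-128, 127)]
# The alias also exposes the metadata tuple directly:
assert int8.__metadata__ == (IntegerRange(-128, 127),)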
- -import copy -import dataclasses -import datetime -import decimal -import enum -import ipaddress -import math -import re -import sys -import types -import typing -import uuid -from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple, Type, TypeVar, Union - -from .auxiliary import ( - Alias, - Annotated, - MaxLength, - Precision, - float32, - float64, - int16, - int32, - int64, -) -from .core import JsonType, Schema -from .docstring import Docstring, DocstringParam -from .inspection import TypeLike -from .serialization import json_to_object, object_to_json - -T = TypeVar("T") - - -@dataclass -class JsonSchemaNode: - title: Optional[str] - description: Optional[str] - - -@dataclass -class JsonSchemaType(JsonSchemaNode): - type: str - format: Optional[str] - - -@dataclass -class JsonSchemaBoolean(JsonSchemaType): - type: Literal["boolean"] - const: Optional[bool] - default: Optional[bool] - examples: Optional[List[bool]] - - -@dataclass -class JsonSchemaInteger(JsonSchemaType): - type: Literal["integer"] - const: Optional[int] - default: Optional[int] - examples: Optional[List[int]] - enum: Optional[List[int]] - minimum: Optional[int] - maximum: Optional[int] - - -@dataclass -class JsonSchemaNumber(JsonSchemaType): - type: Literal["number"] - const: Optional[float] - default: Optional[float] - examples: Optional[List[float]] - minimum: Optional[float] - maximum: Optional[float] - exclusiveMinimum: Optional[float] - exclusiveMaximum: Optional[float] - multipleOf: Optional[float] - - -@dataclass -class JsonSchemaString(JsonSchemaType): - type: Literal["string"] - const: Optional[str] - default: Optional[str] - examples: Optional[List[str]] - enum: Optional[List[str]] - minLength: Optional[int] - maxLength: Optional[int] - - -@dataclass -class JsonSchemaArray(JsonSchemaType): - type: Literal["array"] - items: "JsonSchemaAny" - - -@dataclass -class JsonSchemaObject(JsonSchemaType): - type: Literal["object"] - properties: Optional[Dict[str, "JsonSchemaAny"]] - additionalProperties: Optional[bool] - required: Optional[List[str]] - - -@dataclass -class JsonSchemaRef(JsonSchemaNode): - ref: Annotated[str, Alias("$ref")] - - -@dataclass -class JsonSchemaAllOf(JsonSchemaNode): - allOf: List["JsonSchemaAny"] - - -@dataclass -class JsonSchemaAnyOf(JsonSchemaNode): - anyOf: List["JsonSchemaAny"] - - -@dataclass -class Discriminator: - propertyName: str - mapping: Dict[str, str] - - -@dataclass -class JsonSchemaOneOf(JsonSchemaNode): - oneOf: List["JsonSchemaAny"] - discriminator: Optional[Discriminator] - - -JsonSchemaAny = Union[ - JsonSchemaRef, - JsonSchemaBoolean, - JsonSchemaInteger, - JsonSchemaNumber, - JsonSchemaString, - JsonSchemaArray, - JsonSchemaObject, - JsonSchemaOneOf, -] - - -@dataclass -class JsonSchemaTopLevelObject(JsonSchemaObject): - schema: Annotated[str, Alias("$schema")] - definitions: Optional[Dict[str, JsonSchemaAny]] - - -def integer_range_to_type(min_value: float, max_value: float) -> type: - if min_value >= -(2**15) and max_value < 2**15: - return int16 - elif min_value >= -(2**31) and max_value < 2**31: - return int32 - else: - return int64 - - -def enum_safe_name(name: str) -> str: - name = re.sub(r"\W", "_", name) - is_dunder = name.startswith("__") - is_sunder = name.startswith("_") and name.endswith("_") - if is_dunder or is_sunder: # provide an alternative for dunder and sunder names - name = f"v{name}" - return name - - -def enum_values_to_type( - module: types.ModuleType, - name: str, - values: Dict[str, Any], - title: 
Optional[str] = None, - description: Optional[str] = None, -) -> Type[enum.Enum]: - enum_class: Type[enum.Enum] = enum.Enum(name, values) # type: ignore - - # assign the newly created type to the same module where the defining class is - enum_class.__module__ = module.__name__ - enum_class.__doc__ = str(Docstring(short_description=title, long_description=description)) - setattr(module, name, enum_class) - - return enum.unique(enum_class) - - -def schema_to_type(schema: Schema, *, module: types.ModuleType, class_name: str) -> TypeLike: - """ - Creates a Python type from a JSON schema. - - :param schema: The JSON schema that the types would correspond to. - :param module: The module in which to create the new types. - :param class_name: The name assigned to the top-level class. - """ - - top_node = typing.cast(JsonSchemaTopLevelObject, json_to_object(JsonSchemaTopLevelObject, schema)) - if top_node.definitions is not None: - for type_name, type_node in top_node.definitions.items(): - type_def = node_to_typedef(module, type_name, type_node) - if type_def.default is not dataclasses.MISSING: - raise TypeError("disallowed: `default` for top-level type definitions") - - type_def.type.__module__ = module.__name__ - setattr(module, type_name, type_def.type) - - return node_to_typedef(module, class_name, top_node).type - - -@dataclass -class TypeDef: - type: TypeLike - default: Any = dataclasses.MISSING - - -def json_to_value(target_type: TypeLike, data: JsonType) -> Any: - if data is not None: - return json_to_object(target_type, data) - else: - return dataclasses.MISSING - - -def node_to_typedef(module: types.ModuleType, context: str, node: JsonSchemaNode) -> TypeDef: - if isinstance(node, JsonSchemaRef): - match_obj = re.match(r"^#/definitions/(\w+)$", node.ref) - if not match_obj: - raise ValueError(f"invalid reference: {node.ref}") - - type_name = match_obj.group(1) - return TypeDef(getattr(module, type_name), dataclasses.MISSING) - - elif isinstance(node, JsonSchemaBoolean): - if node.const is not None: - return TypeDef(Literal[node.const], dataclasses.MISSING) - - default = json_to_value(bool, node.default) - return TypeDef(bool, default) - - elif isinstance(node, JsonSchemaInteger): - if node.const is not None: - return TypeDef(Literal[node.const], dataclasses.MISSING) - - integer_type: TypeLike - if node.format == "int16": - integer_type = int16 - elif node.format == "int32": - integer_type = int32 - elif node.format == "int64": - integer_type = int64 - else: - if node.enum is not None: - integer_type = integer_range_to_type(min(node.enum), max(node.enum)) - elif node.minimum is not None and node.maximum is not None: - integer_type = integer_range_to_type(node.minimum, node.maximum) - else: - integer_type = int - - default = json_to_value(integer_type, node.default) - return TypeDef(integer_type, default) - - elif isinstance(node, JsonSchemaNumber): - if node.const is not None: - return TypeDef(Literal[node.const], dataclasses.MISSING) - - number_type: TypeLike - if node.format == "float32": - number_type = float32 - elif node.format == "float64": - number_type = float64 - else: - if ( - node.exclusiveMinimum is not None - and node.exclusiveMaximum is not None - and node.exclusiveMinimum == -node.exclusiveMaximum - ): - integer_digits = round(math.log10(node.exclusiveMaximum)) - else: - integer_digits = None - - if node.multipleOf is not None: - decimal_digits = -round(math.log10(node.multipleOf)) - else: - decimal_digits = None - - if integer_digits is not None and decimal_digits is not 
None: - number_type = Annotated[ - decimal.Decimal, - Precision(integer_digits + decimal_digits, decimal_digits), - ] - else: - number_type = float - - default = json_to_value(number_type, node.default) - return TypeDef(number_type, default) - - elif isinstance(node, JsonSchemaString): - if node.const is not None: - return TypeDef(Literal[node.const], dataclasses.MISSING) - - string_type: TypeLike - if node.format == "date-time": - string_type = datetime.datetime - elif node.format == "uuid": - string_type = uuid.UUID - elif node.format == "ipv4": - string_type = ipaddress.IPv4Address - elif node.format == "ipv6": - string_type = ipaddress.IPv6Address - - elif node.enum is not None: - string_type = enum_values_to_type( - module, - context, - {enum_safe_name(e): e for e in node.enum}, - title=node.title, - description=node.description, - ) - - elif node.maxLength is not None: - string_type = Annotated[str, MaxLength(node.maxLength)] - else: - string_type = str - - default = json_to_value(string_type, node.default) - return TypeDef(string_type, default) - - elif isinstance(node, JsonSchemaArray): - type_def = node_to_typedef(module, context, node.items) - if type_def.default is not dataclasses.MISSING: - raise TypeError("disallowed: `default` for array element type") - list_type = List[(type_def.type,)] # type: ignore - return TypeDef(list_type, dataclasses.MISSING) - - elif isinstance(node, JsonSchemaObject): - if node.properties is None: - return TypeDef(JsonType, dataclasses.MISSING) - - if node.additionalProperties is None or node.additionalProperties is not False: - raise TypeError("expected: `additionalProperties` equals `false`") - - required = node.required if node.required is not None else [] - - class_name = context - - fields: List[Tuple[str, Any, dataclasses.Field]] = [] - params: Dict[str, DocstringParam] = {} - for prop_name, prop_node in node.properties.items(): - type_def = node_to_typedef(module, f"{class_name}__{prop_name}", prop_node) - if prop_name in required: - prop_type = type_def.type - else: - prop_type = Union[(None, type_def.type)] - fields.append((prop_name, prop_type, dataclasses.field(default=type_def.default))) - prop_desc = prop_node.title or prop_node.description - if prop_desc is not None: - params[prop_name] = DocstringParam(prop_name, prop_desc) - - fields.sort(key=lambda t: t[2].default is not dataclasses.MISSING) - if sys.version_info >= (3, 12): - class_type = dataclasses.make_dataclass(class_name, fields, module=module.__name__) - else: - class_type = dataclasses.make_dataclass(class_name, fields, namespace={"__module__": module.__name__}) - class_type.__doc__ = str( - Docstring( - short_description=node.title, - long_description=node.description, - params=params, - ) - ) - setattr(module, class_name, class_type) - return TypeDef(class_type, dataclasses.MISSING) - - elif isinstance(node, JsonSchemaOneOf): - union_defs = tuple(node_to_typedef(module, context, n) for n in node.oneOf) - if any(d.default is not dataclasses.MISSING for d in union_defs): - raise TypeError("disallowed: `default` for union member type") - union_types = tuple(d.type for d in union_defs) - return TypeDef(Union[union_types], dataclasses.MISSING) - - raise NotImplementedError() - - -@dataclass -class SchemaFlatteningOptions: - qualified_names: bool = False - recursive: bool = False - - -def flatten_schema(schema: Schema, *, options: Optional[SchemaFlatteningOptions] = None) -> Schema: - top_node = typing.cast(JsonSchemaTopLevelObject, json_to_object(JsonSchemaTopLevelObject, 
schema)) - flattener = SchemaFlattener(options) - obj = flattener.flatten(top_node) - return typing.cast(Schema, object_to_json(obj)) - - -class SchemaFlattener: - options: SchemaFlatteningOptions - - def __init__(self, options: Optional[SchemaFlatteningOptions] = None) -> None: - self.options = options or SchemaFlatteningOptions() - - def flatten(self, source_node: JsonSchemaObject) -> JsonSchemaObject: - if source_node.type != "object": - return source_node - - source_props = source_node.properties or {} - target_props: Dict[str, JsonSchemaAny] = {} - - source_reqs = source_node.required or [] - target_reqs: List[str] = [] - - for name, prop in source_props.items(): - if not isinstance(prop, JsonSchemaObject): - target_props[name] = prop - if name in source_reqs: - target_reqs.append(name) - continue - - if self.options.recursive: - obj = self.flatten(prop) - else: - obj = prop - if obj.properties is not None: - if self.options.qualified_names: - target_props.update((f"{name}.{n}", p) for n, p in obj.properties.items()) - else: - target_props.update(obj.properties.items()) - if obj.required is not None: - if self.options.qualified_names: - target_reqs.extend(f"{name}.{n}" for n in obj.required) - else: - target_reqs.extend(obj.required) - - target_node = copy.copy(source_node) - target_node.properties = target_props or None - target_node.additionalProperties = False - target_node.required = target_reqs or None - return target_node diff --git a/llama_stack/strong_typing/core.py b/llama_stack/strong_typing/core.py deleted file mode 100644 index 501b6a5db..000000000 --- a/llama_stack/strong_typing/core.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -from typing import Dict, List, Union - - -class JsonObject: - "Placeholder type for an unrestricted JSON object." - - -class JsonArray: - "Placeholder type for an unrestricted JSON array." - - -# a JSON type with possible `null` values -JsonType = Union[ - None, - bool, - int, - float, - str, - Dict[str, "JsonType"], - List["JsonType"], -] - -# a JSON type that cannot contain `null` values -StrictJsonType = Union[ - bool, - int, - float, - str, - Dict[str, "StrictJsonType"], - List["StrictJsonType"], -] - -# a meta-type that captures the object type in a JSON schema -Schema = Dict[str, JsonType] diff --git a/llama_stack/strong_typing/deserializer.py b/llama_stack/strong_typing/deserializer.py deleted file mode 100644 index 883590862..000000000 --- a/llama_stack/strong_typing/deserializer.py +++ /dev/null @@ -1,877 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. 
-
-:see: https://github.com/hunyadi/strong_typing
-"""
-
-import abc
-import base64
-import dataclasses
-import datetime
-import enum
-import inspect
-import ipaddress
-import sys
-import typing
-import uuid
-from types import ModuleType
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Generic,
-    List,
-    Literal,
-    NamedTuple,
-    Optional,
-    Set,
-    Tuple,
-    Type,
-    TypeVar,
-    Union,
-)
-
-from .core import JsonType
-from .exception import JsonKeyError, JsonTypeError, JsonValueError
-from .inspection import (
-    TypeLike,
-    create_object,
-    enum_value_types,
-    evaluate_type,
-    get_class_properties,
-    get_class_property,
-    get_resolved_hints,
-    is_dataclass_instance,
-    is_dataclass_type,
-    is_named_tuple_type,
-    is_type_annotated,
-    is_type_literal,
-    is_type_optional,
-    unwrap_annotated_type,
-    unwrap_literal_values,
-    unwrap_optional_type,
-)
-from .mapping import python_field_to_json_property
-from .name import python_type_to_str
-
-E = TypeVar("E", bound=enum.Enum)
-T = TypeVar("T")
-R = TypeVar("R")
-K = TypeVar("K")
-V = TypeVar("V")
-
-
-class Deserializer(abc.ABC, Generic[T]):
-    "Parses a JSON value into a Python type."
-
-    def build(self, context: Optional[ModuleType]) -> None:
-        """
-        Creates auxiliary parsers that this parser depends on.
-
-        :param context: A module context for evaluating types specified as a string.
-        """
-
-    @abc.abstractmethod
-    def parse(self, data: JsonType) -> T:
-        """
-        Parses a JSON value into a Python type.
-
-        :param data: The JSON value to de-serialize.
-        :returns: The Python object that the JSON value de-serializes to.
-        """
-
-
-class NoneDeserializer(Deserializer[None]):
-    "Parses JSON `null` values into Python `None`."
-
-    def parse(self, data: JsonType) -> None:
-        if data is not None:
-            raise JsonTypeError(f"`None` type expects JSON `null` but instead received: {data}")
-        return None
-
-
-class BoolDeserializer(Deserializer[bool]):
-    "Parses JSON `boolean` values into Python `bool` type."
-
-    def parse(self, data: JsonType) -> bool:
-        if not isinstance(data, bool):
-            raise JsonTypeError(f"`bool` type expects JSON `boolean` data but instead received: {data}")
-        return bool(data)
-
-
-class IntDeserializer(Deserializer[int]):
-    "Parses JSON `number` values into Python `int` type."
-
-    def parse(self, data: JsonType) -> int:
-        if not isinstance(data, int):
-            raise JsonTypeError(f"`int` type expects integer data as JSON `number` but instead received: {data}")
-        return int(data)
-
-
-class FloatDeserializer(Deserializer[float]):
-    "Parses JSON `number` values into Python `float` type."
-
-    def parse(self, data: JsonType) -> float:
-        if not isinstance(data, float) and not isinstance(data, int):
-            raise JsonTypeError(f"`float` type expects data as JSON `number` but instead received: {data}")
-        return float(data)
-
-
-class StringDeserializer(Deserializer[str]):
-    "Parses JSON `string` values into Python `str` type."
-
-    def parse(self, data: JsonType) -> str:
-        if not isinstance(data, str):
-            raise JsonTypeError(f"`str` type expects JSON `string` data but instead received: {data}")
-        return str(data)
-
-
-class BytesDeserializer(Deserializer[bytes]):
-    "Parses JSON `string` values of Base64-encoded strings into Python `bytes` type."
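# A standalone sketch of the scalar normalization these leaf deserializers
# perform (stdlib-only; it mirrors the logic of BytesDeserializer and of
# DateTimeDeserializer just below rather than importing the deleted module,
# so the helper name is illustrative):
import base64
import datetime

def parse_timestamp(data: str) -> datetime.datetime:
    # map the military "Zulu" suffix to an explicit UTC offset, as DateTimeDeserializer does
    if data.endswith("Z"):
        data = f"{data[:-1]}+00:00"
    timestamp = datetime.datetime.fromisoformat(data)
    if timestamp.tzinfo is None:
        raise ValueError(f"timestamp lacks explicit time zone designator: {data}")
    return timestamp

assert parse_timestamp("2024-05-01T12:00:00Z").tzinfo is not None
assert base64.b64decode("aGVsbG8=", validate=True) == b"hello"  # the decoding step BytesDeserializer applies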
- - def parse(self, data: JsonType) -> bytes: - if not isinstance(data, str): - raise JsonTypeError(f"`bytes` type expects JSON `string` data but instead received: {data}") - return base64.b64decode(data, validate=True) - - -class DateTimeDeserializer(Deserializer[datetime.datetime]): - "Parses JSON `string` values representing timestamps in ISO 8601 format to Python `datetime` with time zone." - - def parse(self, data: JsonType) -> datetime.datetime: - if not isinstance(data, str): - raise JsonTypeError(f"`datetime` type expects JSON `string` data but instead received: {data}") - - if data.endswith("Z"): - data = f"{data[:-1]}+00:00" # Python's isoformat() does not support military time zones like "Zulu" for UTC - timestamp = datetime.datetime.fromisoformat(data) - if timestamp.tzinfo is None: - raise JsonValueError(f"timestamp lacks explicit time zone designator: {data}") - return timestamp - - -class DateDeserializer(Deserializer[datetime.date]): - "Parses JSON `string` values representing dates in ISO 8601 format to Python `date` type." - - def parse(self, data: JsonType) -> datetime.date: - if not isinstance(data, str): - raise JsonTypeError(f"`date` type expects JSON `string` data but instead received: {data}") - - return datetime.date.fromisoformat(data) - - -class TimeDeserializer(Deserializer[datetime.time]): - "Parses JSON `string` values representing time instances in ISO 8601 format to Python `time` type with time zone." - - def parse(self, data: JsonType) -> datetime.time: - if not isinstance(data, str): - raise JsonTypeError(f"`time` type expects JSON `string` data but instead received: {data}") - - return datetime.time.fromisoformat(data) - - -class UUIDDeserializer(Deserializer[uuid.UUID]): - "Parses JSON `string` values of UUID strings into Python `uuid.UUID` type." - - def parse(self, data: JsonType) -> uuid.UUID: - if not isinstance(data, str): - raise JsonTypeError(f"`UUID` type expects JSON `string` data but instead received: {data}") - return uuid.UUID(data) - - -class IPv4Deserializer(Deserializer[ipaddress.IPv4Address]): - "Parses JSON `string` values of IPv4 address strings into Python `ipaddress.IPv4Address` type." - - def parse(self, data: JsonType) -> ipaddress.IPv4Address: - if not isinstance(data, str): - raise JsonTypeError(f"`IPv4Address` type expects JSON `string` data but instead received: {data}") - return ipaddress.IPv4Address(data) - - -class IPv6Deserializer(Deserializer[ipaddress.IPv6Address]): - "Parses JSON `string` values of IPv6 address strings into Python `ipaddress.IPv6Address` type." - - def parse(self, data: JsonType) -> ipaddress.IPv6Address: - if not isinstance(data, str): - raise JsonTypeError(f"`IPv6Address` type expects JSON `string` data but instead received: {data}") - return ipaddress.IPv6Address(data) - - -class ListDeserializer(Deserializer[List[T]]): - "Recursively de-serializes a JSON array into a Python `list`." 
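# Why every container parser splits construction from build(): _get_deserializer
# (later in this file) caches a parser *before* resolving its children, so
# self-referential types terminate. A minimal registry demonstrating the same
# idea (names here are illustrative):
from typing import Optional

registry: dict = {}

class TreeParser:
    def __init__(self) -> None:
        registry["Tree"] = self  # cache the parser first ...
        self.child: Optional["TreeParser"] = None

    def build(self) -> None:
        self.child = registry["Tree"]  # ... then resolve children via the cache

parser = TreeParser()
parser.build()
assert parser.child is parser  # a recursive type resolves to itself instead of recursing forever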
-
-    item_type: Type[T]
-    item_parser: Deserializer
-
-    def __init__(self, item_type: Type[T]) -> None:
-        self.item_type = item_type
-
-    def build(self, context: Optional[ModuleType]) -> None:
-        self.item_parser = _get_deserializer(self.item_type, context)
-
-    def parse(self, data: JsonType) -> List[T]:
-        if not isinstance(data, list):
-            type_name = python_type_to_str(self.item_type)
-            raise JsonTypeError(f"type `List[{type_name}]` expects JSON `array` data but instead received: {data}")
-
-        return [self.item_parser.parse(item) for item in data]
-
-
-class DictDeserializer(Deserializer[Dict[K, V]]):
-    "Recursively de-serializes a JSON object into a Python `dict`."
-
-    key_type: Type[K]
-    value_type: Type[V]
-    value_parser: Deserializer[V]
-
-    def __init__(self, key_type: Type[K], value_type: Type[V]) -> None:
-        self.key_type = key_type
-        self.value_type = value_type
-        self._check_key_type()
-
-    def build(self, context: Optional[ModuleType]) -> None:
-        self.value_parser = _get_deserializer(self.value_type, context)
-
-    def _check_key_type(self) -> None:
-        if self.key_type is str:
-            return
-
-        if issubclass(self.key_type, enum.Enum):
-            value_types = enum_value_types(self.key_type)
-            if len(value_types) != 1:
-                raise JsonTypeError(
-                    f"type `{self.container_type}` has invalid key type, "
-                    f"enumerations must have a consistent member value type but several types found: {value_types}"
-                )
-            value_type = value_types.pop()
-            if value_type is not str:
-                raise JsonTypeError(
-                    f"type `{self.container_type}` has invalid enumeration key type, expected `enum.Enum` with string values"
-                )
-            return
-
-        raise JsonTypeError(
-            f"type `{self.container_type}` has invalid key type, expected `str` or `enum.Enum` with string values"
-        )
-
-    @property
-    def container_type(self) -> str:
-        key_type_name = python_type_to_str(self.key_type)
-        value_type_name = python_type_to_str(self.value_type)
-        return f"Dict[{key_type_name}, {value_type_name}]"
-
-    def parse(self, data: JsonType) -> Dict[K, V]:
-        if not isinstance(data, dict):
-            raise JsonTypeError(
-                f"type `{self.container_type}` expects JSON `object` data but instead received: {data}"
-            )
-
-        return dict(
-            (self.key_type(key), self.value_parser.parse(value))  # type: ignore[call-arg]
-            for key, value in data.items()
-        )
-
-
-class SetDeserializer(Deserializer[Set[T]]):
-    "Recursively de-serializes a JSON list into a Python `set`."
-
-    member_type: Type[T]
-    member_parser: Deserializer
-
-    def __init__(self, member_type: Type[T]) -> None:
-        self.member_type = member_type
-
-    def build(self, context: Optional[ModuleType]) -> None:
-        self.member_parser = _get_deserializer(self.member_type, context)
-
-    def parse(self, data: JsonType) -> Set[T]:
-        if not isinstance(data, list):
-            type_name = python_type_to_str(self.member_type)
-            raise JsonTypeError(f"type `Set[{type_name}]` expects JSON `array` data but instead received: {data}")
-
-        return set(self.member_parser.parse(item) for item in data)
-
-
-class TupleDeserializer(Deserializer[Tuple[Any, ...]]):
-    "Recursively de-serializes a JSON list into a Python `tuple`."
-
-    item_types: Tuple[Type[Any], ...]
-    item_parsers: Tuple[Deserializer[Any], ...]
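# The key-type rule enforced by _check_key_type above, standalone: JSON object
# keys are always strings, so only `str` or string-valued enums can round-trip
# as dict keys (Color and check_key_type are illustrative):
import enum

class Color(enum.Enum):
    RED = "red"
    BLUE = "blue"

def check_key_type(key_type: type) -> None:
    if key_type is str:
        return
    if issubclass(key_type, enum.Enum) and {type(e.value) for e in key_type} == {str}:
        return
    raise TypeError(f"unsupported dict key type: {key_type!r}")

check_key_type(str)
check_key_type(Color)
# check_key_type(int) would raise: Dict[int, ...] has no JSON object representation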
- - def __init__(self, item_types: Tuple[Type[Any], ...]) -> None: - self.item_types = item_types - - def build(self, context: Optional[ModuleType]) -> None: - self.item_parsers = tuple(_get_deserializer(item_type, context) for item_type in self.item_types) - - @property - def container_type(self) -> str: - type_names = ", ".join(python_type_to_str(item_type) for item_type in self.item_types) - return f"Tuple[{type_names}]" - - def parse(self, data: JsonType) -> Tuple[Any, ...]: - if not isinstance(data, list) or len(data) != len(self.item_parsers): - if not isinstance(data, list): - raise JsonTypeError( - f"type `{self.container_type}` expects JSON `array` data but instead received: {data}" - ) - else: - count = len(self.item_parsers) - raise JsonValueError( - f"type `{self.container_type}` expects a JSON `array` of length {count} but received length {len(data)}" - ) - - return tuple(item_parser.parse(item) for item_parser, item in zip(self.item_parsers, data, strict=False)) - - -class UnionDeserializer(Deserializer): - "De-serializes a JSON value (of any type) into a Python union type." - - member_types: Tuple[type, ...] - member_parsers: Tuple[Deserializer, ...] - - def __init__(self, member_types: Tuple[type, ...]) -> None: - self.member_types = member_types - - def build(self, context: Optional[ModuleType]) -> None: - self.member_parsers = tuple(_get_deserializer(member_type, context) for member_type in self.member_types) - - def parse(self, data: JsonType) -> Any: - for member_parser in self.member_parsers: - # iterate over potential types of discriminated union - try: - return member_parser.parse(data) - except (JsonKeyError, JsonTypeError): - # indicates a required field is missing from JSON dict -OR- the data cannot be cast to the expected type, - # i.e. we don't have the type that we are looking for - continue - - type_names = ", ".join(python_type_to_str(member_type) for member_type in self.member_types) - raise JsonKeyError(f"type `Union[{type_names}]` could not be instantiated from: {data}") - - -def get_literal_properties(typ: type) -> Set[str]: - "Returns the names of all properties in a class that are of a literal type." - - return set( - property_name for property_name, property_type in get_class_properties(typ) if is_type_literal(property_type) - ) - - -def get_discriminating_properties(types: Tuple[type, ...]) -> Set[str]: - "Returns a set of properties with literal type that are common across all specified classes." - - if not types or not all(isinstance(typ, type) for typ in types): - return set() - - props = get_literal_properties(types[0]) - for typ in types[1:]: - props = props & get_literal_properties(typ) - - return props - - -class TaggedUnionDeserializer(Deserializer): - "De-serializes a JSON value with one or more disambiguating properties into a Python union type." - - member_types: Tuple[type, ...] 
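# How TaggedUnionDeserializer keys its member parsers: the Literal-typed
# property shared by every member acts as the discriminator, mapping
# (property name, literal value) pairs to parsers. A condensed sketch with
# illustrative dataclasses standing in for the member parsers:
import dataclasses
from typing import Literal

@dataclasses.dataclass
class Cat:
    kind: Literal["cat"]
    lives: int

@dataclasses.dataclass
class Dog:
    kind: Literal["dog"]
    good: bool

dispatch = {("kind", "cat"): Cat, ("kind", "dog"): Dog}
data = {"kind": "dog", "good": True}
assert isinstance(dispatch[("kind", data["kind"])](**data), Dog)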
- disambiguating_properties: Set[str] - member_parsers: Dict[Tuple[str, Any], Deserializer] - - def __init__(self, member_types: Tuple[type, ...]) -> None: - self.member_types = member_types - self.disambiguating_properties = get_discriminating_properties(member_types) - - def build(self, context: Optional[ModuleType]) -> None: - self.member_parsers = {} - for member_type in self.member_types: - for property_name in self.disambiguating_properties: - literal_type = get_class_property(member_type, property_name) - if not literal_type: - continue - - for literal_value in unwrap_literal_values(literal_type): - tpl = (property_name, literal_value) - if tpl in self.member_parsers: - raise JsonTypeError( - f"disambiguating property `{property_name}` in type `{self.union_type}` has a duplicate value: {literal_value}" - ) - - self.member_parsers[tpl] = _get_deserializer(member_type, context) - - @property - def union_type(self) -> str: - type_names = ", ".join(python_type_to_str(member_type) for member_type in self.member_types) - return f"Union[{type_names}]" - - def parse(self, data: JsonType) -> Any: - if not isinstance(data, dict): - raise JsonTypeError( - f"tagged union type `{self.union_type}` expects JSON `object` data but instead received: {data}" - ) - - for property_name in self.disambiguating_properties: - disambiguating_value = data.get(property_name) - if disambiguating_value is None: - continue - - member_parser = self.member_parsers.get((property_name, disambiguating_value)) - if member_parser is None: - raise JsonTypeError( - f"disambiguating property value is invalid for tagged union type `{self.union_type}`: {data}" - ) - - return member_parser.parse(data) - - raise JsonTypeError( - f"disambiguating property value is missing for tagged union type `{self.union_type}`: {data}" - ) - - -class LiteralDeserializer(Deserializer): - "De-serializes a JSON value into a Python literal type." - - values: Tuple[Any, ...] - parser: Deserializer - - def __init__(self, values: Tuple[Any, ...]) -> None: - self.values = values - - def build(self, context: Optional[ModuleType]) -> None: - literal_type_tuple = tuple(type(value) for value in self.values) - literal_type_set = set(literal_type_tuple) - if len(literal_type_set) != 1: - value_names = ", ".join(repr(value) for value in self.values) - raise TypeError( - f"type `Literal[{value_names}]` expects consistent literal value types but got: {literal_type_tuple}" - ) - - literal_type = literal_type_set.pop() - self.parser = _get_deserializer(literal_type, context) - - def parse(self, data: JsonType) -> Any: - value = self.parser.parse(data) - if value not in self.values: - value_names = ", ".join(repr(value) for value in self.values) - raise JsonTypeError(f"type `Literal[{value_names}]` could not be instantiated from: {data}") - return value - - -class EnumDeserializer(Deserializer[E]): - "Returns an enumeration instance based on the enumeration value read from a JSON value." - - enum_type: Type[E] - - def __init__(self, enum_type: Type[E]) -> None: - self.enum_type = enum_type - - def parse(self, data: JsonType) -> E: - return self.enum_type(data) - - -class CustomDeserializer(Deserializer[T]): - "Uses the `from_json` class method in class to de-serialize the object from JSON." 
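# The `from_json` hook honored by CustomDeserializer, standalone: any class
# exposing a callable `from_json` bypasses reflection-based parsing entirely
# (Fraction is illustrative):
class Fraction:
    def __init__(self, numerator: int, denominator: int) -> None:
        self.numerator = numerator
        self.denominator = denominator

    @classmethod
    def from_json(cls, data: object) -> "Fraction":
        numerator, denominator = str(data).split("/")
        return cls(int(numerator), int(denominator))

assert Fraction.from_json("3/4").denominator == 4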
- - converter: Callable[[JsonType], T] - - def __init__(self, converter: Callable[[JsonType], T]) -> None: - self.converter = converter - - def parse(self, data: JsonType) -> T: - return self.converter(data) - - -class FieldDeserializer(abc.ABC, Generic[T, R]): - """ - Deserializes a JSON property into a Python object field. - - :param property_name: The name of the JSON property to read from a JSON `object`. - :param field_name: The name of the field in a Python class to write data to. - :param parser: A compatible deserializer that can handle the field's type. - """ - - property_name: str - field_name: str - parser: Deserializer[T] - - def __init__(self, property_name: str, field_name: str, parser: Deserializer[T]) -> None: - self.property_name = property_name - self.field_name = field_name - self.parser = parser - - @abc.abstractmethod - def parse_field(self, data: Dict[str, JsonType]) -> R: ... - - -class RequiredFieldDeserializer(FieldDeserializer[T, T]): - "Deserializes a JSON property into a mandatory Python object field." - - def parse_field(self, data: Dict[str, JsonType]) -> T: - if self.property_name not in data: - raise JsonKeyError(f"missing required property `{self.property_name}` from JSON object: {data}") - - return self.parser.parse(data[self.property_name]) - - -class OptionalFieldDeserializer(FieldDeserializer[T, Optional[T]]): - "Deserializes a JSON property into an optional Python object field with a default value of `None`." - - def parse_field(self, data: Dict[str, JsonType]) -> Optional[T]: - value = data.get(self.property_name) - if value is not None: - return self.parser.parse(value) - else: - return None - - -class DefaultFieldDeserializer(FieldDeserializer[T, T]): - "Deserializes a JSON property into a Python object field with an explicit default value." - - default_value: T - - def __init__( - self, - property_name: str, - field_name: str, - parser: Deserializer, - default_value: T, - ) -> None: - super().__init__(property_name, field_name, parser) - self.default_value = default_value - - def parse_field(self, data: Dict[str, JsonType]) -> T: - value = data.get(self.property_name) - if value is not None: - return self.parser.parse(value) - else: - return self.default_value - - -class DefaultFactoryFieldDeserializer(FieldDeserializer[T, T]): - "Deserializes a JSON property into an optional Python object field with an explicit default value factory." - - default_factory: Callable[[], T] - - def __init__( - self, - property_name: str, - field_name: str, - parser: Deserializer[T], - default_factory: Callable[[], T], - ) -> None: - super().__init__(property_name, field_name, parser) - self.default_factory = default_factory - - def parse_field(self, data: Dict[str, JsonType]) -> T: - value = data.get(self.property_name) - if value is not None: - return self.parser.parse(value) - else: - return self.default_factory() - - -class ClassDeserializer(Deserializer[T]): - "Base class for de-serializing class-like types such as data classes, named tuples and regular classes." 
-
-    class_type: type
-    property_parsers: List[FieldDeserializer]
-    property_fields: Set[str]
-
-    def __init__(self, class_type: Type[T]) -> None:
-        self.class_type = class_type
-
-    def assign(self, property_parsers: List[FieldDeserializer]) -> None:
-        self.property_parsers = property_parsers
-        self.property_fields = set(property_parser.property_name for property_parser in property_parsers)
-
-    def parse(self, data: JsonType) -> T:
-        if not isinstance(data, dict):
-            type_name = python_type_to_str(self.class_type)
-            raise JsonTypeError(f"type `{type_name}` expects JSON `object` data but instead received: {data}")
-
-        object_data: Dict[str, JsonType] = typing.cast(Dict[str, JsonType], data)
-
-        field_values = {}
-        for property_parser in self.property_parsers:
-            field_values[property_parser.field_name] = property_parser.parse_field(object_data)
-
-        if not self.property_fields.issuperset(object_data):
-            unassigned_names = [name for name in object_data if name not in self.property_fields]
-            raise JsonKeyError(f"unrecognized fields in JSON object: {unassigned_names}")
-
-        return self.create(**field_values)
-
-    def create(self, **field_values: Any) -> T:
-        "Instantiates an object with a collection of property values."
-
-        obj: T = create_object(self.class_type)
-
-        # use `setattr` on newly created object instance
-        for field_name, field_value in field_values.items():
-            setattr(obj, field_name, field_value)
-        return obj
-
-
-class NamedTupleDeserializer(ClassDeserializer[NamedTuple]):
-    "De-serializes a named tuple from a JSON `object`."
-
-    def build(self, context: Optional[ModuleType]) -> None:
-        property_parsers: List[FieldDeserializer] = [
-            RequiredFieldDeserializer(field_name, field_name, _get_deserializer(field_type, context))
-            for field_name, field_type in get_resolved_hints(self.class_type).items()
-        ]
-        super().assign(property_parsers)
-
-    def create(self, **field_values: Any) -> NamedTuple:
-        # mypy fails to deduce that this class returns NamedTuples only, hence the `ignore` directive
-        return self.class_type(**field_values)  # type: ignore[no-any-return]
-
-
-class DataclassDeserializer(ClassDeserializer[T]):
-    "De-serializes a data class from a JSON `object`."
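# How DataclassDeserializer.build (below) picks a field parser per member,
# condensed into annotations on an illustrative dataclass:
import dataclasses
from typing import List, Optional

@dataclasses.dataclass
class Job:
    name: str            # no default, not optional -> RequiredFieldDeserializer
    note: Optional[str]  # optional, no default     -> OptionalFieldDeserializer
    retries: int = 3     # explicit default         -> DefaultFieldDeserializer
    tags: List[str] = dataclasses.field(default_factory=list)  # -> DefaultFactoryFieldDeserializer

assert Job("backup", None).retries == 3 and Job("backup", None).tags == []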
- - def __init__(self, class_type: Type[T]) -> None: - if not dataclasses.is_dataclass(class_type): - raise TypeError("expected: data-class type") - super().__init__(class_type) # type: ignore[arg-type] - - def build(self, context: Optional[ModuleType]) -> None: - property_parsers: List[FieldDeserializer] = [] - resolved_hints = get_resolved_hints(self.class_type) - for field in dataclasses.fields(self.class_type): - field_type = resolved_hints[field.name] - property_name = python_field_to_json_property(field.name, field_type) - - is_optional = is_type_optional(field_type) - has_default = field.default is not dataclasses.MISSING - has_default_factory = field.default_factory is not dataclasses.MISSING - - if is_optional: - required_type: Type[T] = unwrap_optional_type(field_type) - else: - required_type = field_type - - parser = _get_deserializer(required_type, context) - - if has_default: - field_parser: FieldDeserializer = DefaultFieldDeserializer( - property_name, field.name, parser, field.default - ) - elif has_default_factory: - default_factory = typing.cast(Callable[[], Any], field.default_factory) - field_parser = DefaultFactoryFieldDeserializer(property_name, field.name, parser, default_factory) - elif is_optional: - field_parser = OptionalFieldDeserializer(property_name, field.name, parser) - else: - field_parser = RequiredFieldDeserializer(property_name, field.name, parser) - - property_parsers.append(field_parser) - - super().assign(property_parsers) - - -class FrozenDataclassDeserializer(DataclassDeserializer[T]): - "De-serializes a frozen data class from a JSON `object`." - - def create(self, **field_values: Any) -> T: - "Instantiates an object with a collection of property values." - - # create object instance without calling `__init__` - obj: T = create_object(self.class_type) - - # can't use `setattr` on frozen dataclasses, pass member variable values to `__init__` - obj.__init__(**field_values) # type: ignore - return obj - - -class TypedClassDeserializer(ClassDeserializer[T]): - "De-serializes a class with type annotations from a JSON `object` by iterating over class properties." - - def build(self, context: Optional[ModuleType]) -> None: - property_parsers: List[FieldDeserializer] = [] - for field_name, field_type in get_resolved_hints(self.class_type).items(): - property_name = python_field_to_json_property(field_name, field_type) - - is_optional = is_type_optional(field_type) - - if is_optional: - required_type: Type[T] = unwrap_optional_type(field_type) - else: - required_type = field_type - - parser = _get_deserializer(required_type, context) - - if is_optional: - field_parser: FieldDeserializer = OptionalFieldDeserializer(property_name, field_name, parser) - else: - field_parser = RequiredFieldDeserializer(property_name, field_name, parser) - - property_parsers.append(field_parser) - - super().assign(property_parsers) - - -def create_deserializer(typ: TypeLike, context: Optional[ModuleType] = None) -> Deserializer: - """ - Creates a de-serializer engine to produce a Python object from an object obtained from a JSON string. - - When de-serializing a JSON object into a Python object, the following transformations are applied: - - * Fundamental types are parsed as `bool`, `int`, `float` or `str`. - * Date and time types are parsed from the ISO 8601 format with time zone into the corresponding Python type - `datetime`, `date` or `time`. - * Byte arrays are read from a string with Base64 encoding into a `bytes` instance. 
- * UUIDs are extracted from a UUID string compliant with RFC 4122 into a `uuid.UUID` instance. - * Enumerations are instantiated with a lookup on enumeration value. - * Containers (e.g. `list`, `dict`, `set`, `tuple`) are parsed recursively. - * Complex objects with properties (including data class types) are populated from dictionaries of key-value pairs - using reflection (enumerating type annotations). - - :raises TypeError: A de-serializer engine cannot be constructed for the input type. - """ - - if context is None: - if isinstance(typ, type): - context = sys.modules[typ.__module__] - - return _get_deserializer(typ, context) - - -_CACHE: Dict[Tuple[str, str], Deserializer] = {} - - -def _get_deserializer(typ: TypeLike, context: Optional[ModuleType]) -> Deserializer: - "Creates or re-uses a de-serializer engine to parse an object obtained from a JSON string." - - cache_key = None - - if isinstance(typ, (str, typing.ForwardRef)): - if context is None: - raise TypeError(f"missing context for evaluating type: {typ}") - - if isinstance(typ, str): - if hasattr(context, typ): - cache_key = (context.__name__, typ) - elif isinstance(typ, typing.ForwardRef): - if hasattr(context, typ.__forward_arg__): - cache_key = (context.__name__, typ.__forward_arg__) - - typ = evaluate_type(typ, context) - - typ = unwrap_annotated_type(typ) if is_type_annotated(typ) else typ - - if isinstance(typ, type) and typing.get_origin(typ) is None: - cache_key = (typ.__module__, typ.__name__) - - if cache_key is not None: - deserializer = _CACHE.get(cache_key) - if deserializer is None: - deserializer = _create_deserializer(typ) - - # store de-serializer immediately in cache to avoid stack overflow for recursive types - _CACHE[cache_key] = deserializer - - if isinstance(typ, type): - # use type's own module as context for evaluating member types - context = sys.modules[typ.__module__] - - # create any de-serializers this de-serializer is depending on - deserializer.build(context) - else: - # special forms are not always hashable, create a new de-serializer every time - deserializer = _create_deserializer(typ) - deserializer.build(context) - - return deserializer - - -def _create_deserializer(typ: TypeLike) -> Deserializer: - "Creates a de-serializer engine to parse an object obtained from a JSON string." - - # check for well-known types - if typ is type(None): - return NoneDeserializer() - elif typ is bool: - return BoolDeserializer() - elif typ is int: - return IntDeserializer() - elif typ is float: - return FloatDeserializer() - elif typ is str: - return StringDeserializer() - elif typ is bytes: - return BytesDeserializer() - elif typ is datetime.datetime: - return DateTimeDeserializer() - elif typ is datetime.date: - return DateDeserializer() - elif typ is datetime.time: - return TimeDeserializer() - elif typ is uuid.UUID: - return UUIDDeserializer() - elif typ is ipaddress.IPv4Address: - return IPv4Deserializer() - elif typ is ipaddress.IPv6Address: - return IPv6Deserializer() - - # dynamically-typed collection types - if typ is list: - raise TypeError("explicit item type required: use `List[T]` instead of `list`") - if typ is dict: - raise TypeError("explicit key and value types required: use `Dict[K, V]` instead of `dict`") - if typ is set: - raise TypeError("explicit member type required: use `Set[T]` instead of `set`") - if typ is tuple: - raise TypeError("explicit item type list required: use `Tuple[T, ...]` instead of `tuple`") - - # generic types (e.g. list, dict, set, etc.) 
- origin_type = typing.get_origin(typ) - if origin_type is list: - (list_item_type,) = typing.get_args(typ) # unpack single tuple element - return ListDeserializer(list_item_type) - elif origin_type is dict: - key_type, value_type = typing.get_args(typ) - return DictDeserializer(key_type, value_type) - elif origin_type is set: - (set_member_type,) = typing.get_args(typ) # unpack single tuple element - return SetDeserializer(set_member_type) - elif origin_type is tuple: - return TupleDeserializer(typing.get_args(typ)) - elif origin_type is Union: - union_args = typing.get_args(typ) - if get_discriminating_properties(union_args): - return TaggedUnionDeserializer(union_args) - else: - return UnionDeserializer(union_args) - elif origin_type is Literal: - return LiteralDeserializer(typing.get_args(typ)) - - if not inspect.isclass(typ): - if is_dataclass_instance(typ): - raise TypeError(f"dataclass type expected but got instance: {typ}") - else: - raise TypeError(f"unable to de-serialize unrecognized type: {typ}") - - if issubclass(typ, enum.Enum): - return EnumDeserializer(typ) - - if is_named_tuple_type(typ): - return NamedTupleDeserializer(typ) - - # check if object has custom serialization method - convert_func = getattr(typ, "from_json", None) - if callable(convert_func): - return CustomDeserializer(convert_func) - - if is_dataclass_type(typ): - dataclass_params = getattr(typ, "__dataclass_params__", None) - if dataclass_params is not None and dataclass_params.frozen: - return FrozenDataclassDeserializer(typ) - else: - return DataclassDeserializer(typ) - - return TypedClassDeserializer(typ) diff --git a/llama_stack/strong_typing/docstring.py b/llama_stack/strong_typing/docstring.py deleted file mode 100644 index 497c9ea82..000000000 --- a/llama_stack/strong_typing/docstring.py +++ /dev/null @@ -1,409 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import builtins -import collections.abc -import dataclasses -import inspect -import re -import sys -import types -import typing -from dataclasses import dataclass -from io import StringIO -from typing import Any, Callable, Dict, Optional, Protocol, Type, TypeVar - -if sys.version_info >= (3, 10): - from typing import TypeGuard -else: - from typing_extensions import TypeGuard - -from .inspection import ( - DataclassInstance, - get_class_properties, - get_signature, - is_dataclass_type, - is_type_enum, -) - -T = TypeVar("T") - - -@dataclass -class DocstringParam: - """ - A parameter declaration in a parameter block. - - :param name: The name of the parameter. - :param description: The description text for the parameter. - """ - - name: str - description: str - param_type: type | str = inspect.Signature.empty - - def __str__(self) -> str: - return f":param {self.name}: {self.description}" - - -@dataclass -class DocstringReturns: - """ - A `returns` declaration extracted from a docstring. - - :param description: The description text for the return value. - """ - - description: str - return_type: type = inspect.Signature.empty - - def __str__(self) -> str: - return f":returns: {self.description}" - - -@dataclass -class DocstringRaises: - """ - A `raises` declaration extracted from a docstring. - - :param typename: The type name of the exception raised. 
- :param description: The description associated with the exception raised. - """ - - typename: str - description: str - raise_type: type = inspect.Signature.empty - - def __str__(self) -> str: - return f":raises {self.typename}: {self.description}" - - -@dataclass -class Docstring: - """ - Represents the documentation string (a.k.a. docstring) for a type such as a (data) class or function. - - A docstring is broken down into the following components: - * A short description, which is the first block of text in the documentation string, and ends with a double - newline or a parameter block. - * A long description, which is the optional block of text following the short description, and ends with - a parameter block. - * A parameter block of named parameter and description string pairs in ReST-style. - * A `returns` declaration, which adds explanation to the return value. - * A `raises` declaration, which adds explanation to the exception type raised by the function on error. - - When the docstring is attached to a data class, it is understood as the documentation string of the class - `__init__` method. - - :param short_description: The short description text parsed from a docstring. - :param long_description: The long description text parsed from a docstring. - :param params: The parameter block extracted from a docstring. - :param returns: The returns declaration extracted from a docstring. - """ - - short_description: Optional[str] = None - long_description: Optional[str] = None - params: Dict[str, DocstringParam] = dataclasses.field(default_factory=dict) - returns: Optional[DocstringReturns] = None - raises: Dict[str, DocstringRaises] = dataclasses.field(default_factory=dict) - - @property - def full_description(self) -> Optional[str]: - if self.short_description and self.long_description: - return f"{self.short_description}\n\n{self.long_description}" - elif self.short_description: - return self.short_description - else: - return None - - def __str__(self) -> str: - output = StringIO() - - has_description = self.short_description or self.long_description - has_blocks = self.params or self.returns or self.raises - - if has_description: - if self.short_description and self.long_description: - output.write(self.short_description) - output.write("\n\n") - output.write(self.long_description) - elif self.short_description: - output.write(self.short_description) - - if has_blocks: - if has_description: - output.write("\n") - - for param in self.params.values(): - output.write("\n") - output.write(str(param)) - if self.returns: - output.write("\n") - output.write(str(self.returns)) - for raises in self.raises.values(): - output.write("\n") - output.write(str(raises)) - - s = output.getvalue() - output.close() - return s - - -def is_exception(member: object) -> TypeGuard[Type[BaseException]]: - return isinstance(member, type) and issubclass(member, BaseException) - - -def get_exceptions(module: types.ModuleType) -> Dict[str, Type[BaseException]]: - "Returns all exception classes declared in a module." - - return {name: class_type for name, class_type in inspect.getmembers(module, is_exception)} - - -class SupportsDoc(Protocol): - __doc__: Optional[str] - - -def _maybe_unwrap_async_iterator(t): - origin_type = typing.get_origin(t) - if origin_type is collections.abc.AsyncIterator: - return typing.get_args(t)[0] - return t - - -def parse_type(typ: SupportsDoc) -> Docstring: - """ - Parse the docstring of a type into its components. - - :param typ: The type whose documentation string to parse. 
- :returns: Components of the documentation string. - """ - # Use docstring from the iterator origin type for streaming apis - typ = _maybe_unwrap_async_iterator(typ) - - doc = get_docstring(typ) - if doc is None: - return Docstring() - - docstring = parse_text(doc) - check_docstring(typ, docstring) - - # assign parameter and return types - if is_dataclass_type(typ): - properties = dict(get_class_properties(typing.cast(type, typ))) - - for name, param in docstring.params.items(): - param.param_type = properties[name] - - elif inspect.isfunction(typ): - signature = get_signature(typ) - for name, param in docstring.params.items(): - param.param_type = signature.parameters[name].annotation - if docstring.returns: - docstring.returns.return_type = signature.return_annotation - - # assign exception types - defining_module = inspect.getmodule(typ) - if defining_module: - context: Dict[str, type] = {} - context.update(get_exceptions(builtins)) - context.update(get_exceptions(defining_module)) - for exc_name, exc in docstring.raises.items(): - raise_type = context.get(exc_name) - if raise_type is None: - type_name = getattr(typ, "__qualname__", None) or getattr(typ, "__name__", None) or None - raise TypeError( - f"doc-string exception type `{exc_name}` is not an exception defined in the context of `{type_name}`" - ) - - exc.raise_type = raise_type - - return docstring - - -def parse_text(text: str) -> Docstring: - """ - Parse a ReST-style docstring into its components. - - :param text: The documentation string to parse, typically acquired as `type.__doc__`. - :returns: Components of the documentation string. - """ - - if not text: - return Docstring() - - # find block that starts object metadata block (e.g. `:param p:` or `:returns:`) - text = inspect.cleandoc(text) - match = re.search("^:", text, flags=re.MULTILINE) - if match: - desc_chunk = text[: match.start()] - meta_chunk = text[match.start() :] # noqa: E203 - else: - desc_chunk = text - meta_chunk = "" - - # split description text into short and long description - parts = desc_chunk.split("\n\n", 1) - - # ensure short description has no newlines - short_description = parts[0].strip().replace("\n", " ") or None - - # ensure long description preserves its structure (e.g. preformatted text) - if len(parts) > 1: - long_description = parts[1].strip() or None - else: - long_description = None - - params: Dict[str, DocstringParam] = {} - raises: Dict[str, DocstringRaises] = {} - returns = None - for match in re.finditer(r"(^:.*?)(?=^:|\Z)", meta_chunk, flags=re.DOTALL | re.MULTILINE): - chunk = match.group(0) - if not chunk: - continue - - args_chunk, desc_chunk = chunk.lstrip(":").split(":", 1) - args = args_chunk.split() - desc = re.sub(r"\s+", " ", desc_chunk.strip()) - - if len(args) > 0: - kw = args[0] - if len(args) == 2: - if kw == "param": - params[args[1]] = DocstringParam( - name=args[1], - description=desc, - ) - elif kw == "raise" or kw == "raises": - raises[args[1]] = DocstringRaises( - typename=args[1], - description=desc, - ) - - elif len(args) == 1: - if kw == "return" or kw == "returns": - returns = DocstringReturns(description=desc) - - return Docstring( - long_description=long_description, - short_description=short_description, - params=params, - returns=returns, - raises=raises, - ) - - -def has_default_docstring(typ: SupportsDoc) -> bool: - "Check if class has the auto-generated string assigned by @dataclass." 
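# What that auto-generated string looks like: @dataclass synthesizes a
# signature-shaped __doc__ when the class body defines none, which is what
# the regex below matches (Point is illustrative):
import dataclasses

@dataclasses.dataclass
class Point:
    x: int
    y: int

assert Point.__doc__ == "Point(x: int, y: int)"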
- - if not isinstance(typ, type): - return False - - if is_dataclass_type(typ): - return typ.__doc__ is not None and re.match(f"^{re.escape(typ.__name__)}[(].*[)]$", typ.__doc__) is not None - - if is_type_enum(typ): - return typ.__doc__ is not None and typ.__doc__ == "An enumeration." - - return False - - -def has_docstring(typ: SupportsDoc) -> bool: - "Check if class has a documentation string other than the auto-generated string assigned by @dataclass." - - if has_default_docstring(typ): - return False - - return bool(typ.__doc__) - - -def get_docstring(typ: SupportsDoc) -> Optional[str]: - if typ.__doc__ is None: - return None - - if has_default_docstring(typ): - return None - - return typ.__doc__ - - -def check_docstring(typ: SupportsDoc, docstring: Docstring, strict: bool = False) -> None: - """ - Verifies the doc-string of a type. - - :raises TypeError: Raised on a mismatch between doc-string parameters, and function or type signature. - """ - - if is_dataclass_type(typ): - check_dataclass_docstring(typ, docstring, strict) - elif inspect.isfunction(typ): - check_function_docstring(typ, docstring, strict) - - -def check_dataclass_docstring(typ: Type[DataclassInstance], docstring: Docstring, strict: bool = False) -> None: - """ - Verifies the doc-string of a data-class type. - - :param strict: Whether to check if all data-class members have doc-strings. - :raises TypeError: Raised on a mismatch between doc-string parameters and data-class members. - """ - - if not is_dataclass_type(typ): - raise TypeError("not a data-class type") - - properties = dict(get_class_properties(typ)) - class_name = typ.__name__ - - for name in docstring.params: - if name not in properties: - raise TypeError(f"doc-string parameter `{name}` is not a member of the data-class `{class_name}`") - - if not strict: - return - - for name in properties: - if name not in docstring.params: - raise TypeError(f"member `{name}` in data-class `{class_name}` is missing its doc-string") - - -def check_function_docstring(fn: Callable[..., Any], docstring: Docstring, strict: bool = False) -> None: - """ - Verifies the doc-string of a function or member function. - - :param strict: Whether to check if all function parameters and the return type have doc-strings. - :raises TypeError: Raised on a mismatch between doc-string parameters and function signature. 
- """ - - signature = get_signature(fn) - func_name = fn.__qualname__ - - for name in docstring.params: - if name not in signature.parameters: - raise TypeError(f"doc-string parameter `{name}` is absent from signature of function `{func_name}`") - - if docstring.returns is not None and signature.return_annotation is inspect.Signature.empty: - raise TypeError(f"doc-string has returns description in function `{func_name}` with no return type annotation") - - if not strict: - return - - for name, param in signature.parameters.items(): - # ignore `self` in member function signatures - if name == "self" and ( - param.kind is inspect.Parameter.POSITIONAL_ONLY or param.kind is inspect.Parameter.POSITIONAL_OR_KEYWORD - ): - continue - - if name not in docstring.params: - raise TypeError(f"function parameter `{name}` in `{func_name}` is missing its doc-string") - - if signature.return_annotation is not inspect.Signature.empty and docstring.returns is None: - raise TypeError(f"function `{func_name}` has no returns description in its doc-string") diff --git a/llama_stack/strong_typing/exception.py b/llama_stack/strong_typing/exception.py deleted file mode 100644 index af037cc3c..000000000 --- a/llama_stack/strong_typing/exception.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - - -class JsonKeyError(Exception): - "Raised when deserialization for a class or union type has failed because a matching member was not found." - - -class JsonValueError(Exception): - "Raised when (de)serialization of data has failed due to invalid value." - - -class JsonTypeError(Exception): - "Raised when deserialization of data has failed due to a type mismatch." diff --git a/llama_stack/strong_typing/inspection.py b/llama_stack/strong_typing/inspection.py deleted file mode 100644 index f3a4bef90..000000000 --- a/llama_stack/strong_typing/inspection.py +++ /dev/null @@ -1,1085 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import dataclasses -import datetime -import enum -import importlib -import importlib.machinery -import importlib.util -import inspect -import re -import sys -import types -import typing -import uuid -from typing import ( - Any, - Callable, - Dict, - Iterable, - List, - Literal, - NamedTuple, - Optional, - Protocol, - Set, - Tuple, - Type, - TypeVar, - Union, - runtime_checkable, -) - -if sys.version_info >= (3, 9): - from typing import Annotated -else: - from typing_extensions import Annotated - -if sys.version_info >= (3, 10): - from typing import TypeGuard -else: - from typing_extensions import TypeGuard - - -from pydantic import BaseModel -from pydantic.fields import FieldInfo - -S = TypeVar("S") -T = TypeVar("T") -K = TypeVar("K") -V = TypeVar("V") - - -def _is_type_like(data_type: object) -> bool: - """ - Checks if the object is a type or type-like object (e.g. generic type). - - :param data_type: The object to validate. - :returns: True if the object is a type or type-like object. 
- """ - - if isinstance(data_type, type): - # a standard type - return True - elif typing.get_origin(data_type) is not None: - # a generic type such as `list`, `dict` or `set` - return True - elif hasattr(data_type, "__forward_arg__"): - # an instance of `ForwardRef` - return True - elif data_type is Any: - # the special form `Any` - return True - else: - return False - - -if sys.version_info >= (3, 9): - TypeLike = Union[type, types.GenericAlias, typing.ForwardRef, Any] - - def is_type_like( - data_type: object, - ) -> TypeGuard[TypeLike]: - """ - Checks if the object is a type or type-like object (e.g. generic type). - - :param data_type: The object to validate. - :returns: True if the object is a type or type-like object. - """ - - return _is_type_like(data_type) - -else: - TypeLike = object - - def is_type_like( - data_type: object, - ) -> bool: - return _is_type_like(data_type) - - -def evaluate_member_type(typ: Any, cls: type) -> Any: - """ - Evaluates a forward reference type in a dataclass member. - - :param typ: The dataclass member type to convert. - :param cls: The dataclass in which the member is defined. - :returns: The evaluated type. - """ - - return evaluate_type(typ, sys.modules[cls.__module__]) - - -def evaluate_type(typ: Any, module: types.ModuleType) -> Any: - """ - Evaluates a forward reference type. - - :param typ: The type to convert, typically a dataclass member type. - :param module: The context for the type, i.e. the module in which the member is defined. - :returns: The evaluated type. - """ - - if isinstance(typ, str): - # evaluate data-class field whose type annotation is a string - return eval(typ, module.__dict__, locals()) - if isinstance(typ, typing.ForwardRef): - if sys.version_info >= (3, 9): - return typ._evaluate(module.__dict__, locals(), recursive_guard=frozenset()) - else: - return typ._evaluate(module.__dict__, locals()) - else: - return typ - - -@runtime_checkable -class DataclassInstance(Protocol): - __dataclass_fields__: typing.ClassVar[Dict[str, dataclasses.Field]] - - -def is_dataclass_type(typ: Any) -> TypeGuard[Type[DataclassInstance]]: - "True if the argument corresponds to a data class type (but not an instance)." - - typ = unwrap_annotated_type(typ) - return isinstance(typ, type) and dataclasses.is_dataclass(typ) - - -def is_dataclass_instance(obj: Any) -> TypeGuard[DataclassInstance]: - "True if the argument corresponds to a data class instance (but not a type)." - - return not isinstance(obj, type) and dataclasses.is_dataclass(obj) - - -@dataclasses.dataclass -class DataclassField: - name: str - type: Any - default: Any - - def __init__(self, name: str, type: Any, default: Any = dataclasses.MISSING) -> None: - self.name = name - self.type = type - self.default = default - - -def dataclass_fields(cls: Type[DataclassInstance]) -> Iterable[DataclassField]: - "Generates the fields of a data-class resolving forward references." - - for field in dataclasses.fields(cls): - yield DataclassField(field.name, evaluate_member_type(field.type, cls), field.default) - - -def dataclass_field_by_name(cls: Type[DataclassInstance], name: str) -> DataclassField: - "Looks up a field in a data-class by its field name." 
- - for field in dataclasses.fields(cls): - if field.name == name: - return DataclassField(field.name, evaluate_member_type(field.type, cls)) - - raise LookupError(f"field `{name}` missing from class `{cls.__name__}`") - - -def is_named_tuple_instance(obj: Any) -> TypeGuard[NamedTuple]: - "True if the argument corresponds to a named tuple instance." - - return is_named_tuple_type(type(obj)) - - -def is_named_tuple_type(typ: Any) -> TypeGuard[Type[NamedTuple]]: - """ - True if the argument corresponds to a named tuple type. - - Calling the function `collections.namedtuple` gives a new type that is a subclass of `tuple` (and no other classes) - with a member named `_fields` that is a tuple whose items are all strings. - """ - - if not isinstance(typ, type): - return False - - typ = unwrap_annotated_type(typ) - - b = getattr(typ, "__bases__", None) - if b is None: - return False - - if len(b) != 1 or b[0] != tuple: - return False - - f = getattr(typ, "_fields", None) - if not isinstance(f, tuple): - return False - - return all(isinstance(n, str) for n in f) - - -if sys.version_info >= (3, 11): - - def is_type_enum(typ: object) -> TypeGuard[Type[enum.Enum]]: - "True if the specified type is an enumeration type." - - typ = unwrap_annotated_type(typ) - return isinstance(typ, enum.EnumType) - -else: - - def is_type_enum(typ: object) -> TypeGuard[Type[enum.Enum]]: - "True if the specified type is an enumeration type." - - typ = unwrap_annotated_type(typ) - - # use an explicit isinstance(..., type) check to filter out special forms like generics - return isinstance(typ, type) and issubclass(typ, enum.Enum) - - -def enum_value_types(enum_type: Type[enum.Enum]) -> List[type]: - """ - Returns all unique value types of the `enum.Enum` type in definition order. - """ - - # filter unique enumeration value types by keeping definition order - return list(dict.fromkeys(type(e.value) for e in enum_type)) - - -def extend_enum( - source: Type[enum.Enum], -) -> Callable[[Type[enum.Enum]], Type[enum.Enum]]: - """ - Creates a new enumeration type extending the set of values in an existing type. - - :param source: The existing enumeration type to be extended with new values. - :returns: A new enumeration type with the extended set of values. - """ - - def wrap(extend: Type[enum.Enum]) -> Type[enum.Enum]: - # create new enumeration type combining the values from both types - values: Dict[str, Any] = {} - values.update((e.name, e.value) for e in source) - values.update((e.name, e.value) for e in extend) - # mypy fails to determine that __name__ is always a string; hence the `ignore` directive. - enum_class: Type[enum.Enum] = enum.Enum(extend.__name__, values) # type: ignore[misc] - - # assign the newly created type to the same module where the extending class is defined - enum_class.__module__ = extend.__module__ - enum_class.__doc__ = extend.__doc__ - setattr(sys.modules[extend.__module__], extend.__name__, enum_class) - - return enum.unique(enum_class) - - return wrap - - -if sys.version_info >= (3, 10): - - def _is_union_like(typ: object) -> bool: - "True if type is a union such as `Union[T1, T2, ...]` or a union type `T1 | T2`." - - return typing.get_origin(typ) is Union or isinstance(typ, types.UnionType) - -else: - - def _is_union_like(typ: object) -> bool: - "True if type is a union such as `Union[T1, T2, ...]` or a union type `T1 | T2`." 
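# The two union spellings this predicate must recognize, checked directly
# (the PEP 604 `T1 | T2` form requires Python 3.10+):
import types
import typing

assert typing.get_origin(typing.Union[int, str]) is typing.Union
assert isinstance(int | str, types.UnionType)
assert typing.get_origin(int | str) is types.UnionType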
- - return typing.get_origin(typ) is Union - - -def is_type_optional(typ: object, strict: bool = False) -> TypeGuard[Type[Optional[Any]]]: - """ - True if the type annotation corresponds to an optional type (e.g. `Optional[T]` or `Union[T1,T2,None]`). - - `Optional[T]` is represented as `Union[T, None]` is classic style, and is equivalent to `T | None` in new style. - - :param strict: True if only `Optional[T]` qualifies as an optional type but `Union[T1, T2, None]` does not. - """ - - typ = unwrap_annotated_type(typ) - - if _is_union_like(typ): - args = typing.get_args(typ) - if strict and len(args) != 2: - return False - - return type(None) in args - - return False - - -def unwrap_optional_type(typ: Type[Optional[T]]) -> Type[T]: - """ - Extracts the inner type of an optional type. - - :param typ: The optional type `Optional[T]`. - :returns: The inner type `T`. - """ - - return rewrap_annotated_type(_unwrap_optional_type, typ) - - -def _unwrap_optional_type(typ: Type[Optional[T]]) -> Type[T]: - "Extracts the type qualified as optional (e.g. returns `T` for `Optional[T]`)." - - # Optional[T] is represented internally as Union[T, None] - if not _is_union_like(typ): - raise TypeError("optional type must have un-subscripted type of Union") - - # will automatically unwrap Union[T] into T - return Union[tuple(filter(lambda item: item is not type(None), typing.get_args(typ)))] # type: ignore[return-value] - - -def is_type_union(typ: object) -> bool: - "True if the type annotation corresponds to a union type (e.g. `Union[T1,T2,T3]`)." - - typ = unwrap_annotated_type(typ) - if _is_union_like(typ): - args = typing.get_args(typ) - return len(args) > 2 or type(None) not in args - - return False - - -def unwrap_union_types(typ: object) -> Tuple[object, ...]: - """ - Extracts the inner types of a union type. - - :param typ: The union type `Union[T1, T2, ...]`. - :returns: The inner types `T1`, `T2`, etc. - """ - - typ = unwrap_annotated_type(typ) - return _unwrap_union_types(typ) - - -def _unwrap_union_types(typ: object) -> Tuple[object, ...]: - "Extracts the types in a union (e.g. returns a tuple of types `T1` and `T2` for `Union[T1, T2]`)." - - if not _is_union_like(typ): - raise TypeError("union type must have un-subscripted type of Union") - - return typing.get_args(typ) - - -def is_type_literal(typ: object) -> bool: - "True if the specified type is a literal of one or more constant values, e.g. `Literal['string']` or `Literal[42]`." - - typ = unwrap_annotated_type(typ) - return typing.get_origin(typ) is Literal - - -def unwrap_literal_value(typ: object) -> Any: - """ - Extracts the single constant value captured by a literal type. - - :param typ: The literal type `Literal[value]`. - :returns: The values captured by the literal type. - """ - - args = unwrap_literal_values(typ) - if len(args) != 1: - raise TypeError("too many values in literal type") - - return args[0] - - -def unwrap_literal_values(typ: object) -> Tuple[Any, ...]: - """ - Extracts the constant values captured by a literal type. - - :param typ: The literal type `Literal[value, ...]`. - :returns: A tuple of values captured by the literal type. - """ - - typ = unwrap_annotated_type(typ) - return typing.get_args(typ) - - -def unwrap_literal_types(typ: object) -> Tuple[type, ...]: - """ - Extracts the types of the constant values captured by a literal type. - - :param typ: The literal type `Literal[value, ...]`. - :returns: A tuple of item types `T` such that `type(value) == T`. 
- """ - - return tuple(type(t) for t in unwrap_literal_values(typ)) - - -def is_generic_list(typ: object) -> TypeGuard[Type[list]]: - "True if the specified type is a generic list, i.e. `List[T]`." - - typ = unwrap_annotated_type(typ) - return typing.get_origin(typ) is list - - -def unwrap_generic_list(typ: Type[List[T]]) -> Type[T]: - """ - Extracts the item type of a list type. - - :param typ: The list type `List[T]`. - :returns: The item type `T`. - """ - - return rewrap_annotated_type(_unwrap_generic_list, typ) - - -def _unwrap_generic_list(typ: Type[List[T]]) -> Type[T]: - "Extracts the item type of a list type (e.g. returns `T` for `List[T]`)." - - (list_type,) = typing.get_args(typ) # unpack single tuple element - return list_type # type: ignore[no-any-return] - - -def is_generic_set(typ: object) -> TypeGuard[Type[set]]: - "True if the specified type is a generic set, i.e. `Set[T]`." - - typ = unwrap_annotated_type(typ) - return typing.get_origin(typ) is set - - -def unwrap_generic_set(typ: Type[Set[T]]) -> Type[T]: - """ - Extracts the item type of a set type. - - :param typ: The set type `Set[T]`. - :returns: The item type `T`. - """ - - return rewrap_annotated_type(_unwrap_generic_set, typ) - - -def _unwrap_generic_set(typ: Type[Set[T]]) -> Type[T]: - "Extracts the item type of a set type (e.g. returns `T` for `Set[T]`)." - - (set_type,) = typing.get_args(typ) # unpack single tuple element - return set_type # type: ignore[no-any-return] - - -def is_generic_dict(typ: object) -> TypeGuard[Type[dict]]: - "True if the specified type is a generic dictionary, i.e. `Dict[KeyType, ValueType]`." - - typ = unwrap_annotated_type(typ) - return typing.get_origin(typ) is dict - - -def unwrap_generic_dict(typ: Type[Dict[K, V]]) -> Tuple[Type[K], Type[V]]: - """ - Extracts the key and value types of a dictionary type as a tuple. - - :param typ: The dictionary type `Dict[K, V]`. - :returns: The key and value types `K` and `V`. - """ - - return _unwrap_generic_dict(unwrap_annotated_type(typ)) - - -def _unwrap_generic_dict(typ: Type[Dict[K, V]]) -> Tuple[Type[K], Type[V]]: - "Extracts the key and value types of a dict type (e.g. returns (`K`, `V`) for `Dict[K, V]`)." - - key_type, value_type = typing.get_args(typ) - return key_type, value_type - - -def is_type_annotated(typ: TypeLike) -> bool: - "True if the type annotation corresponds to an annotated type (i.e. `Annotated[T, ...]`)." - - return getattr(typ, "__metadata__", None) is not None - - -def get_annotation(data_type: TypeLike, annotation_type: Type[T]) -> Optional[T]: - """ - Returns the first annotation on a data type that matches the expected annotation type. - - :param data_type: The annotated type from which to extract the annotation. - :param annotation_type: The annotation class to look for. - :returns: The annotation class instance found (if any). - """ - - metadata = getattr(data_type, "__metadata__", None) - if metadata is not None: - for annotation in metadata: - if isinstance(annotation, annotation_type): - return annotation - - return None - - -def unwrap_annotated_type(typ: T) -> T: - "Extracts the wrapped type from an annotated type (e.g. returns `T` for `Annotated[T, ...]`)." - - if is_type_annotated(typ): - # type is Annotated[T, ...] - return typing.get_args(typ)[0] # type: ignore[no-any-return] - else: - # type is a regular type - return typ - - -def rewrap_annotated_type(transform: Callable[[Type[S]], Type[T]], typ: Type[S]) -> Type[T]: - """ - Un-boxes, transforms and re-boxes an optionally annotated type. 
- - :param transform: A function that maps an un-annotated type to another type. - :param typ: A type to un-box (if necessary), transform, and re-box (if necessary). - """ - - metadata = getattr(typ, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] - inner_type = typing.get_args(typ)[0] - else: - # type is a regular type - inner_type = typ - - transformed_type = transform(inner_type) - - if metadata is not None: - return Annotated[(transformed_type, *metadata)] # type: ignore[return-value] - else: - return transformed_type - - -def get_module_classes(module: types.ModuleType) -> List[type]: - "Returns all classes declared directly in a module." - - def is_class_member(member: object) -> TypeGuard[type]: - return inspect.isclass(member) and member.__module__ == module.__name__ - - return [class_type for _, class_type in inspect.getmembers(module, is_class_member)] - - -if sys.version_info >= (3, 9): - - def get_resolved_hints(typ: type) -> Dict[str, type]: - return typing.get_type_hints(typ, include_extras=True) - -else: - - def get_resolved_hints(typ: type) -> Dict[str, type]: - return typing.get_type_hints(typ) - - -def get_class_properties(typ: type) -> Iterable[Tuple[str, type | str]]: - "Returns all properties of a class." - - if is_dataclass_type(typ): - return ((field.name, field.type) for field in dataclasses.fields(typ)) - elif hasattr(typ, "model_fields"): - # Pydantic BaseModel - use model_fields to exclude ClassVar and other non-field attributes - # Reconstruct Annotated type if discriminator exists to preserve metadata - from typing import Annotated, Any - - from pydantic.fields import FieldInfo - - def get_field_type(name: str, field: Any) -> type | str: - # If field has discriminator, wrap in Annotated to preserve it for schema generation - if field.discriminator: - field_info = FieldInfo(annotation=None, discriminator=field.discriminator) - # Annotated returns _AnnotatedAlias which isn't a type but is valid here - return Annotated[field.annotation, field_info] # type: ignore[return-value] - # field.annotation can be Union types, Annotated, etc. which aren't type but are valid - return field.annotation # type: ignore[return-value,no-any-return] - - return ((name, get_field_type(name, field)) for name, field in typ.model_fields.items()) - else: - resolved_hints = get_resolved_hints(typ) - return resolved_hints.items() - - -def get_class_property(typ: type, name: str) -> Optional[type | str]: - "Looks up the annotated type of a property in a class by its property name." - - for property_name, property_type in get_class_properties(typ): - if name == property_name: - return property_type - return None - - -@dataclasses.dataclass -class _ROOT: - pass - - -def get_referenced_types(typ: TypeLike, module: Optional[types.ModuleType] = None) -> Set[type]: - """ - Extracts types directly or indirectly referenced by this type. - - For example, extract `T` from `List[T]`, `Optional[T]` or `Annotated[T, ...]`, `K` and `V` from `Dict[K,V]`, - `A` and `B` from `Union[A,B]`. - - :param typ: A type or special form. - :param module: The context in which types are evaluated. - :returns: Types referenced by the given type or special form. - """ - - collector = TypeCollector() - collector.run(typ, _ROOT, module) - return collector.references - - -class TypeCollector: - """ - Collects types directly or indirectly referenced by a type. - - :param graph: The type dependency graph, linking types to types they depend on. 
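The deleted `get_referenced_types` walks generics, optionals and unions to collect every type reachable from a root type. A sketch with hypothetical `Inner`/`Outer` dataclasses:

import dataclasses
from typing import Dict, List, Optional

@dataclasses.dataclass
class Inner:
    value: int

@dataclasses.dataclass
class Outer:
    items: List[Inner]
    lookup: Optional[Dict[str, Inner]]

# Expected to contain Inner (alongside builtins such as int and str):
assert Inner in get_referenced_types(Outer)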
- """ - - graph: Dict[type, Set[type]] - - @property - def references(self) -> Set[type]: - "Types collected by the type collector." - - dependencies = set() - for edges in self.graph.values(): - dependencies.update(edges) - return dependencies - - def __init__(self) -> None: - self.graph = {_ROOT: set()} - - def traverse(self, typ: type) -> None: - "Finds all dependent types of a type." - - self.run(typ, _ROOT, sys.modules[typ.__module__]) - - def traverse_all(self, types: Iterable[type]) -> None: - "Finds all dependent types of a list of types." - - for typ in types: - self.traverse(typ) - - def run( - self, - typ: TypeLike, - cls: Type[DataclassInstance], - module: Optional[types.ModuleType], - ) -> None: - """ - Extracts types indirectly referenced by this type. - - For example, extract `T` from `List[T]`, `Optional[T]` or `Annotated[T, ...]`, `K` and `V` from `Dict[K,V]`, - `A` and `B` from `Union[A,B]`. - - :param typ: A type or special form. - :param cls: A dataclass type being expanded for dependent types. - :param module: The context in which types are evaluated. - :returns: Types referenced by the given type or special form. - """ - - if typ is type(None) or typ is Any: - return - - if isinstance(typ, type): - self.graph[cls].add(typ) - - if typ in self.graph: - return - - self.graph[typ] = set() - - metadata = getattr(typ, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] - arg = typing.get_args(typ)[0] - return self.run(arg, cls, module) - - # type is a forward reference - if isinstance(typ, str) or isinstance(typ, typing.ForwardRef): - if module is None: - raise ValueError("missing context for evaluating types") - - evaluated_type = evaluate_type(typ, module) - return self.run(evaluated_type, cls, module) - - # type is a special form - origin = typing.get_origin(typ) - if origin in [list, dict, frozenset, set, tuple, Union]: - for arg in typing.get_args(typ): - self.run(arg, cls, module) - return - elif origin is Literal: - return - - # type is optional or a union type - if is_type_optional(typ): - return self.run(unwrap_optional_type(typ), cls, module) - if is_type_union(typ): - for union_type in unwrap_union_types(typ): - self.run(union_type, cls, module) - return - - # type is a regular type - elif is_dataclass_type(typ) or is_type_enum(typ) or isinstance(typ, type): - context = sys.modules[typ.__module__] - if is_dataclass_type(typ): - for field in dataclass_fields(typ): - self.run(field.type, typ, context) - else: - for field_name, field_type in get_resolved_hints(typ).items(): - self.run(field_type, typ, context) - return - - raise TypeError(f"expected: type-like; got: {typ}") - - -if sys.version_info >= (3, 10): - - def get_signature(fn: Callable[..., Any]) -> inspect.Signature: - "Extracts the signature of a function." - - return inspect.signature(fn, eval_str=True) - -else: - - def get_signature(fn: Callable[..., Any]) -> inspect.Signature: - "Extracts the signature of a function." - - return inspect.signature(fn) - - -def is_reserved_property(name: str) -> bool: - "True if the name stands for an internal property." - - # filter built-in and special properties - if re.match(r"^__.+__$", name): - return True - - # filter built-in special names - if name in ["_abc_impl"]: - return True - - return False - - -def create_module(name: str) -> types.ModuleType: - """ - Creates a new module dynamically at run-time. - - :param name: Fully qualified name of the new module (with dot notation). 
- """ - - if name in sys.modules: - raise KeyError(f"{name!r} already in sys.modules") - - spec = importlib.machinery.ModuleSpec(name, None) - module = importlib.util.module_from_spec(spec) - sys.modules[name] = module - if spec.loader is not None: - spec.loader.exec_module(module) - return module - - -if sys.version_info >= (3, 10): - - def create_data_type(class_name: str, fields: List[Tuple[str, type]]) -> type: - """ - Creates a new data-class type dynamically. - - :param class_name: The name of new data-class type. - :param fields: A list of fields (and their type) that the new data-class type is expected to have. - :returns: The newly created data-class type. - """ - - # has the `slots` parameter - return dataclasses.make_dataclass(class_name, fields, slots=True) - -else: - - def create_data_type(class_name: str, fields: List[Tuple[str, type]]) -> type: - """ - Creates a new data-class type dynamically. - - :param class_name: The name of new data-class type. - :param fields: A list of fields (and their type) that the new data-class type is expected to have. - :returns: The newly created data-class type. - """ - - cls = dataclasses.make_dataclass(class_name, fields) - - cls_dict = dict(cls.__dict__) - field_names = tuple(field.name for field in dataclasses.fields(cls)) - - cls_dict["__slots__"] = field_names - - for field_name in field_names: - cls_dict.pop(field_name, None) - cls_dict.pop("__dict__", None) - - qualname = getattr(cls, "__qualname__", None) - cls = type(cls)(cls.__name__, (), cls_dict) - if qualname is not None: - cls.__qualname__ = qualname - - return cls - - -def create_object(typ: Type[T]) -> T: - "Creates an instance of a type." - - if issubclass(typ, Exception): - # exception types need special treatment - e = typ.__new__(typ) - return typing.cast(T, e) - else: - return object.__new__(typ) - - -if sys.version_info >= (3, 9): - TypeOrGeneric = Union[type, types.GenericAlias] - -else: - TypeOrGeneric = object - - -def is_generic_instance(obj: Any, typ: TypeLike) -> bool: - """ - Returns whether an object is an instance of a generic class, a standard class or of a subclass thereof. - - This function checks the following items recursively: - * items of a list - * keys and values of a dictionary - * members of a set - * items of a tuple - * members of a union type - - :param obj: The (possibly generic container) object to check recursively. - :param typ: The expected type of the object. - """ - - if isinstance(typ, typing.ForwardRef): - fwd: typing.ForwardRef = typ - identifier = fwd.__forward_arg__ - typ = eval(identifier) - if isinstance(typ, type): - return isinstance(obj, typ) - else: - return False - - # generic types (e.g. list, dict, set, etc.) 
- origin_type = typing.get_origin(typ) - if origin_type is list: - if not isinstance(obj, list): - return False - (list_item_type,) = typing.get_args(typ) # unpack single tuple element - list_obj: list = obj - return all(is_generic_instance(item, list_item_type) for item in list_obj) - elif origin_type is dict: - if not isinstance(obj, dict): - return False - key_type, value_type = typing.get_args(typ) - dict_obj: dict = obj - return all( - is_generic_instance(key, key_type) and is_generic_instance(value, value_type) - for key, value in dict_obj.items() - ) - elif origin_type is set: - if not isinstance(obj, set): - return False - (set_member_type,) = typing.get_args(typ) # unpack single tuple element - set_obj: set = obj - return all(is_generic_instance(item, set_member_type) for item in set_obj) - elif origin_type is tuple: - if not isinstance(obj, tuple): - return False - return all( - is_generic_instance(item, tuple_item_type) - for tuple_item_type, item in zip( - (tuple_item_type for tuple_item_type in typing.get_args(typ)), - (item for item in obj), - strict=False, - ) - ) - elif origin_type is Union: - return any(is_generic_instance(obj, member_type) for member_type in typing.get_args(typ)) - elif isinstance(typ, type): - return isinstance(obj, typ) - else: - raise TypeError(f"expected `type` but got: {typ}") - - -class RecursiveChecker: - _pred: Optional[Callable[[type, Any], bool]] - - def __init__(self, pred: Callable[[type, Any], bool]) -> None: - """ - Creates a checker to verify if a predicate applies to all nested member properties of an object recursively. - - :param pred: The predicate to test on member properties. Takes a property type and a property value. - """ - - self._pred = pred - - def pred(self, typ: type, obj: Any) -> bool: - "Acts as a workaround for the type checker mypy." - - assert self._pred is not None - return self._pred(typ, obj) - - def check(self, typ: TypeLike, obj: Any) -> bool: - """ - Checks if a predicate applies to all nested member properties of an object recursively. - - :param typ: The type to recurse into. - :param obj: The object to inspect recursively. Must be an instance of the given type. - :returns: True if all member properties pass the filter predicate. - """ - - # check for well-known types - if ( - typ is type(None) - or typ is bool - or typ is int - or typ is float - or typ is str - or typ is bytes - or typ is datetime.datetime - or typ is datetime.date - or typ is datetime.time - or typ is uuid.UUID - ): - return self.pred(typing.cast(type, typ), obj) - - # generic types (e.g. list, dict, set, etc.) 
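Expected behavior of `is_generic_instance`, which recurses into containers and unions (illustrative):

from typing import Dict, List, Tuple, Union

assert is_generic_instance([1, 2, 3], List[int])
assert is_generic_instance({"a": [True]}, Dict[str, List[bool]])
assert not is_generic_instance({"a": "b"}, Dict[str, int])
assert is_generic_instance((1, "x"), Tuple[int, str])
assert is_generic_instance("text", Union[int, str])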
- origin_type = typing.get_origin(typ) - if origin_type is list: - if not isinstance(obj, list): - raise TypeError(f"expected `list` but got: {obj}") - (list_item_type,) = typing.get_args(typ) # unpack single tuple element - list_obj: list = obj - return all(self.check(list_item_type, item) for item in list_obj) - elif origin_type is dict: - if not isinstance(obj, dict): - raise TypeError(f"expected `dict` but got: {obj}") - key_type, value_type = typing.get_args(typ) - dict_obj: dict = obj - return all(self.check(value_type, item) for item in dict_obj.values()) - elif origin_type is set: - if not isinstance(obj, set): - raise TypeError(f"expected `set` but got: {obj}") - (set_member_type,) = typing.get_args(typ) # unpack single tuple element - set_obj: set = obj - return all(self.check(set_member_type, item) for item in set_obj) - elif origin_type is tuple: - if not isinstance(obj, tuple): - raise TypeError(f"expected `tuple` but got: {obj}") - return all( - self.check(tuple_item_type, item) - for tuple_item_type, item in zip( - (tuple_item_type for tuple_item_type in typing.get_args(typ)), - (item for item in obj), - strict=False, - ) - ) - elif origin_type is Union: - return self.pred(typ, obj) # type: ignore[arg-type] - - if not inspect.isclass(typ): - raise TypeError(f"expected `type` but got: {typ}") - - # enumeration type - if issubclass(typ, enum.Enum): - if not isinstance(obj, enum.Enum): - raise TypeError(f"expected `{typ}` but got: {obj}") - return self.pred(typ, obj) - - # class types with properties - if is_named_tuple_type(typ): - if not isinstance(obj, tuple): - raise TypeError(f"expected `NamedTuple` but got: {obj}") - return all( - self.check(field_type, getattr(obj, field_name)) - for field_name, field_type in typing.get_type_hints(typ).items() - ) - elif is_dataclass_type(typ): - if not isinstance(obj, typ): - raise TypeError(f"expected `{typ}` but got: {obj}") - resolved_hints = get_resolved_hints(typ) - return all( - self.check(resolved_hints[field.name], getattr(obj, field.name)) for field in dataclasses.fields(typ) - ) - else: - if not isinstance(obj, typ): - raise TypeError(f"expected `{typ}` but got: {obj}") - return all( - self.check(property_type, getattr(obj, property_name)) - for property_name, property_type in get_class_properties(typ) - ) - - -def check_recursive( - obj: object, - /, - *, - pred: Optional[Callable[[type, Any], bool]] = None, - type_pred: Optional[Callable[[type], bool]] = None, - value_pred: Optional[Callable[[Any], bool]] = None, -) -> bool: - """ - Checks if a predicate applies to all nested member properties of an object recursively. - - :param obj: The object to inspect recursively. - :param pred: The predicate to test on member properties. Takes a property type and a property value. - :param type_pred: Constrains the check to properties of an expected type. Properties of other types pass automatically. - :param value_pred: Verifies a condition on member property values (of an expected type). - :returns: True if all member properties pass the filter predicate(s). 
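A sketch of `check_recursive` combining `type_pred` and `value_pred`, here requiring that every string property is non-empty (the `User` dataclass is hypothetical):

import dataclasses

@dataclasses.dataclass
class User:
    name: str
    email: str

assert check_recursive(
    User("alice", "alice@example.com"),
    type_pred=lambda typ: typ is str,
    value_pred=lambda value: len(value) > 0,
)
assert not check_recursive(User("", "x"), type_pred=lambda t: t is str, value_pred=bool)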
- """ - - if type_pred is not None and value_pred is not None: - if pred is not None: - raise TypeError("filter predicate not permitted when type and value predicates are present") - - type_p: Callable[[Type[T]], bool] = type_pred - value_p: Callable[[T], bool] = value_pred - pred = lambda typ, obj: not type_p(typ) or value_p(obj) # noqa: E731 - - elif value_pred is not None: - if pred is not None: - raise TypeError("filter predicate not permitted when value predicate is present") - - value_only_p: Callable[[T], bool] = value_pred - pred = lambda typ, obj: value_only_p(obj) # noqa: E731 - - elif type_pred is not None: - raise TypeError("value predicate required when type predicate is present") - - elif pred is None: - pred = lambda typ, obj: True # noqa: E731 - - return RecursiveChecker(pred).check(type(obj), obj) - - -def is_unwrapped_body_param(param_type: Any) -> bool: - """ - Check if a parameter type represents an unwrapped body parameter. - An unwrapped body parameter is an Annotated type with Body(embed=False) - - This is used to determine whether request parameters should be flattened - in OpenAPI specs and client libraries (matching FastAPI's embed=False behavior). - - Args: - param_type: The parameter type annotation to check - - Returns: - True if the parameter should be treated as an unwrapped body parameter - """ - # Check if it's Annotated with Body(embed=False) - if typing.get_origin(param_type) is Annotated: - args = typing.get_args(param_type) - base_type = args[0] - metadata = args[1:] - - # Look for Body annotation with embed=False - # Body() returns a FieldInfo object, so we check for that type and the embed attribute - for item in metadata: - if isinstance(item, FieldInfo) and hasattr(item, "embed") and not item.embed: - return inspect.isclass(base_type) and issubclass(base_type, BaseModel) - - return False diff --git a/llama_stack/strong_typing/mapping.py b/llama_stack/strong_typing/mapping.py deleted file mode 100644 index 408375a9f..000000000 --- a/llama_stack/strong_typing/mapping.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import keyword -from typing import Optional - -from .auxiliary import Alias -from .inspection import get_annotation - - -def python_field_to_json_property(python_id: str, python_type: Optional[object] = None) -> str: - """ - Map a Python field identifier to a JSON property name. - - Authors may use an underscore appended at the end of a Python identifier as per PEP 8 if it clashes with a Python - keyword: e.g. `in` would become `in_` and `from` would become `from_`. Remove these suffixes when exporting to JSON. - - Authors may supply an explicit alias with the type annotation `Alias`, e.g. `Annotated[MyType, Alias("alias")]`. - """ - - if python_type is not None: - alias = get_annotation(python_type, Alias) - if alias: - return alias.name - - if python_id.endswith("_"): - id = python_id[:-1] - if keyword.iskeyword(id): - return id - - return python_id diff --git a/llama_stack/strong_typing/name.py b/llama_stack/strong_typing/name.py deleted file mode 100644 index a1a2ae5f1..000000000 --- a/llama_stack/strong_typing/name.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import typing -from typing import Any, Literal, Optional, Tuple, Union - -from .auxiliary import _auxiliary_types -from .inspection import ( - TypeLike, - is_generic_dict, - is_generic_list, - is_type_optional, - is_type_union, - unwrap_generic_dict, - unwrap_generic_list, - unwrap_optional_type, - unwrap_union_types, -) - - -class TypeFormatter: - """ - Type formatter. - - :param use_union_operator: Whether to emit union types as `X | Y` as per PEP 604. - """ - - use_union_operator: bool - - def __init__(self, use_union_operator: bool = False) -> None: - self.use_union_operator = use_union_operator - - def union_to_str(self, data_type_args: Tuple[TypeLike, ...]) -> str: - if self.use_union_operator: - return " | ".join(self.python_type_to_str(t) for t in data_type_args) - else: - if len(data_type_args) == 2 and type(None) in data_type_args: - # Optional[T] is represented as Union[T, None] - origin_name = "Optional" - data_type_args = tuple(t for t in data_type_args if t is not type(None)) - else: - origin_name = "Union" - - args = ", ".join(self.python_type_to_str(t) for t in data_type_args) - return f"{origin_name}[{args}]" - - def plain_type_to_str(self, data_type: TypeLike) -> str: - "Returns the string representation of a Python type without metadata." - - # return forward references as the annotation string - if isinstance(data_type, typing.ForwardRef): - fwd: typing.ForwardRef = data_type - return fwd.__forward_arg__ - elif isinstance(data_type, str): - return data_type - - origin = typing.get_origin(data_type) - if origin is not None: - data_type_args = typing.get_args(data_type) - - if origin is dict: # Dict[T] - origin_name = "Dict" - elif origin is list: # List[T] - origin_name = "List" - elif origin is set: # Set[T] - origin_name = "Set" - elif origin is Union: - return self.union_to_str(data_type_args) - elif origin is Literal: - args = ", ".join(repr(arg) for arg in data_type_args) - return f"Literal[{args}]" - else: - origin_name = origin.__name__ - - args = ", ".join(self.python_type_to_str(t) for t in data_type_args) - return f"{origin_name}[{args}]" - - return data_type.__name__ - - def python_type_to_str(self, data_type: TypeLike) -> str: - "Returns the string representation of a Python type." - - if data_type is type(None): - return "None" - - # use compact name for alias types - name = _auxiliary_types.get(data_type) - if name is not None: - return name - - metadata = getattr(data_type, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] - metatuple: Tuple[Any, ...] 
= metadata - arg = typing.get_args(data_type)[0] - - # check for auxiliary types with user-defined annotations - metaset = set(metatuple) - for auxiliary_type, auxiliary_name in _auxiliary_types.items(): - auxiliary_arg = typing.get_args(auxiliary_type)[0] - if arg is not auxiliary_arg: - continue - - auxiliary_metatuple: Optional[Tuple[Any, ...]] = getattr(auxiliary_type, "__metadata__", None) - if auxiliary_metatuple is None: - continue - - if metaset.issuperset(auxiliary_metatuple): - # type is an auxiliary type with extra annotations - auxiliary_args = ", ".join(repr(m) for m in metatuple if m not in auxiliary_metatuple) - return f"Annotated[{auxiliary_name}, {auxiliary_args}]" - - # type is an annotated type - args = ", ".join(repr(m) for m in metatuple) - return f"Annotated[{self.plain_type_to_str(arg)}, {args}]" - else: - # type is a regular type - return self.plain_type_to_str(data_type) - - -def python_type_to_str(data_type: TypeLike, use_union_operator: bool = False) -> str: - """ - Returns the string representation of a Python type. - - :param use_union_operator: Whether to emit union types as `X | Y` as per PEP 604. - """ - - fmt = TypeFormatter(use_union_operator) - return fmt.python_type_to_str(data_type) - - -def python_type_to_name(data_type: TypeLike, force: bool = False) -> str: - """ - Returns the short name of a Python type. - - :param force: Whether to produce a name for composite types such as generics. - """ - - # use compact name for alias types - name = _auxiliary_types.get(data_type) - if name is not None: - return name - - # unwrap annotated types - metadata = getattr(data_type, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] - arg = typing.get_args(data_type)[0] - return python_type_to_name(arg) - - if force: - # generic types - if is_type_optional(data_type, strict=True): - inner_name = python_type_to_name(unwrap_optional_type(data_type)) - return f"Optional__{inner_name}" - elif is_generic_list(data_type): - item_name = python_type_to_name(unwrap_generic_list(data_type)) - return f"List__{item_name}" - elif is_generic_dict(data_type): - key_type, value_type = unwrap_generic_dict(data_type) - key_name = python_type_to_name(key_type) - value_name = python_type_to_name(value_type) - return f"Dict__{key_name}__{value_name}" - elif is_type_union(data_type): - member_types = unwrap_union_types(data_type) - member_names = "__".join(python_type_to_name(member_type) for member_type in member_types) - return f"Union__{member_names}" - - # named system or user-defined type - if hasattr(data_type, "__name__") and not typing.get_args(data_type): - return data_type.__name__ - - raise TypeError(f"cannot assign a simple name to type: {data_type}") diff --git a/llama_stack/strong_typing/schema.py b/llama_stack/strong_typing/schema.py deleted file mode 100644 index f911fc41f..000000000 --- a/llama_stack/strong_typing/schema.py +++ /dev/null @@ -1,792 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. 
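Illustrative outputs of the deleted formatting helpers `python_type_to_str` and `python_type_to_name`:

from typing import Dict, List, Optional, Union

assert python_type_to_str(Optional[int]) == "Optional[int]"
assert python_type_to_str(Union[int, str], use_union_operator=True) == "int | str"
assert python_type_to_name(List[int], force=True) == "List__int"
assert python_type_to_name(Dict[str, int], force=True) == "Dict__str__int"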
- -:see: https://github.com/hunyadi/strong_typing -""" - -import collections.abc -import dataclasses -import datetime -import decimal -import enum -import functools -import inspect -import json -import types -import typing -import uuid -from copy import deepcopy -from typing import ( - Any, - Callable, - ClassVar, - Dict, - List, - Literal, - Optional, - Tuple, - Type, - TypeVar, - Union, - overload, -) - -import jsonschema -from typing_extensions import Annotated - -from . import docstring -from .auxiliary import ( - Alias, - IntegerRange, - MaxLength, - MinLength, - Precision, - get_auxiliary_format, -) -from .core import JsonArray, JsonObject, JsonType, Schema, StrictJsonType -from .inspection import ( - TypeLike, - enum_value_types, - get_annotation, - get_class_properties, - is_type_enum, - is_type_like, - is_type_optional, - unwrap_optional_type, -) -from .name import python_type_to_name -from .serialization import object_to_json - -# determines the maximum number of distinct enum members up to which a Dict[EnumType, Any] is converted into a JSON -# schema with explicitly listed properties (rather than employing a pattern constraint on property names) -OBJECT_ENUM_EXPANSION_LIMIT = 4 - - -T = TypeVar("T") - - -def get_class_docstrings(data_type: type) -> Tuple[Optional[str], Optional[str]]: - docstr = docstring.parse_type(data_type) - - # check if class has a doc-string other than the auto-generated string assigned by @dataclass - if docstring.has_default_docstring(data_type): - return None, None - - return docstr.short_description, docstr.long_description - - -def get_class_property_docstrings( - data_type: type, transform_fun: Optional[Callable[[type, str, str], str]] = None -) -> Dict[str, str]: - """ - Extracts the documentation strings associated with the properties of a composite type. - - :param data_type: The object whose properties to iterate over. - :param transform_fun: An optional function that maps a property documentation string to a custom tailored string. - :returns: A dictionary mapping property names to descriptions. - """ - - result: Dict[str, str] = {} - # Only try to get MRO if data_type is actually a class - # Special types like Literal, Union, etc. don't have MRO - if not inspect.isclass(data_type): - return result - - for base in inspect.getmro(data_type): - docstr = docstring.parse_type(base) - for param in docstr.params.values(): - if param.name in result: - continue - - if transform_fun: - description = transform_fun(data_type, param.name, param.description) - else: - description = param.description - - result[param.name] = description - return result - - -def docstring_to_schema(data_type: type) -> Schema: - short_description, long_description = get_class_docstrings(data_type) - schema: Schema = { - "title": python_type_to_name(data_type), - } - - description = "\n".join(filter(None, [short_description, long_description])) - if description: - schema["description"] = description - return schema - - -def id_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> str: - "Extracts the name of a possibly forward-referenced type." - - if isinstance(data_type, typing.ForwardRef): - forward_type: typing.ForwardRef = data_type - return forward_type.__forward_arg__ - elif isinstance(data_type, str): - return data_type - else: - return data_type.__name__ - - -def type_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> Tuple[str, type]: - "Creates a type from a forward reference." 
- - if isinstance(data_type, typing.ForwardRef): - forward_type: typing.ForwardRef = data_type - true_type = eval(forward_type.__forward_code__) - return forward_type.__forward_arg__, true_type - elif isinstance(data_type, str): - true_type = eval(data_type) - return data_type, true_type - else: - return data_type.__name__, data_type - - -@dataclasses.dataclass -class TypeCatalogEntry: - schema: Optional[Schema] - identifier: str - examples: Optional[JsonType] = None - - -class TypeCatalog: - "Maintains an association of well-known Python types to their JSON schema." - - _by_type: Dict[TypeLike, TypeCatalogEntry] - _by_name: Dict[str, TypeCatalogEntry] - - def __init__(self) -> None: - self._by_type = {} - self._by_name = {} - - def __contains__(self, data_type: TypeLike) -> bool: - if isinstance(data_type, typing.ForwardRef): - fwd: typing.ForwardRef = data_type - name = fwd.__forward_arg__ - return name in self._by_name - else: - return data_type in self._by_type - - def add( - self, - data_type: TypeLike, - schema: Optional[Schema], - identifier: str, - examples: Optional[List[JsonType]] = None, - ) -> None: - if isinstance(data_type, typing.ForwardRef): - raise TypeError("forward references cannot be used to register a type") - - if data_type in self._by_type: - raise ValueError(f"type {data_type} is already registered in the catalog") - - entry = TypeCatalogEntry(schema, identifier, examples) - self._by_type[data_type] = entry - self._by_name[identifier] = entry - - def get(self, data_type: TypeLike) -> TypeCatalogEntry: - if isinstance(data_type, typing.ForwardRef): - fwd: typing.ForwardRef = data_type - name = fwd.__forward_arg__ - return self._by_name[name] - else: - return self._by_type[data_type] - - -@dataclasses.dataclass -class SchemaOptions: - definitions_path: str = "#/definitions/" - use_descriptions: bool = True - use_examples: bool = True - property_description_fun: Optional[Callable[[type, str, str], str]] = None - - -class JsonSchemaGenerator: - "Creates a JSON schema with user-defined type definitions." - - type_catalog: ClassVar[TypeCatalog] = TypeCatalog() - types_used: Dict[str, TypeLike] - options: SchemaOptions - - def __init__(self, options: Optional[SchemaOptions] = None): - if options is None: - self.options = SchemaOptions() - else: - self.options = options - self.types_used = {} - - @functools.singledispatchmethod - def _metadata_to_schema(self, arg: object) -> Schema: - # unrecognized annotation - return {} - - @_metadata_to_schema.register - def _(self, arg: IntegerRange) -> Schema: - return {"minimum": arg.minimum, "maximum": arg.maximum} - - @_metadata_to_schema.register - def _(self, arg: Precision) -> Schema: - return { - "multipleOf": 10 ** (-arg.decimal_digits), - "exclusiveMinimum": -(10**arg.integer_digits), - "exclusiveMaximum": (10**arg.integer_digits), - } - - @_metadata_to_schema.register - def _(self, arg: MinLength) -> Schema: - return {"minLength": arg.value} - - @_metadata_to_schema.register - def _(self, arg: MaxLength) -> Schema: - return {"maxLength": arg.value} - - def _with_metadata(self, type_schema: Schema, metadata: Optional[Tuple[Any, ...]]) -> Schema: - if metadata: - for m in metadata: - type_schema.update(self._metadata_to_schema(m)) - return type_schema - - def _simple_type_to_schema(self, typ: TypeLike, json_schema_extra: Optional[dict] = None) -> Optional[Schema]: - """ - Returns the JSON schema associated with a simple, unrestricted type. - - :returns: The schema for a simple type, or `None`. 
- """ - - if typ is type(None): - return {"type": "null"} - elif typ is bool: - return {"type": "boolean"} - elif typ is int: - return {"type": "integer"} - elif typ is float: - return {"type": "number"} - elif typ is str: - if json_schema_extra and "contentEncoding" in json_schema_extra: - return { - "type": "string", - "contentEncoding": json_schema_extra["contentEncoding"], - } - return {"type": "string"} - elif typ is bytes: - return {"type": "string", "contentEncoding": "base64"} - elif typ is datetime.datetime: - # 2018-11-13T20:20:39+00:00 - return { - "type": "string", - "format": "date-time", - } - elif typ is datetime.date: - # 2018-11-13 - return {"type": "string", "format": "date"} - elif typ is datetime.time: - # 20:20:39+00:00 - return {"type": "string", "format": "time"} - elif typ is decimal.Decimal: - return {"type": "number"} - elif typ is uuid.UUID: - # f81d4fae-7dec-11d0-a765-00a0c91e6bf6 - return {"type": "string", "format": "uuid"} - elif typ is Any: - return { - "oneOf": [ - {"type": "null"}, - {"type": "boolean"}, - {"type": "number"}, - {"type": "string"}, - {"type": "array"}, - {"type": "object"}, - ] - } - elif typ is JsonObject: - return {"type": "object"} - elif typ is JsonArray: - return {"type": "array"} - else: - # not a simple type - return None - - def type_to_schema( - self, - data_type: TypeLike, - force_expand: bool = False, - json_schema_extra: Optional[dict] = None, - ) -> Schema: - common_info = {} - if json_schema_extra and "deprecated" in json_schema_extra: - common_info["deprecated"] = json_schema_extra["deprecated"] - return self._type_to_schema(data_type, force_expand, json_schema_extra) | common_info - - def _type_to_schema( - self, - data_type: TypeLike, - force_expand: bool = False, - json_schema_extra: Optional[dict] = None, - ) -> Schema: - """ - Returns the JSON schema associated with a type. - - :param data_type: The Python type whose JSON schema to return. - :param force_expand: Forces a JSON schema to be returned even if the type is registered in the catalog of known types. - :returns: The JSON schema associated with the type. - """ - - # short-circuit for common simple types - schema = self._simple_type_to_schema(data_type, json_schema_extra) - if schema is not None: - return schema - - # types registered in the type catalog of well-known types - type_catalog = JsonSchemaGenerator.type_catalog - if not force_expand and data_type in type_catalog: - # user-defined type - identifier = type_catalog.get(data_type).identifier - self.types_used.setdefault(identifier, data_type) - return {"$ref": f"{self.options.definitions_path}{identifier}"} - - # unwrap annotated types - metadata = getattr(data_type, "__metadata__", None) - if metadata is not None: - # type is Annotated[T, ...] 
- typ = typing.get_args(data_type)[0] - schema = self._simple_type_to_schema(typ) - if schema is not None: - # recognize well-known auxiliary types - fmt = get_auxiliary_format(data_type) - if fmt is not None: - schema.update({"format": fmt}) - return schema - else: - return self._with_metadata(schema, metadata) - - else: - # type is a regular type - typ = data_type - - if isinstance(typ, typing.ForwardRef) or isinstance(typ, str): - if force_expand: - identifier, true_type = type_from_ref(typ) - return self.type_to_schema(true_type, force_expand=True) - else: - try: - identifier, true_type = type_from_ref(typ) - self.types_used[identifier] = true_type - except NameError: - identifier = id_from_ref(typ) - - return {"$ref": f"{self.options.definitions_path}{identifier}"} - - if is_type_enum(typ): - enum_type: Type[enum.Enum] = typ - value_types = enum_value_types(enum_type) - if len(value_types) != 1: - raise ValueError( - f"enumerations must have a consistent member value type but several types found: {value_types}" - ) - enum_value_type = value_types.pop() - - enum_schema: Schema - if enum_value_type is bool or enum_value_type is int or enum_value_type is float or enum_value_type is str: - if enum_value_type is bool: - enum_schema_type = "boolean" - elif enum_value_type is int: - enum_schema_type = "integer" - elif enum_value_type is float: - enum_schema_type = "number" - elif enum_value_type is str: - enum_schema_type = "string" - - enum_schema = { - "type": enum_schema_type, - "enum": [object_to_json(e.value) for e in enum_type], - } - if self.options.use_descriptions: - enum_schema.update(docstring_to_schema(typ)) - return enum_schema - else: - enum_schema = self.type_to_schema(enum_value_type) - if self.options.use_descriptions: - enum_schema.update(docstring_to_schema(typ)) - return enum_schema - - origin_type = typing.get_origin(typ) - if origin_type is list: - (list_type,) = typing.get_args(typ) # unpack single tuple element - return {"type": "array", "items": self.type_to_schema(list_type)} - elif origin_type is dict: - key_type, value_type = typing.get_args(typ) - if not (key_type is str or key_type is int or is_type_enum(key_type)): - raise ValueError("`dict` with key type not coercible to `str` is not supported") - - dict_schema: Schema - value_schema = self.type_to_schema(value_type) - if is_type_enum(key_type): - enum_values = [str(e.value) for e in key_type] - if len(enum_values) > OBJECT_ENUM_EXPANSION_LIMIT: - dict_schema = { - "propertyNames": {"pattern": "^(" + "|".join(enum_values) + ")$"}, - "additionalProperties": value_schema, - } - else: - dict_schema = { - "properties": {value: value_schema for value in enum_values}, - "additionalProperties": False, - } - else: - dict_schema = {"additionalProperties": value_schema} - - schema = {"type": "object"} - schema.update(dict_schema) - return schema - elif origin_type is set: - (set_type,) = typing.get_args(typ) # unpack single tuple element - return { - "type": "array", - "items": self.type_to_schema(set_type), - "uniqueItems": True, - } - elif origin_type is tuple: - args = typing.get_args(typ) - return { - "type": "array", - "minItems": len(args), - "maxItems": len(args), - "prefixItems": [self.type_to_schema(member_type) for member_type in args], - } - elif origin_type in (Union, types.UnionType): - discriminator = None - if typing.get_origin(data_type) is Annotated: - discriminator = typing.get_args(data_type)[1].discriminator - ret: Schema = {"oneOf": [self.type_to_schema(union_type) for union_type in 
typing.get_args(typ)]} - if discriminator: - # for each union type, we need to read the value of the discriminator - mapping: dict[str, JsonType] = {} - for union_type in typing.get_args(typ): - props = self.type_to_schema(union_type, force_expand=True)["properties"] - # mypy is confused here because JsonType allows multiple types, some of them - # not indexable (bool?) or not indexable by string (list?). The correctness of - # types depends on correct model definitions. Hence multiple ignore statements below. - discriminator_value = props[discriminator]["default"] # type: ignore[index,call-overload] - mapping[discriminator_value] = self.type_to_schema(union_type)["$ref"] # type: ignore[index] - - ret["discriminator"] = { - "propertyName": discriminator, - "mapping": mapping, - } - return ret - elif origin_type is Literal: - literal_args = typing.get_args(typ) - if len(literal_args) == 1: - (literal_value,) = literal_args - schema = self.type_to_schema(type(literal_value)) - schema["const"] = literal_value - return schema - elif len(literal_args) > 1: - first_value = literal_args[0] - schema = self.type_to_schema(type(first_value)) - schema["enum"] = list(literal_args) - return schema - else: - return {"enum": []} - elif origin_type is type: - (concrete_type,) = typing.get_args(typ) # unpack single tuple element - return {"const": self.type_to_schema(concrete_type, force_expand=True)} - elif origin_type is collections.abc.AsyncIterator: - (concrete_type,) = typing.get_args(typ) - return self.type_to_schema(concrete_type) - - # dictionary of class attributes - members = dict(inspect.getmembers(typ, lambda a: not inspect.isroutine(a))) - - property_docstrings = get_class_property_docstrings(typ, self.options.property_description_fun) - properties: Dict[str, Schema] = {} - required: List[str] = [] - for property_name, property_type in get_class_properties(typ): - # rename property if an alias name is specified - alias = get_annotation(property_type, Alias) - if alias: - output_name = alias.name - else: - output_name = property_name - - defaults = {} - json_schema_extra = None - if "model_fields" in members: - f = members["model_fields"] - defaults = {k: finfo.default for k, finfo in f.items()} - if output_name in f: - finfo = f[output_name] - json_schema_extra = finfo.json_schema_extra or {} - if finfo.deprecated: - json_schema_extra["deprecated"] = True - - if is_type_optional(property_type): - optional_type: type = unwrap_optional_type(property_type) - property_def = self.type_to_schema(optional_type, json_schema_extra=json_schema_extra) - else: - property_def = self.type_to_schema(property_type, json_schema_extra=json_schema_extra) - required.append(output_name) - - # check if attribute has a default value initializer - if defaults.get(property_name) is not None: - def_value = defaults[property_name] - # check if value can be directly represented in JSON - if isinstance( - def_value, - ( - bool, - int, - float, - str, - enum.Enum, - datetime.datetime, - datetime.date, - datetime.time, - ), - ): - property_def["default"] = object_to_json(def_value) - - # add property docstring if available - property_doc = property_docstrings.get(property_name) - if property_doc: - # print(output_name, property_doc) - property_def.pop("title", None) - property_def["description"] = property_doc - - properties[output_name] = property_def - - schema = {"type": "object"} - if len(properties) > 0: - schema["properties"] = typing.cast(JsonType, properties) - schema["additionalProperties"] = False - if 
len(required) > 0: - schema["required"] = typing.cast(JsonType, required) - if self.options.use_descriptions: - schema.update(docstring_to_schema(typ)) - return schema - - def _type_to_schema_with_lookup(self, data_type: TypeLike) -> Schema: - """ - Returns the JSON schema associated with a type that may be registered in the catalog of known types. - - :param data_type: The type whose JSON schema we seek. - :returns: The JSON schema associated with the type. - """ - - entry = JsonSchemaGenerator.type_catalog.get(data_type) - if entry.schema is None: - type_schema = self.type_to_schema(data_type, force_expand=True) - else: - type_schema = deepcopy(entry.schema) - - # add descriptive text (if present) - if self.options.use_descriptions: - if isinstance(data_type, type) and not isinstance(data_type, typing.ForwardRef): - type_schema.update(docstring_to_schema(data_type)) - - # add example (if present) - if self.options.use_examples and entry.examples: - type_schema["examples"] = entry.examples - - return type_schema - - def classdef_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> Tuple[Schema, Dict[str, Schema]]: - """ - Returns the JSON schema associated with a type and any nested types. - - :param data_type: The type whose JSON schema to return. - :param force_expand: True if a full JSON schema is to be returned even for well-known types; false if a schema - reference is to be used for well-known types. - :returns: A tuple of the JSON schema, and a mapping between nested type names and their corresponding schema. - """ - - if not is_type_like(data_type): - raise TypeError(f"expected a type-like object but got: {data_type}") - - self.types_used = {} - try: - type_schema = self.type_to_schema(data_type, force_expand=force_expand) - - types_defined: Dict[str, Schema] = {} - while len(self.types_used) > len(types_defined): - # make a snapshot copy; original collection is going to be modified - types_undefined = { - sub_name: sub_type - for sub_name, sub_type in self.types_used.items() - if sub_name not in types_defined - } - - # expand undefined types, which may lead to additional types to be defined - for sub_name, sub_type in types_undefined.items(): - types_defined[sub_name] = self._type_to_schema_with_lookup(sub_type) - - type_definitions = dict(sorted(types_defined.items())) - finally: - self.types_used = {} - - return type_schema, type_definitions - - -class Validator(enum.Enum): - "Defines constants for JSON schema standards." - - Draft7 = jsonschema.Draft7Validator - Draft201909 = jsonschema.Draft201909Validator - Draft202012 = jsonschema.Draft202012Validator - Latest = jsonschema.Draft202012Validator - - -def classdef_to_schema( - data_type: TypeLike, - options: Optional[SchemaOptions] = None, - validator: Validator = Validator.Latest, -) -> Schema: - """ - Returns the JSON schema corresponding to the given type. - - :param data_type: The Python type used to generate the JSON schema - :returns: A JSON object that you can serialize to a JSON string with json.dump or json.dumps - :raises TypeError: Indicates that the generated JSON schema does not validate against the desired meta-schema. 
- """ - - # short-circuit with an error message when passing invalid data - if not is_type_like(data_type): - raise TypeError(f"expected a type-like object but got: {data_type}") - - generator = JsonSchemaGenerator(options) - type_schema, type_definitions = generator.classdef_to_schema(data_type) - - class_schema: Schema = {} - if type_definitions: - class_schema["definitions"] = typing.cast(JsonType, type_definitions) - class_schema.update(type_schema) - - validator_id = validator.value.META_SCHEMA["$id"] - try: - validator.value.check_schema(class_schema) - except jsonschema.exceptions.SchemaError: - raise TypeError(f"schema does not validate against meta-schema <{validator_id}>") - - schema = {"$schema": validator_id} - schema.update(class_schema) - return schema - - -def validate_object(data_type: TypeLike, json_dict: JsonType) -> None: - """ - Validates if the JSON dictionary object conforms to the expected type. - - :param data_type: The type to match against. - :param json_dict: A JSON object obtained with `json.load` or `json.loads`. - :raises jsonschema.exceptions.ValidationError: Indicates that the JSON object cannot represent the type. - """ - - schema_dict = classdef_to_schema(data_type) - jsonschema.validate(json_dict, schema_dict, format_checker=jsonschema.FormatChecker()) - - -def print_schema(data_type: type) -> None: - """Pretty-prints the JSON schema corresponding to the type.""" - - s = classdef_to_schema(data_type) - print(json.dumps(s, indent=4)) - - -def get_schema_identifier(data_type: type) -> Optional[str]: - if data_type in JsonSchemaGenerator.type_catalog: - return JsonSchemaGenerator.type_catalog.get(data_type).identifier - else: - return None - - -def register_schema( - data_type: T, - schema: Optional[Schema] = None, - name: Optional[str] = None, - examples: Optional[List[JsonType]] = None, -) -> T: - """ - Associates a type with a JSON schema definition. - - :param data_type: The type to associate with a JSON schema. - :param schema: The schema to associate the type with. Derived automatically if omitted. - :param name: The name used for looking uo the type. Determined automatically if omitted. - :returns: The input type. - """ - - JsonSchemaGenerator.type_catalog.add( - data_type, - schema, - name if name is not None else python_type_to_name(data_type), - examples, - ) - return data_type - - -@overload -def json_schema_type(cls: Type[T], /) -> Type[T]: ... - - -@overload -def json_schema_type(cls: None, *, schema: Optional[Schema] = None) -> Callable[[Type[T]], Type[T]]: ... 
-
-
-def json_schema_type(
-    cls: Optional[Type[T]] = None,
-    *,
-    schema: Optional[Schema] = None,
-    examples: Optional[List[JsonType]] = None,
-) -> Union[Type[T], Callable[[Type[T]], Type[T]]]:
-    """Decorator to add user-defined schema definition to a class."""
-
-    def wrap(cls: Type[T]) -> Type[T]:
-        return register_schema(cls, schema, examples=examples)
-
-    # see if decorator is used as @json_schema_type or @json_schema_type()
-    if cls is None:
-        # called with parentheses
-        return wrap
-    else:
-        # called as @json_schema_type without parentheses
-        return wrap(cls)
-
-
-register_schema(JsonObject, name="JsonObject")
-register_schema(JsonArray, name="JsonArray")
-
-register_schema(
-    JsonType,
-    name="JsonType",
-    examples=[
-        {
-            "property1": None,
-            "property2": True,
-            "property3": 64,
-            "property4": "string",
-            "property5": ["item"],
-            "property6": {"key": "value"},
-        }
-    ],
-)
-register_schema(
-    StrictJsonType,
-    name="StrictJsonType",
-    examples=[
-        {
-            "property1": True,
-            "property2": 64,
-            "property3": "string",
-            "property4": ["item"],
-            "property5": {"key": "value"},
-        }
-    ],
-)
diff --git a/llama_stack/strong_typing/serialization.py b/llama_stack/strong_typing/serialization.py
deleted file mode 100644
index c00a0aad5..000000000
--- a/llama_stack/strong_typing/serialization.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-"""
-Type-safe data interchange for Python data classes.
-
-:see: https://github.com/hunyadi/strong_typing
-"""
-
-import inspect
-import json
-import sys
-from types import ModuleType
-from typing import Any, Optional, TextIO, TypeVar
-
-from .core import JsonType
-from .deserializer import create_deserializer
-from .inspection import TypeLike
-from .serializer import create_serializer
-
-T = TypeVar("T")
-
-
-def object_to_json(obj: Any) -> JsonType:
-    """
-    Converts a Python object to a representation that can be exported to JSON.
-
-    * Fundamental types (e.g. numeric types) are written as is.
-    * Date and time types are serialized in the ISO 8601 format with time zone.
-    * A byte array is written as a string with Base64 encoding.
-    * UUIDs are written as a UUID string.
-    * Enumerations are written as their value.
-    * Containers (e.g. `list`, `dict`, `set`, `tuple`) are exported recursively.
-    * Objects with properties (including data class types) are converted to dictionaries of key-value pairs.
-    """
-
-    typ: type = type(obj)
-    generator = create_serializer(typ)
-    return generator.generate(obj)
-
-
-def json_to_object(typ: TypeLike, data: JsonType, *, context: Optional[ModuleType] = None) -> object:
-    """
-    Creates an object from a representation that has been de-serialized from JSON.
-
-    When de-serializing a JSON object into a Python object, the following transformations are applied:
-
-    * Fundamental types are parsed as `bool`, `int`, `float` or `str`.
-    * Date and time types are parsed from the ISO 8601 format with time zone into the corresponding Python type
-      `datetime`, `date` or `time`.
-    * A byte array is read from a string with Base64 encoding into a `bytes` instance.
-    * UUIDs are extracted from a UUID string into a `uuid.UUID` instance.
-    * Enumerations are instantiated with a lookup on enumeration value.
-    * Containers (e.g. `list`, `dict`, `set`, `tuple`) are parsed recursively.
- * Complex objects with properties (including data class types) are populated from dictionaries of key-value pairs - using reflection (enumerating type annotations). - - :raises TypeError: A de-serializing engine cannot be constructed for the input type. - :raises JsonKeyError: Deserialization for a class or union type has failed because a matching member was not found. - :raises JsonTypeError: Deserialization for data has failed due to a type mismatch. - """ - - # use caller context for evaluating types if no context is supplied - if context is None: - this_frame = inspect.currentframe() - if this_frame is not None: - caller_frame = this_frame.f_back - del this_frame - - if caller_frame is not None: - try: - context = sys.modules[caller_frame.f_globals["__name__"]] - finally: - del caller_frame - - parser = create_deserializer(typ, context) - return parser.parse(data) - - -def json_dump_string(json_object: JsonType) -> str: - "Dump an object as a JSON string with a compact representation." - - return json.dumps(json_object, ensure_ascii=False, check_circular=False, separators=(",", ":")) - - -def json_dump(json_object: JsonType, file: TextIO) -> None: - json.dump( - json_object, - file, - ensure_ascii=False, - check_circular=False, - separators=(",", ":"), - ) - file.write("\n") diff --git a/llama_stack/strong_typing/serializer.py b/llama_stack/strong_typing/serializer.py deleted file mode 100644 index 17848c14b..000000000 --- a/llama_stack/strong_typing/serializer.py +++ /dev/null @@ -1,500 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -""" -Type-safe data interchange for Python data classes. - -:see: https://github.com/hunyadi/strong_typing -""" - -import abc -import base64 -import datetime -import enum -import functools -import inspect -import ipaddress -import sys -import typing -import uuid -from types import FunctionType, MethodType, ModuleType -from typing import ( - Any, - Callable, - Dict, - Generic, - List, - Literal, - NamedTuple, - Optional, - Set, - Tuple, - Type, - TypeVar, - Union, -) - -from .core import JsonType -from .exception import JsonTypeError, JsonValueError -from .inspection import ( - TypeLike, - enum_value_types, - evaluate_type, - get_class_properties, - get_resolved_hints, - is_dataclass_type, - is_named_tuple_type, - is_reserved_property, - is_type_annotated, - is_type_enum, - unwrap_annotated_type, -) -from .mapping import python_field_to_json_property - -T = TypeVar("T") - - -class Serializer(abc.ABC, Generic[T]): - @abc.abstractmethod - def generate(self, data: T) -> JsonType: ... 
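A round-trip sketch through `object_to_json` and `json_to_object` from the deleted serialization module above (the `Event` dataclass is hypothetical):

import dataclasses
import datetime

@dataclasses.dataclass
class Event:
    name: str
    when: datetime.date

payload = object_to_json(Event("release", datetime.date(2024, 1, 1)))
assert payload == {"name": "release", "when": "2024-01-01"}
assert json_to_object(Event, payload) == Event("release", datetime.date(2024, 1, 1))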
-
-
-class DateSerializer(Serializer[datetime.date]):
-    def generate(self, obj: datetime.date) -> str:
-        return obj.isoformat()
-
-
-class TimeSerializer(Serializer[datetime.time]):
-    def generate(self, obj: datetime.time) -> str:
-        return obj.isoformat()
-
-
-class UUIDSerializer(Serializer[uuid.UUID]):
-    def generate(self, obj: uuid.UUID) -> str:
-        return str(obj)
-
-
-class IPv4Serializer(Serializer[ipaddress.IPv4Address]):
-    def generate(self, obj: ipaddress.IPv4Address) -> str:
-        return str(obj)
-
-
-class IPv6Serializer(Serializer[ipaddress.IPv6Address]):
-    def generate(self, obj: ipaddress.IPv6Address) -> str:
-        return str(obj)
-
-
-class EnumSerializer(Serializer[enum.Enum]):
-    def generate(self, obj: enum.Enum) -> Union[int, str]:
-        value = obj.value
-        if isinstance(value, int):
-            return value
-        return str(value)
-
-
-class UntypedListSerializer(Serializer[list]):
-    def generate(self, obj: list) -> List[JsonType]:
-        return [object_to_json(item) for item in obj]
-
-
-class UntypedDictSerializer(Serializer[dict]):
-    def generate(self, obj: dict) -> Dict[str, JsonType]:
-        if obj and isinstance(next(iter(obj.keys())), enum.Enum):
-            iterator = ((key.value, object_to_json(value)) for key, value in obj.items())
-        else:
-            iterator = ((str(key), object_to_json(value)) for key, value in obj.items())
-        return dict(iterator)
-
-
-class UntypedSetSerializer(Serializer[set]):
-    def generate(self, obj: set) -> List[JsonType]:
-        return [object_to_json(item) for item in obj]
-
-
-class UntypedTupleSerializer(Serializer[tuple]):
-    def generate(self, obj: tuple) -> List[JsonType]:
-        return [object_to_json(item) for item in obj]
-
-
-class TypedCollectionSerializer(Serializer, Generic[T]):
-    generator: Serializer[T]
-
-    def __init__(self, item_type: Type[T], context: Optional[ModuleType]) -> None:
-        self.generator = _get_serializer(item_type, context)
-
-
-class TypedListSerializer(TypedCollectionSerializer[T]):
-    def generate(self, obj: List[T]) -> List[JsonType]:
-        return [self.generator.generate(item) for item in obj]
-
-
-class TypedStringDictSerializer(TypedCollectionSerializer[T]):
-    def __init__(self, value_type: Type[T], context: Optional[ModuleType]) -> None:
-        super().__init__(value_type, context)
-
-    def generate(self, obj: Dict[str, T]) -> Dict[str, JsonType]:
-        return {key: self.generator.generate(value) for key, value in obj.items()}
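Worth noting from UntypedDictSerializer above: for plain dicts the serializer sniffs the first key to decide how keys are stringified. A small sketch (the Color enum is hypothetical):

    import enum

    from llama_stack.strong_typing.serialization import object_to_json


    class Color(enum.Enum):
        RED = "red"


    # A plain dict routes through UntypedDictSerializer: enum keys serialize by value,
    # every other key type is coerced with str().
    print(object_to_json({Color.RED: 1}))  # {'red': 1}
    print(object_to_json({3: "three"}))    # {'3': 'three'}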
-
-
-class TypedEnumDictSerializer(TypedCollectionSerializer[T]):
-    def __init__(
-        self,
-        key_type: Type[enum.Enum],
-        value_type: Type[T],
-        context: Optional[ModuleType],
-    ) -> None:
-        super().__init__(value_type, context)
-
-        value_types = enum_value_types(key_type)
-        if len(value_types) != 1:
-            raise JsonTypeError(
-                f"invalid key type, enumerations must have a consistent member value type but several types found: {value_types}"
-            )
-
-        value_type = value_types.pop()
-        if value_type is not str:
-            raise JsonTypeError("invalid enumeration key type, expected `enum.Enum` with string values")
-
-    def generate(self, obj: Dict[enum.Enum, T]) -> Dict[str, JsonType]:
-        return {key.value: self.generator.generate(value) for key, value in obj.items()}
-
-
-class TypedSetSerializer(TypedCollectionSerializer[T]):
-    def generate(self, obj: Set[T]) -> JsonType:
-        return [self.generator.generate(item) for item in obj]
-
-
-class TypedTupleSerializer(Serializer[tuple]):
-    item_generators: Tuple[Serializer, ...]
-
-    def __init__(self, item_types: Tuple[type, ...], context: Optional[ModuleType]) -> None:
-        self.item_generators = tuple(_get_serializer(item_type, context) for item_type in item_types)
-
-    def generate(self, obj: tuple) -> List[JsonType]:
-        return [item_generator.generate(item) for item_generator, item in zip(self.item_generators, obj, strict=False)]
-
-
-class CustomSerializer(Serializer):
-    converter: Callable[[object], JsonType]
-
-    def __init__(self, converter: Callable[[object], JsonType]) -> None:
-        self.converter = converter
-
-    def generate(self, obj: object) -> JsonType:
-        return self.converter(obj)
-
-
-class FieldSerializer(Generic[T]):
-    """
-    Serializes a Python object field into a JSON property.
-
-    :param field_name: The name of the field in a Python class to read data from.
-    :param property_name: The name of the JSON property to write to a JSON `object`.
-    :param generator: A compatible serializer that can handle the field's type.
-    """
-
-    field_name: str
-    property_name: str
-    generator: Serializer
-
-    def __init__(self, field_name: str, property_name: str, generator: Serializer[T]) -> None:
-        self.field_name = field_name
-        self.property_name = property_name
-        self.generator = generator
-
-    def generate_field(self, obj: object, object_dict: Dict[str, JsonType]) -> None:
-        value = getattr(obj, self.field_name)
-        if value is not None:
-            object_dict[self.property_name] = self.generator.generate(value)
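Unlike the untyped path, TypedEnumDictSerializer above validates at construction time that enum keys carry string values, since anything else could not round-trip as a JSON object key. A sketch with hypothetical Level and Priority enums:

    import enum
    from typing import Dict

    from llama_stack.strong_typing.serializer import create_serializer


    class Level(enum.Enum):
        LOW = "low"
        HIGH = "high"


    serializer = create_serializer(Dict[Level, int])
    print(serializer.generate({Level.LOW: 1}))  # {'low': 1}


    class Priority(enum.Enum):
        URGENT = 1


    create_serializer(Dict[Priority, int])  # raises JsonTypeError: string-valued enum keys required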
- """ - - field_name: str - property_name: str - generator: Serializer - - def __init__(self, field_name: str, property_name: str, generator: Serializer[T]) -> None: - self.field_name = field_name - self.property_name = property_name - self.generator = generator - - def generate_field(self, obj: object, object_dict: Dict[str, JsonType]) -> None: - value = getattr(obj, self.field_name) - if value is not None: - object_dict[self.property_name] = self.generator.generate(value) - - -class TypedClassSerializer(Serializer[T]): - property_generators: List[FieldSerializer] - - def __init__(self, class_type: Type[T], context: Optional[ModuleType]) -> None: - self.property_generators = [ - FieldSerializer( - field_name, - python_field_to_json_property(field_name, field_type), - _get_serializer(field_type, context), - ) - for field_name, field_type in get_class_properties(class_type) - ] - - def generate(self, obj: T) -> Dict[str, JsonType]: - object_dict: Dict[str, JsonType] = {} - for property_generator in self.property_generators: - property_generator.generate_field(obj, object_dict) - - return object_dict - - -class TypedNamedTupleSerializer(TypedClassSerializer[NamedTuple]): - def __init__(self, class_type: Type[NamedTuple], context: Optional[ModuleType]) -> None: - super().__init__(class_type, context) - - -class DataclassSerializer(TypedClassSerializer[T]): - def __init__(self, class_type: Type[T], context: Optional[ModuleType]) -> None: - super().__init__(class_type, context) - - -class UnionSerializer(Serializer): - def generate(self, obj: Any) -> JsonType: - return object_to_json(obj) - - -class LiteralSerializer(Serializer): - generator: Serializer - - def __init__(self, values: Tuple[Any, ...], context: Optional[ModuleType]) -> None: - literal_type_tuple = tuple(type(value) for value in values) - literal_type_set = set(literal_type_tuple) - if len(literal_type_set) != 1: - value_names = ", ".join(repr(value) for value in values) - raise TypeError( - f"type `Literal[{value_names}]` expects consistent literal value types but got: {literal_type_tuple}" - ) - - literal_type = literal_type_set.pop() - self.generator = _get_serializer(literal_type, context) - - def generate(self, obj: Any) -> JsonType: - return self.generator.generate(obj) - - -class UntypedNamedTupleSerializer(Serializer): - fields: Dict[str, str] - - def __init__(self, class_type: Type[NamedTuple]) -> None: - # named tuples are also instances of tuple - self.fields = {} - field_names: Tuple[str, ...] 
-
-
-class UntypedNamedTupleSerializer(Serializer):
-    fields: Dict[str, str]
-
-    def __init__(self, class_type: Type[NamedTuple]) -> None:
-        # named tuples are also instances of tuple
-        self.fields = {}
-        field_names: Tuple[str, ...] = class_type._fields
-        for field_name in field_names:
-            self.fields[field_name] = python_field_to_json_property(field_name)
-
-    def generate(self, obj: NamedTuple) -> JsonType:
-        object_dict = {}
-        for field_name, property_name in self.fields.items():
-            value = getattr(obj, field_name)
-            object_dict[property_name] = object_to_json(value)
-
-        return object_dict
-
-
-class UntypedClassSerializer(Serializer):
-    def generate(self, obj: object) -> JsonType:
-        # iterate over object attributes to get a standard representation
-        object_dict = {}
-        for name in dir(obj):
-            if is_reserved_property(name):
-                continue
-
-            value = getattr(obj, name)
-            if value is None:
-                continue
-
-            # filter instance methods
-            if inspect.ismethod(value):
-                continue
-
-            object_dict[python_field_to_json_property(name)] = object_to_json(value)
-
-        return object_dict
-
-
-def create_serializer(typ: TypeLike, context: Optional[ModuleType] = None) -> Serializer:
-    """
-    Creates a serializer engine to produce an object that can be directly converted into a JSON string.
-
-    When serializing a Python object into a JSON object, the following transformations are applied:
-
-    * Fundamental types (`bool`, `int`, `float` or `str`) are returned as-is.
-    * Date and time types (`datetime`, `date` or `time`) produce an ISO 8601 format string with time zone
-      (ending with `Z` for UTC).
-    * Byte arrays (`bytes`) are written as a string with Base64 encoding.
-    * UUIDs (`uuid.UUID`) are written as a UUID string as per RFC 4122.
-    * Enumerations yield their enumeration value.
-    * Containers (e.g. `list`, `dict`, `set`, `tuple`) are processed recursively.
-    * Complex objects with properties (including data class types) generate dictionaries of key-value pairs.
-
-    :raises TypeError: A serializer engine cannot be constructed for the input type.
-    """
-
-    if context is None:
-        if isinstance(typ, type):
-            context = sys.modules[typ.__module__]
-
-    return _get_serializer(typ, context)
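Given the contract in the docstring above, a typical usage sketch is to build the engine once and reuse it across many objects (the Point dataclass is hypothetical):

    from dataclasses import dataclass

    from llama_stack.strong_typing.serializer import create_serializer


    @dataclass
    class Point:
        x: float
        y: float


    # Build the engine once and reuse it; for plain classes the engine is also
    # cached internally via the lru_cache on _fetch_serializer below.
    point_serializer = create_serializer(Point)
    print([point_serializer.generate(Point(x, x * 2)) for x in (1.0, 2.0)])
    # [{'x': 1.0, 'y': 2.0}, {'x': 2.0, 'y': 4.0}]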
- """ - - if context is None: - if isinstance(typ, type): - context = sys.modules[typ.__module__] - - return _get_serializer(typ, context) - - -def _get_serializer(typ: TypeLike, context: Optional[ModuleType]) -> Serializer: - if isinstance(typ, (str, typing.ForwardRef)): - if context is None: - raise TypeError(f"missing context for evaluating type: {typ}") - - typ = evaluate_type(typ, context) - - if isinstance(typ, type): - return _fetch_serializer(typ) - else: - # special forms are not always hashable - return _create_serializer(typ, context) - - -@functools.lru_cache(maxsize=None) -def _fetch_serializer(typ: type) -> Serializer: - context = sys.modules[typ.__module__] - return _create_serializer(typ, context) - - -def _create_serializer(typ: TypeLike, context: Optional[ModuleType]) -> Serializer: - # check for well-known types - if typ is type(None): - return NoneSerializer() - elif typ is bool: - return BoolSerializer() - elif typ is int: - return IntSerializer() - elif typ is float: - return FloatSerializer() - elif typ is str: - return StringSerializer() - elif typ is bytes: - return BytesSerializer() - elif typ is datetime.datetime: - return DateTimeSerializer() - elif typ is datetime.date: - return DateSerializer() - elif typ is datetime.time: - return TimeSerializer() - elif typ is uuid.UUID: - return UUIDSerializer() - elif typ is ipaddress.IPv4Address: - return IPv4Serializer() - elif typ is ipaddress.IPv6Address: - return IPv6Serializer() - - # dynamically-typed collection types - if typ is list: - return UntypedListSerializer() - elif typ is dict: - return UntypedDictSerializer() - elif typ is set: - return UntypedSetSerializer() - elif typ is tuple: - return UntypedTupleSerializer() - - # generic types (e.g. list, dict, set, etc.) - origin_type = typing.get_origin(typ) - if origin_type is list: - (list_item_type,) = typing.get_args(typ) # unpack single tuple element - return TypedListSerializer(list_item_type, context) - elif origin_type is dict: - key_type, value_type = typing.get_args(typ) - if key_type is str: - return TypedStringDictSerializer(value_type, context) - elif issubclass(key_type, enum.Enum): - return TypedEnumDictSerializer(key_type, value_type, context) - elif origin_type is set: - (set_member_type,) = typing.get_args(typ) # unpack single tuple element - return TypedSetSerializer(set_member_type, context) - elif origin_type is tuple: - return TypedTupleSerializer(typing.get_args(typ), context) - elif origin_type is Union: - return UnionSerializer() - elif origin_type is Literal: - return LiteralSerializer(typing.get_args(typ), context) - - if is_type_annotated(typ): - return create_serializer(unwrap_annotated_type(typ)) - - # check if object has custom serialization method - convert_func = getattr(typ, "to_json", None) - if callable(convert_func): - return CustomSerializer(convert_func) - - if is_type_enum(typ): - return EnumSerializer() - if is_dataclass_type(typ): - return DataclassSerializer(typ, context) - if is_named_tuple_type(typ): - if getattr(typ, "__annotations__", None): - return TypedNamedTupleSerializer(typ, context) - else: - return UntypedNamedTupleSerializer(typ) - - # fail early if caller passes an object with an exotic type - if not isinstance(typ, type) or typ is FunctionType or typ is MethodType or typ is type or typ is ModuleType: - raise TypeError(f"object of type {typ} cannot be represented in JSON") - - if get_resolved_hints(typ): - return TypedClassSerializer(typ, context) - else: - return UntypedClassSerializer() - - -def 
-
-
-def object_to_json(obj: Any) -> JsonType:
-    """
-    Converts a Python object to a representation that can be exported to JSON.
-
-    * Fundamental types (e.g. numeric types) are written as is.
-    * Date and time types are serialized in the ISO 8601 format with time zone.
-    * A byte array is written as a string with Base64 encoding.
-    * UUIDs are written as a UUID string.
-    * Enumerations are written as their value.
-    * Containers (e.g. `list`, `dict`, `set`, `tuple`) are exported recursively.
-    * Objects with properties (including data class types) are converted to dictionaries of key-value pairs.
-    """
-
-    typ: type = type(obj)
-    generator = create_serializer(typ)
-    return generator.generate(obj)
diff --git a/llama_stack/strong_typing/slots.py b/llama_stack/strong_typing/slots.py
deleted file mode 100644
index c1a3293d8..000000000
--- a/llama_stack/strong_typing/slots.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any, Dict, Tuple, Type, TypeVar
-
-T = TypeVar("T")
-
-
-class SlotsMeta(type):
-    def __new__(cls: Type[T], name: str, bases: Tuple[type, ...], ns: Dict[str, Any]) -> T:
-        # caller may have already provided slots, in which case just retain them and keep going
-        slots: Tuple[str, ...] = ns.get("__slots__", ())
-
-        # add fields with type annotations to slots
-        annotations: Dict[str, Any] = ns.get("__annotations__", {})
-        members = tuple(member for member in annotations.keys() if member not in slots)
-
-        # assign slots
-        ns["__slots__"] = slots + tuple(members)
-        return super().__new__(cls, name, bases, ns)  # type: ignore
-
-
-class Slots(metaclass=SlotsMeta):
-    pass
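The metaclass above turns annotated class fields into __slots__ automatically, so subclasses of Slots never grow a __dict__; a sketch with a hypothetical Vec class:

    from llama_stack.strong_typing.slots import Slots


    class Vec(Slots):
        x: float
        y: float

        def __init__(self, x: float, y: float) -> None:
            self.x = x
            self.y = y


    v = Vec(1.0, 2.0)
    v.z = 3.0  # raises AttributeError: only the annotated fields exist as slots, there is no __dict__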
- """ - - # empty list that will contain the sorted nodes (in reverse order) - ordered: List[T] = [] - - seen: Dict[T, bool] = {} - - def _visit(n: T) -> None: - status = seen.get(n) - if status is not None: - if status: # node has a permanent mark - return - else: # node has a temporary mark - raise RuntimeError(f"cycle detected in graph for node {n}") - - seen[n] = False # apply temporary mark - for m in graph[n]: # visit all adjacent nodes - if m != n: # ignore self-referencing nodes - _visit(m) - - seen[n] = True # apply permanent mark - ordered.append(n) - - for n in graph.keys(): - _visit(n) - - return ordered - - -def type_topological_sort( - types: Iterable[type], - dependency_fn: Optional[Callable[[type], Iterable[type]]] = None, -) -> List[type]: - """ - Performs a topological sort of a list of types. - - Types that don't depend on other types (i.e. fundamental types) are first. Types on which no other types depend - are last. The topological ordering is not unique. - - :param types: A list of types (simple or composite). - :param dependency_fn: Returns a list of additional dependencies for a class (e.g. classes referenced by a foreign key). - :returns: The list of types in topological order. - """ - - if not all(isinstance(typ, type) for typ in types): - raise TypeError("expected a list of types") - - collector = TypeCollector() - collector.traverse_all(types) - graph = collector.graph - - if dependency_fn: - new_types: Set[type] = set() - for source_type, references in graph.items(): - dependent_types = dependency_fn(source_type) - references.update(dependent_types) - new_types.update(dependent_types) - for new_type in new_types: - graph[new_type] = set() - - return topological_sort(graph) diff --git a/llama_stack/testing/__init__.py b/llama_stack/testing/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/testing/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
diff --git a/llama_stack/ui/app/logs/vector-stores/page.tsx b/llama_stack/ui/app/logs/vector-stores/page.tsx deleted file mode 100644 index 72196d496..000000000 --- a/llama_stack/ui/app/logs/vector-stores/page.tsx +++ /dev/null @@ -1,138 +0,0 @@ -"use client"; - -import React from "react"; -import type { - ListVectorStoresResponse, - VectorStore, -} from "llama-stack-client/resources/vector-stores/vector-stores"; -import { useRouter } from "next/navigation"; -import { usePagination } from "@/hooks/use-pagination"; -import { Button } from "@/components/ui/button"; -import { - Table, - TableBody, - TableCell, - TableHead, - TableHeader, - TableRow, -} from "@/components/ui/table"; -import { Skeleton } from "@/components/ui/skeleton"; - -export default function VectorStoresPage() { - const router = useRouter(); - const { - data: stores, - status, - hasMore, - error, - loadMore, - } = usePagination({ - limit: 20, - order: "desc", - fetchFunction: async (client, params) => { - const response = await client.vectorStores.list({ - after: params.after, - limit: params.limit, - order: params.order, - } as Parameters[0]); - return response as ListVectorStoresResponse; - }, - errorMessagePrefix: "vector stores", - }); - - // Auto-load all pages for infinite scroll behavior (like Responses) - React.useEffect(() => { - if (status === "idle" && hasMore) { - loadMore(); - } - }, [status, hasMore, loadMore]); - - const renderContent = () => { - if (status === "loading") { - return ( -
- - - -
- ); - } - - if (status === "error") { - return
Error: {error?.message}
; - } - - if (!stores || stores.length === 0) { - return

No vector stores found.

; - } - - return ( -
- - - - ID - Name - Created - Completed - Cancelled - Failed - In Progress - Total - Usage Bytes - Provider ID - Provider Vector DB ID - - - - {stores.map(store => { - const fileCounts = store.file_counts; - const metadata = store.metadata || {}; - const providerId = metadata.provider_id ?? ""; - const providerDbId = metadata.provider_vector_db_id ?? ""; - - return ( - router.push(`/logs/vector-stores/${store.id}`)} - className="cursor-pointer hover:bg-muted/50" - > - - - - {store.name} - - {new Date(store.created_at * 1000).toLocaleString()} - - {fileCounts.completed} - {fileCounts.cancelled} - {fileCounts.failed} - {fileCounts.in_progress} - {fileCounts.total} - {store.usage_bytes} - {providerId} - {providerDbId} - - ); - })} - -
-
- ); - }; - - return ( -
-

Vector Stores

- {renderContent()} -
- ); -} diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx deleted file mode 100644 index d3d0fa249..000000000 --- a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx +++ /dev/null @@ -1,143 +0,0 @@ -"use client"; - -import { useRouter } from "next/navigation"; -import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; -import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; -import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; -import { Skeleton } from "@/components/ui/skeleton"; -import { Button } from "@/components/ui/button"; -import { - DetailLoadingView, - DetailErrorView, - DetailNotFoundView, - DetailLayout, - PropertiesCard, - PropertyItem, -} from "@/components/layout/detail-layout"; -import { - Table, - TableBody, - TableCaption, - TableCell, - TableHead, - TableHeader, - TableRow, -} from "@/components/ui/table"; - -interface VectorStoreDetailViewProps { - store: VectorStore | null; - files: VectorStoreFile[]; - isLoadingStore: boolean; - isLoadingFiles: boolean; - errorStore: Error | null; - errorFiles: Error | null; - id: string; -} - -export function VectorStoreDetailView({ - store, - files, - isLoadingStore, - isLoadingFiles, - errorStore, - errorFiles, - id, -}: VectorStoreDetailViewProps) { - const title = "Vector Store Details"; - const router = useRouter(); - - const handleFileClick = (fileId: string) => { - router.push(`/logs/vector-stores/${id}/files/${fileId}`); - }; - - if (errorStore) { - return ; - } - if (isLoadingStore) { - return ; - } - if (!store) { - return ; - } - - const mainContent = ( - <> - - - Files - - - {isLoadingFiles ? ( - - ) : errorFiles ? ( -
- Error loading files: {errorFiles.message} -
- ) : files.length > 0 ? ( - - Files in this vector store - - - ID - Status - Created - Usage Bytes - - - - {files.map(file => ( - - - - - {file.status} - - {new Date(file.created_at * 1000).toLocaleString()} - - {file.usage_bytes} - - ))} - -
- ) : ( -

- No files in this vector store. -

- )} -
-
- - ); - - const sidebar = ( - - - - - - - - - - - ); - - return ( - - ); -}
diff --git a/llama_stack/ui/next.config.ts b/llama_stack/ui/next.config.ts
deleted file mode 100644
index e9ffa3083..000000000
--- a/llama_stack/ui/next.config.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import type { NextConfig } from "next";
-
-const nextConfig: NextConfig = {
-  /* config options here */
-};
-
-export default nextConfig;
diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json
deleted file mode 100644
index 8e93fc5ab..000000000
--- a/llama_stack/ui/package-lock.json
+++ /dev/null
@@ -1,14148 +0,0 @@
"sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/@babel/code-frame": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz", - "integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-validator-identifier": "^7.27.1", - "js-tokens": "^4.0.0", - "picocolors": "^1.1.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/compat-data": { - "version": "7.28.4", - "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.28.4.tgz", - "integrity": "sha512-YsmSKC29MJwf0gF8Rjjrg5LQCmyh+j/nD8/eP7f+BeoQTKYqs9RoWbjGOdy0+1Ekr68RJZMUOPVQaQisnIo4Rw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/core": { - "version": "7.28.4", - "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.4.tgz", - "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@babel/generator": "^7.28.3", - "@babel/helper-compilation-targets": "^7.27.2", - "@babel/helper-module-transforms": "^7.28.3", - "@babel/helpers": "^7.28.4", - "@babel/parser": "^7.28.4", - "@babel/template": "^7.27.2", - "@babel/traverse": "^7.28.4", - "@babel/types": "^7.28.4", - "@jridgewell/remapping": "^2.3.5", - "convert-source-map": "^2.0.0", - "debug": "^4.1.0", - "gensync": "^1.0.0-beta.2", - "json5": "^2.2.3", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/babel" - } - }, - "node_modules/@babel/core/node_modules/json5": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", - "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", - "dev": true, - "license": "MIT", - "bin": { - "json5": "lib/cli.js" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/@babel/core/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/generator": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.28.3.tgz", - "integrity": "sha512-3lSpxGgvnmZznmBkCRnVREPUFJv2wrv9iAoFDvADJc0ypmdOxdUtcLeBgBJ6zE0PMeTKnxeQzyk0xTBq4Ep7zw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.28.3", - "@babel/types": "^7.28.2", - "@jridgewell/gen-mapping": "^0.3.12", - "@jridgewell/trace-mapping": "^0.3.28", - "jsesc": "^3.0.2" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets": { - "version": "7.27.2", - "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.27.2.tgz", - "integrity": "sha512-2+1thGUUWWjLTYTHZWK1n8Yga0ijBz1XAhUXcKy81rd5g6yh7hGqMp45v7cadSbEHc9G3OTv45SyneRN3ps4DQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/compat-data": "^7.27.2", - "@babel/helper-validator-option": "^7.27.1", - 
"browserslist": "^4.24.0", - "lru-cache": "^5.1.1", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/helper-globals": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz", - "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-imports": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.27.1.tgz", - "integrity": "sha512-0gSFWUPNXNopqtIPQvlD5WgXYI5GY2kP2cCvoT8kczjbfcfuIljTbcWrulD1CIPIX2gt1wghbDy08yE1p+/r3w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/traverse": "^7.27.1", - "@babel/types": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-transforms": { - "version": "7.28.3", - "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.3.tgz", - "integrity": "sha512-gytXUbs8k2sXS9PnQptz5o0QnpLL51SwASIORY6XaBKF88nsOT0Zw9szLqlSGQDP/4TljBAD5y98p2U1fqkdsw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-module-imports": "^7.27.1", - "@babel/helper-validator-identifier": "^7.27.1", - "@babel/traverse": "^7.28.3" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0" - } - }, - "node_modules/@babel/helper-plugin-utils": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.27.1.tgz", - "integrity": "sha512-1gn1Up5YXka3YYAHGKpbideQ5Yjf1tDa9qYcgysz+cNCXukyLl6DjPXhD3VRwSb8c0J9tA4b2+rHEZtc6R0tlw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-string-parser": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", - "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-identifier": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.27.1.tgz", - "integrity": "sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-option": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", - "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helpers": { - "version": "7.28.4", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.4.tgz", - "integrity": 
"sha512-HFN59MmQXGHVyYadKLVumYsA9dBFun/ldYxipEjzA4196jpLZd8UjEEBLkbEkvfYreDqJhZxYAWFPtrfhNpj4w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/template": "^7.27.2", - "@babel/types": "^7.28.4" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/parser": { - "version": "7.28.4", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.28.4.tgz", - "integrity": "sha512-yZbBqeM6TkpP9du/I2pUZnJsRMGGvOuIrhjzC1AwHwW+6he4mni6Bp/m8ijn0iOuZuPI2BfkCoSRunpyjnrQKg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.28.4" - }, - "bin": { - "parser": "bin/babel-parser.js" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@babel/plugin-syntax-async-generators": { - "version": "7.8.4", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz", - "integrity": "sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.8.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-bigint": { - "version": "7.8.3", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-bigint/-/plugin-syntax-bigint-7.8.3.tgz", - "integrity": "sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.8.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-class-properties": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-class-properties/-/plugin-syntax-class-properties-7.12.13.tgz", - "integrity": "sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.12.13" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-class-static-block": { - "version": "7.14.5", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-class-static-block/-/plugin-syntax-class-static-block-7.14.5.tgz", - "integrity": "sha512-b+YyPmr6ldyNnM6sqYeMWE+bgJcJpO6yS4QD7ymxgH34GBPNDM/THBh8iunyvKIZztiwLH4CJZ0RxTk9emgpjw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.14.5" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-import-attributes": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-import-attributes/-/plugin-syntax-import-attributes-7.27.1.tgz", - "integrity": "sha512-oFT0FrKHgF53f4vOsZGi2Hh3I35PfSmVs4IBFLFj4dnafP+hIWDLg3VyKmUHfLoLHlyxY4C7DGtmHuJgn+IGww==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-import-meta": { - "version": "7.10.4", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-import-meta/-/plugin-syntax-import-meta-7.10.4.tgz", - "integrity": "sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==", - "dev": true, - "license": "MIT", - "dependencies": { - 
"@babel/helper-plugin-utils": "^7.10.4" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-json-strings": { - "version": "7.8.3", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-json-strings/-/plugin-syntax-json-strings-7.8.3.tgz", - "integrity": "sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.8.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-jsx": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.27.1.tgz", - "integrity": "sha512-y8YTNIeKoyhGd9O0Jiyzyyqk8gdjnumGTQPsz0xOZOQ2RmkVJeZ1vmmfIvFEKqucBG6axJGBZDE/7iI5suUI/w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-logical-assignment-operators": { - "version": "7.10.4", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-logical-assignment-operators/-/plugin-syntax-logical-assignment-operators-7.10.4.tgz", - "integrity": "sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.10.4" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-nullish-coalescing-operator": { - "version": "7.8.3", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-nullish-coalescing-operator/-/plugin-syntax-nullish-coalescing-operator-7.8.3.tgz", - "integrity": "sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.8.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-numeric-separator": { - "version": "7.10.4", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-numeric-separator/-/plugin-syntax-numeric-separator-7.10.4.tgz", - "integrity": "sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.10.4" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-object-rest-spread": { - "version": "7.8.3", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-object-rest-spread/-/plugin-syntax-object-rest-spread-7.8.3.tgz", - "integrity": "sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.8.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-optional-catch-binding": { - "version": "7.8.3", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-catch-binding/-/plugin-syntax-optional-catch-binding-7.8.3.tgz", - "integrity": "sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.8.0" - }, - "peerDependencies": { - "@babel/core": 
"^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-optional-chaining": { - "version": "7.8.3", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-chaining/-/plugin-syntax-optional-chaining-7.8.3.tgz", - "integrity": "sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.8.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-private-property-in-object": { - "version": "7.14.5", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-private-property-in-object/-/plugin-syntax-private-property-in-object-7.14.5.tgz", - "integrity": "sha512-0wVnp9dxJ72ZUJDV27ZfbSj6iHLoytYZmh3rFcxNnvsJF3ktkzLDZPy/mA17HGsaQT3/DQsWYX1f1QGWkCoVUg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.14.5" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-top-level-await": { - "version": "7.14.5", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.14.5.tgz", - "integrity": "sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.14.5" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-typescript": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.27.1.tgz", - "integrity": "sha512-xfYCBMxveHrRMnAWl1ZlPXOZjzkN82THFvLhQhFXFt81Z5HnN+EtUkZhv/zcKpmT3fzmWZB0ywiBrbC3vogbwQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/runtime": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.27.1.tgz", - "integrity": "sha512-1x3D2xEk2fRo3PAhwQwu5UubzgiVWSXTBfWpVd2Mx2AzRqJuDJCsgaDVZ7HB5iGzDW1Hl1sWN2mFyKjmR9uAog==", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/template": { - "version": "7.27.2", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.2.tgz", - "integrity": "sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@babel/parser": "^7.27.2", - "@babel/types": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/traverse": { - "version": "7.28.4", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.28.4.tgz", - "integrity": "sha512-YEzuboP2qvQavAcjgQNVgsvHIDv6ZpwXvcvjmyySP2DIMuByS/6ioU5G9pYrWHM6T2YDfc7xga9iNzYOs12CFQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@babel/generator": "^7.28.3", - "@babel/helper-globals": "^7.28.0", - "@babel/parser": "^7.28.4", - "@babel/template": "^7.27.2", - "@babel/types": "^7.28.4", - "debug": "^4.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/types": { - "version": "7.28.4", - "resolved": 
"https://registry.npmjs.org/@babel/types/-/types-7.28.4.tgz", - "integrity": "sha512-bkFqkLhh3pMBUQQkpVgWDWq/lqzc2678eUyDlTBhRqhCHFguYYGM0Efga7tYk4TogG/3x0EEl66/OQ+WGbWB/Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-string-parser": "^7.27.1", - "@babel/helper-validator-identifier": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@bcoe/v8-coverage": { - "version": "0.2.3", - "resolved": "https://registry.npmjs.org/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz", - "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@cspotcode/source-map-support": { - "version": "0.8.1", - "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", - "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/trace-mapping": "0.3.9" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@cspotcode/source-map-support/node_modules/@jridgewell/trace-mapping": { - "version": "0.3.9", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", - "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/resolve-uri": "^3.0.3", - "@jridgewell/sourcemap-codec": "^1.4.10" - } - }, - "node_modules/@csstools/color-helpers": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-5.1.0.tgz", - "integrity": "sha512-S11EXWJyy0Mz5SYvRmY8nJYTFFd1LCNV+7cXyAgQtOOuzb4EsgfqDufL+9esx72/eLhsRdGZwaldu/h+E4t4BA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT-0", - "engines": { - "node": ">=18" - } - }, - "node_modules/@csstools/css-calc": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-2.1.4.tgz", - "integrity": "sha512-3N8oaj+0juUw/1H3YwmDDJXCgTB1gKU6Hc/bB502u9zR0q2vd786XJH9QfrKIEgFlZmhZiq6epXl4rHqhzsIgQ==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT", - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@csstools/css-parser-algorithms": "^3.0.5", - "@csstools/css-tokenizer": "^3.0.4" - } - }, - "node_modules/@csstools/css-color-parser": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-3.1.0.tgz", - "integrity": "sha512-nbtKwh3a6xNVIp/VRuXV64yTKnb1IjTAEEh3irzS+HkKjAOYLTGNb9pmVNntZ8iVBHcWDA2Dof0QtPgFI1BaTA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT", - "dependencies": { - "@csstools/color-helpers": "^5.1.0", - "@csstools/css-calc": "^2.1.4" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@csstools/css-parser-algorithms": "^3.0.5", - "@csstools/css-tokenizer": "^3.0.4" - } - }, - "node_modules/@csstools/css-parser-algorithms": { - "version": 
"3.0.5", - "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-3.0.5.tgz", - "integrity": "sha512-DaDeUkXZKjdGhgYaHNJTV9pV7Y9B3b644jCLs9Upc3VeNGg6LWARAT6O+Q+/COo+2gg/bM5rhpMAtf70WqfBdQ==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT", - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@csstools/css-tokenizer": "^3.0.4" - } - }, - "node_modules/@csstools/css-tokenizer": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-3.0.4.tgz", - "integrity": "sha512-Vd/9EVDiu6PPJt9yAh6roZP6El1xHrdvIVGjyBsHR0RYwNHgL7FJPyIIW4fANJNG6FtyZfvlRPpFI4ZM/lubvw==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/@emnapi/core": { - "version": "1.4.3", - "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.4.3.tgz", - "integrity": "sha512-4m62DuCE07lw01soJwPiBGC0nAww0Q+RY70VZ+n49yDIO13yyinhbWCeNnaob0lakDtWQzSdtNWzJeOJt2ma+g==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "@emnapi/wasi-threads": "1.0.2", - "tslib": "^2.4.0" - } - }, - "node_modules/@emnapi/runtime": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.5.0.tgz", - "integrity": "sha512-97/BJ3iXHww3djw6hYIfErCZFee7qCtrneuLa20UXFCOTCfBM2cvQHjWJ2EG0s0MtdNwInarqCTz35i4wWXHsQ==", - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, - "node_modules/@emnapi/wasi-threads": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.0.2.tgz", - "integrity": "sha512-5n3nTJblwRi8LlXkJ9eBzu+kZR8Yxcc7ubakyQTFzPMtIhFpUBRbsnc2Dv88IZDIbCDlBiWrknhB4Lsz7mg6BA==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, - "node_modules/@eslint-community/eslint-utils": { - "version": "4.9.0", - "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.9.0.tgz", - "integrity": "sha512-ayVFHdtZ+hsq1t2Dy24wCmGXGe4q9Gu3smhLYALJrr473ZH27MsnSL+LKUlimp4BWJqMDMLmPpx/Q9R3OAlL4g==", - "dev": true, - "license": "MIT", - "dependencies": { - "eslint-visitor-keys": "^3.4.3" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - }, - "peerDependencies": { - "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" - } - }, - "node_modules/@eslint-community/eslint-utils/node_modules/eslint-visitor-keys": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", - "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/@eslint-community/regexpp": { - "version": "4.12.1", - "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.12.1.tgz", - "integrity": "sha512-CCZCDJuduB9OUkFkY2IgppNZMi2lBQgD2qzwXkEia16cge2pijY/aXi96CJMquDMn3nJdlPV1A5KrJEXwfLNzQ==", - "dev": true, - 
"license": "MIT", - "engines": { - "node": "^12.0.0 || ^14.0.0 || >=16.0.0" - } - }, - "node_modules/@eslint/config-array": { - "version": "0.21.0", - "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.21.0.tgz", - "integrity": "sha512-ENIdc4iLu0d93HeYirvKmrzshzofPw6VkZRKQGe9Nv46ZnWUzcF1xV01dcvEg/1wXUR61OmmlSfyeyO7EvjLxQ==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@eslint/object-schema": "^2.1.6", - "debug": "^4.3.1", - "minimatch": "^3.1.2" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - } - }, - "node_modules/@eslint/config-helpers": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.0.tgz", - "integrity": "sha512-WUFvV4WoIwW8Bv0KeKCIIEgdSiFOsulyN0xrMu+7z43q/hkOLXjvb5u7UC9jDxvRzcrbEmuZBX5yJZz1741jog==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@eslint/core": "^0.16.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - } - }, - "node_modules/@eslint/core": { - "version": "0.16.0", - "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.16.0.tgz", - "integrity": "sha512-nmC8/totwobIiFcGkDza3GIKfAw1+hLiYVrh3I1nIomQ8PEr5cxg34jnkmGawul/ep52wGRAcyeDCNtWKSOj4Q==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@types/json-schema": "^7.0.15" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - } - }, - "node_modules/@eslint/eslintrc": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.3.1.tgz", - "integrity": "sha512-gtF186CXhIl1p4pJNGZw8Yc6RlshoePRvE0X91oPGb3vZ8pM3qOS9W9NGPat9LziaBV7XrJWGylNQXkGcnM3IQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ajv": "^6.12.4", - "debug": "^4.3.2", - "espree": "^10.0.1", - "globals": "^14.0.0", - "ignore": "^5.2.0", - "import-fresh": "^3.2.1", - "js-yaml": "^4.1.0", - "minimatch": "^3.1.2", - "strip-json-comments": "^3.1.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/@eslint/js": { - "version": "9.37.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.37.0.tgz", - "integrity": "sha512-jaS+NJ+hximswBG6pjNX0uEJZkrT0zwpVi3BA3vX22aFGjJjmgSTSmPpZCRKmoBL5VY/M6p0xsSJx7rk7sy5gg==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://eslint.org/donate" - } - }, - "node_modules/@eslint/object-schema": { - "version": "2.1.6", - "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-2.1.6.tgz", - "integrity": "sha512-RBMg5FRL0I0gs51M/guSAj5/e14VQ4tpZnQNWwuDT66P14I43ItmPfIZRhO9fUVIPOAQXU47atlywZ/czoqFPA==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - } - }, - "node_modules/@eslint/plugin-kit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.4.0.tgz", - "integrity": "sha512-sB5uyeq+dwCWyPi31B2gQlVlo+j5brPlWx4yZBrEaRo/nhdDE8Xke1gsGgtiBdaBTxuTkceLVuVt/pclrasb0A==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@eslint/core": "^0.16.0", - "levn": "^0.4.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - } - }, - "node_modules/@floating-ui/core": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.0.tgz", - "integrity": 
"sha512-FRdBLykrPPA6P76GGGqlex/e7fbe0F1ykgxHYNXQsH/iTEtjMj/f9bpY5oQqbjt5VgZvgz/uKXbGuROijh3VLA==", - "license": "MIT", - "dependencies": { - "@floating-ui/utils": "^0.2.9" - } - }, - "node_modules/@floating-ui/dom": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.0.tgz", - "integrity": "sha512-lGTor4VlXcesUMh1cupTUTDoCxMb0V6bm3CnxHzQcw8Eaf1jQbgQX4i02fYgT0vJ82tb5MZ4CZk1LRGkktJCzg==", - "license": "MIT", - "dependencies": { - "@floating-ui/core": "^1.7.0", - "@floating-ui/utils": "^0.2.9" - } - }, - "node_modules/@floating-ui/react-dom": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.2.tgz", - "integrity": "sha512-06okr5cgPzMNBy+Ycse2A6udMi4bqwW/zgBF/rwjcNqWkyr82Mcg8b0vjX8OJpZFy/FKjJmw6wV7t44kK6kW7A==", - "license": "MIT", - "dependencies": { - "@floating-ui/dom": "^1.0.0" - }, - "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" - } - }, - "node_modules/@floating-ui/utils": { - "version": "0.2.9", - "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.9.tgz", - "integrity": "sha512-MDWhGtE+eHw5JW7lq4qhc5yRLS11ERl1c7Z6Xd0a58DozHES6EnNNwUWbMiG4J9Cgj053Bhk8zvlhFYKVhULwg==", - "license": "MIT" - }, - "node_modules/@humanfs/core": { - "version": "0.19.1", - "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", - "integrity": "sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=18.18.0" - } - }, - "node_modules/@humanfs/node": { - "version": "0.16.6", - "resolved": "https://registry.npmjs.org/@humanfs/node/-/node-0.16.6.tgz", - "integrity": "sha512-YuI2ZHQL78Q5HbhDiBA1X4LmYdXCKCMQIfw0pw7piHJwyREFebJUvrQN4cMssyES6x+vfUbx1CIpaQUKYdQZOw==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@humanfs/core": "^0.19.1", - "@humanwhocodes/retry": "^0.3.0" - }, - "engines": { - "node": ">=18.18.0" - } - }, - "node_modules/@humanfs/node/node_modules/@humanwhocodes/retry": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/@humanwhocodes/retry/-/retry-0.3.1.tgz", - "integrity": "sha512-JBxkERygn7Bv/GbN5Rv8Ul6LVknS+5Bp6RgDC/O8gEBU/yeH5Ui5C/OlWrTb6qct7LjjfT6Re2NxB0ln0yYybA==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=18.18" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/nzakas" - } - }, - "node_modules/@humanwhocodes/module-importer": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", - "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=12.22" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/nzakas" - } - }, - "node_modules/@humanwhocodes/retry": { - "version": "0.4.3", - "resolved": "https://registry.npmjs.org/@humanwhocodes/retry/-/retry-0.4.3.tgz", - "integrity": "sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=18.18" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/nzakas" - } - }, - "node_modules/@img/sharp-darwin-arm64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.3.tgz", - "integrity": 
"sha512-ryFMfvxxpQRsgZJqBd4wsttYQbCxsJksrv9Lw/v798JcQ8+w84mBWuXwl+TT0WJ/WrYOLaYpwQXi3sA9nTIaIg==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-arm64": "1.2.0" - } - }, - "node_modules/@img/sharp-darwin-x64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.3.tgz", - "integrity": "sha512-yHpJYynROAj12TA6qil58hmPmAwxKKC7reUqtGLzsOHfP7/rniNGTL8tjWX6L3CTV4+5P4ypcS7Pp+7OB+8ihA==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-x64": "1.2.0" - } - }, - "node_modules/@img/sharp-libvips-darwin-arm64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.0.tgz", - "integrity": "sha512-sBZmpwmxqwlqG9ueWFXtockhsxefaV6O84BMOrhtg/YqbTaRdqDE7hxraVE3y6gVM4eExmfzW4a8el9ArLeEiQ==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-darwin-x64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.0.tgz", - "integrity": "sha512-M64XVuL94OgiNHa5/m2YvEQI5q2cl9d/wk0qFTDVXcYzi43lxuiFTftMR1tOnFQovVXNZJ5TURSDK2pNe9Yzqg==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-arm": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.0.tgz", - "integrity": "sha512-mWd2uWvDtL/nvIzThLq3fr2nnGfyr/XMXlq8ZJ9WMR6PXijHlC3ksp0IpuhK6bougvQrchUAfzRLnbsen0Cqvw==", - "cpu": [ - "arm" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-arm64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.0.tgz", - "integrity": "sha512-RXwd0CgG+uPRX5YYrkzKyalt2OJYRiJQ8ED/fi1tq9WQW2jsQIn0tqrlR5l5dr/rjqq6AHAxURhj2DVjyQWSOA==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-ppc64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.0.tgz", - "integrity": "sha512-Xod/7KaDDHkYu2phxxfeEPXfVXFKx70EAFZ0qyUdOjCcxbjqyJOEUpDe6RIyaunGxT34Anf9ue/wuWOqBW2WcQ==", - "cpu": [ - "ppc64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-s390x": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.0.tgz", - 
"integrity": "sha512-eMKfzDxLGT8mnmPJTNMcjfO33fLiTDsrMlUVcp6b96ETbnJmd4uvZxVJSKPQfS+odwfVaGifhsB07J1LynFehw==", - "cpu": [ - "s390x" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-x64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.0.tgz", - "integrity": "sha512-ZW3FPWIc7K1sH9E3nxIGB3y3dZkpJlMnkk7z5tu1nSkBoCgw2nSRTFHI5pB/3CQaJM0pdzMF3paf9ckKMSE9Tg==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linuxmusl-arm64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.0.tgz", - "integrity": "sha512-UG+LqQJbf5VJ8NWJ5Z3tdIe/HXjuIdo4JeVNADXBFuG7z9zjoegpzzGIyV5zQKi4zaJjnAd2+g2nna8TZvuW9Q==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linuxmusl-x64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.0.tgz", - "integrity": "sha512-SRYOLR7CXPgNze8akZwjoGBoN1ThNZoqpOgfnOxmWsklTGVfJiGJoC/Lod7aNMGA1jSsKWM1+HRX43OP6p9+6Q==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-linux-arm": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.3.tgz", - "integrity": "sha512-oBK9l+h6KBN0i3dC8rYntLiVfW8D8wH+NPNT3O/WBHeW0OQWCjfWksLUaPidsrDKpJgXp3G3/hkmhptAW0I3+A==", - "cpu": [ - "arm" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm": "1.2.0" - } - }, - "node_modules/@img/sharp-linux-arm64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.3.tgz", - "integrity": "sha512-QdrKe3EvQrqwkDrtuTIjI0bu6YEJHTgEeqdzI3uWJOH6G1O8Nl1iEeVYRGdj1h5I21CqxSvQp1Yv7xeU3ZewbA==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm64": "1.2.0" - } - }, - "node_modules/@img/sharp-linux-ppc64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.3.tgz", - "integrity": "sha512-GLtbLQMCNC5nxuImPR2+RgrviwKwVql28FWZIW1zWruy6zLgA5/x2ZXk3mxj58X/tszVF69KK0Is83V8YgWhLA==", - "cpu": [ - "ppc64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-ppc64": "1.2.0" - } - }, - "node_modules/@img/sharp-linux-s390x": { - "version": "0.34.3", - "resolved": 
"https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.3.tgz", - "integrity": "sha512-3gahT+A6c4cdc2edhsLHmIOXMb17ltffJlxR0aC2VPZfwKoTGZec6u5GrFgdR7ciJSsHT27BD3TIuGcuRT0KmQ==", - "cpu": [ - "s390x" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-s390x": "1.2.0" - } - }, - "node_modules/@img/sharp-linux-x64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.3.tgz", - "integrity": "sha512-8kYso8d806ypnSq3/Ly0QEw90V5ZoHh10yH0HnrzOCr6DKAPI6QVHvwleqMkVQ0m+fc7EH8ah0BB0QPuWY6zJQ==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-x64": "1.2.0" - } - }, - "node_modules/@img/sharp-linuxmusl-arm64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.3.tgz", - "integrity": "sha512-vAjbHDlr4izEiXM1OTggpCcPg9tn4YriK5vAjowJsHwdBIdx0fYRsURkxLG2RLm9gyBq66gwtWI8Gx0/ov+JKQ==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-arm64": "1.2.0" - } - }, - "node_modules/@img/sharp-linuxmusl-x64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.3.tgz", - "integrity": "sha512-gCWUn9547K5bwvOn9l5XGAEjVTTRji4aPTqLzGXHvIr6bIDZKNTA34seMPgM0WmSf+RYBH411VavCejp3PkOeQ==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-x64": "1.2.0" - } - }, - "node_modules/@img/sharp-wasm32": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.3.tgz", - "integrity": "sha512-+CyRcpagHMGteySaWos8IbnXcHgfDn7pO2fiC2slJxvNq9gDipYBN42/RagzctVRKgxATmfqOSulgZv5e1RdMg==", - "cpu": [ - "wasm32" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT", - "optional": true, - "dependencies": { - "@emnapi/runtime": "^1.4.4" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-arm64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.3.tgz", - "integrity": "sha512-MjnHPnbqMXNC2UgeLJtX4XqoVHHlZNd+nPt1kRPmj63wURegwBhZlApELdtxM2OIZDRv/DFtLcNhVbd1z8GYXQ==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-ia32": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.3.tgz", - "integrity": 
"sha512-xuCdhH44WxuXgOM714hn4amodJMZl3OEvf0GVTm0BEyMeA2to+8HEdRPShH0SLYptJY1uBw+SCFP9WVQi1Q/cw==", - "cpu": [ - "ia32" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-win32-x64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.3.tgz", - "integrity": "sha512-OWwz05d++TxzLEv4VnsTz5CmZ6mI6S05sfQGEMrNrQcOEERbX46332IvE7pO/EUiw7jUrrS40z/M7kPyjfl04g==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", - "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "dev": true, - "license": "ISC", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@isaacs/fs-minipass": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", - "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==", - "dev": true, - "license": "ISC", - "dependencies": { - "minipass": "^7.0.4" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@istanbuljs/load-nyc-config": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", - "integrity": "sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "camelcase": "^5.3.1", - "find-up": "^4.1.0", - "get-package-type": "^0.1.0", - "js-yaml": "^3.13.1", - "resolve-from": "^5.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/@istanbuljs/load-nyc-config/node_modules/argparse": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", - "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", - "dev": true, - "license": "MIT", - "dependencies": { - "sprintf-js": "~1.0.2" - } - }, - "node_modules/@istanbuljs/load-nyc-config/node_modules/find-up": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", - "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", - "dev": true, - "license": "MIT", - "dependencies": { - "locate-path": "^5.0.0", - "path-exists": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/@istanbuljs/load-nyc-config/node_modules/js-yaml": { - "version": "3.14.1", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", - "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", - "dev": true, - "license": "MIT", - "dependencies": { - "argparse": "^1.0.7", - "esprima": "^4.0.0" - }, - "bin": { - "js-yaml": 
"bin/js-yaml.js" - } - }, - "node_modules/@istanbuljs/load-nyc-config/node_modules/locate-path": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", - "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-locate": "^4.1.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/@istanbuljs/load-nyc-config/node_modules/p-limit": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", - "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-try": "^2.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@istanbuljs/load-nyc-config/node_modules/p-locate": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", - "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-limit": "^2.2.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/@istanbuljs/load-nyc-config/node_modules/resolve-from": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", - "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/@istanbuljs/schema": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/@istanbuljs/schema/-/schema-0.1.3.tgz", - "integrity": "sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/@jest/console": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/console/-/console-30.2.0.tgz", - "integrity": "sha512-+O1ifRjkvYIkBqASKWgLxrpEhQAAE7hY77ALLUufSk5717KfOShg6IbqLmdsLMPdUiFvA2kTs0R7YZy+l0IzZQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/types": "30.2.0", - "@types/node": "*", - "chalk": "^4.1.2", - "jest-message-util": "30.2.0", - "jest-util": "30.2.0", - "slash": "^3.0.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/core": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/core/-/core-30.2.0.tgz", - "integrity": "sha512-03W6IhuhjqTlpzh/ojut/pDB2LPRygyWX8ExpgHtQA8H/3K7+1vKmcINx5UzeOX1se6YEsBsOHQ1CRzf3fOwTQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/console": "30.2.0", - "@jest/pattern": "30.0.1", - "@jest/reporters": "30.2.0", - "@jest/test-result": "30.2.0", - "@jest/transform": "30.2.0", - "@jest/types": "30.2.0", - "@types/node": "*", - "ansi-escapes": "^4.3.2", - "chalk": "^4.1.2", - "ci-info": "^4.2.0", - "exit-x": "^0.2.2", - "graceful-fs": "^4.2.11", - "jest-changed-files": "30.2.0", - "jest-config": "30.2.0", - "jest-haste-map": "30.2.0", - "jest-message-util": "30.2.0", - "jest-regex-util": "30.0.1", - "jest-resolve": "30.2.0", - "jest-resolve-dependencies": "30.2.0", - "jest-runner": "30.2.0", - "jest-runtime": "30.2.0", - "jest-snapshot": "30.2.0", - "jest-util": "30.2.0", - "jest-validate": "30.2.0", - "jest-watcher": "30.2.0", 
- "micromatch": "^4.0.8", - "pretty-format": "30.2.0", - "slash": "^3.0.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" - }, - "peerDependenciesMeta": { - "node-notifier": { - "optional": true - } - } - }, - "node_modules/@jest/core/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/@jest/core/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/core/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/@jest/diff-sequences": { - "version": "30.0.1", - "resolved": "https://registry.npmjs.org/@jest/diff-sequences/-/diff-sequences-30.0.1.tgz", - "integrity": "sha512-n5H8QLDJ47QqbCNn5SuFjCRDrOLEZ0h8vAHCK5RL9Ls7Xa8AQLa/YxAc9UjFqoEDM48muwtBGjtMY5cr0PLDCw==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/environment": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/environment/-/environment-30.2.0.tgz", - "integrity": "sha512-/QPTL7OBJQ5ac09UDRa3EQes4gt1FTEG/8jZ/4v5IVzx+Cv7dLxlVIvfvSVRiiX2drWyXeBjkMSR8hvOWSog5g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/fake-timers": "30.2.0", - "@jest/types": "30.2.0", - "@types/node": "*", - "jest-mock": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/environment-jsdom-abstract": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/environment-jsdom-abstract/-/environment-jsdom-abstract-30.2.0.tgz", - "integrity": "sha512-kazxw2L9IPuZpQ0mEt9lu9Z98SqR74xcagANmMBU16X0lS23yPc0+S6hGLUz8kVRlomZEs/5S/Zlpqwf5yu6OQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/environment": "30.2.0", - "@jest/fake-timers": "30.2.0", - "@jest/types": "30.2.0", - "@types/jsdom": "^21.1.7", - "@types/node": "*", - "jest-mock": "30.2.0", - "jest-util": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "canvas": "^3.0.0", - "jsdom": "*" - }, - "peerDependenciesMeta": { - "canvas": { - "optional": true - } - } - }, - "node_modules/@jest/expect": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/expect/-/expect-30.2.0.tgz", - "integrity": "sha512-V9yxQK5erfzx99Sf+7LbhBwNWEZ9eZay8qQ9+JSC0TrMR1pMDHLMY+BnVPacWU6Jamrh252/IKo4F1Xn/zfiqA==", - "dev": true, - "license": "MIT", - "dependencies": { - "expect": "30.2.0", - "jest-snapshot": 
"30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/expect-utils": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-30.2.0.tgz", - "integrity": "sha512-1JnRfhqpD8HGpOmQp180Fo9Zt69zNtC+9lR+kT7NVL05tNXIi+QC8Csz7lfidMoVLPD3FnOtcmp0CEFnxExGEA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/get-type": "30.1.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/fake-timers": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/fake-timers/-/fake-timers-30.2.0.tgz", - "integrity": "sha512-HI3tRLjRxAbBy0VO8dqqm7Hb2mIa8d5bg/NJkyQcOk7V118ObQML8RC5luTF/Zsg4474a+gDvhce7eTnP4GhYw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/types": "30.2.0", - "@sinonjs/fake-timers": "^13.0.0", - "@types/node": "*", - "jest-message-util": "30.2.0", - "jest-mock": "30.2.0", - "jest-util": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/get-type": { - "version": "30.1.0", - "resolved": "https://registry.npmjs.org/@jest/get-type/-/get-type-30.1.0.tgz", - "integrity": "sha512-eMbZE2hUnx1WV0pmURZY9XoXPkUYjpc55mb0CrhtdWLtzMQPFvu/rZkTLZFTsdaVQa+Tr4eWAteqcUzoawq/uA==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/globals": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/globals/-/globals-30.2.0.tgz", - "integrity": "sha512-b63wmnKPaK+6ZZfpYhz9K61oybvbI1aMcIs80++JI1O1rR1vaxHUCNqo3ITu6NU0d4V34yZFoHMn/uoKr/Rwfw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/environment": "30.2.0", - "@jest/expect": "30.2.0", - "@jest/types": "30.2.0", - "jest-mock": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/pattern": { - "version": "30.0.1", - "resolved": "https://registry.npmjs.org/@jest/pattern/-/pattern-30.0.1.tgz", - "integrity": "sha512-gWp7NfQW27LaBQz3TITS8L7ZCQ0TLvtmI//4OwlQRx4rnWxcPNIYjxZpDcN4+UlGxgm3jS5QPz8IPTCkb59wZA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/node": "*", - "jest-regex-util": "30.0.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/reporters": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/reporters/-/reporters-30.2.0.tgz", - "integrity": "sha512-DRyW6baWPqKMa9CzeiBjHwjd8XeAyco2Vt8XbcLFjiwCOEKOvy82GJ8QQnJE9ofsxCMPjH4MfH8fCWIHHDKpAQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@bcoe/v8-coverage": "^0.2.3", - "@jest/console": "30.2.0", - "@jest/test-result": "30.2.0", - "@jest/transform": "30.2.0", - "@jest/types": "30.2.0", - "@jridgewell/trace-mapping": "^0.3.25", - "@types/node": "*", - "chalk": "^4.1.2", - "collect-v8-coverage": "^1.0.2", - "exit-x": "^0.2.2", - "glob": "^10.3.10", - "graceful-fs": "^4.2.11", - "istanbul-lib-coverage": "^3.0.0", - "istanbul-lib-instrument": "^6.0.0", - "istanbul-lib-report": "^3.0.0", - "istanbul-lib-source-maps": "^5.0.0", - "istanbul-reports": "^3.1.3", - "jest-message-util": "30.2.0", - "jest-util": "30.2.0", - "jest-worker": "30.2.0", - "slash": "^3.0.0", - "string-length": "^4.0.2", - "v8-to-istanbul": "^9.0.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "node-notifier": "^8.0.1 || ^9.0.0 || 
^10.0.0" - }, - "peerDependenciesMeta": { - "node-notifier": { - "optional": true - } - } - }, - "node_modules/@jest/schemas": { - "version": "30.0.5", - "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-30.0.5.tgz", - "integrity": "sha512-DmdYgtezMkh3cpU8/1uyXakv3tJRcmcXxBOcO0tbaozPwpmh4YMsnWrQm9ZmZMfa5ocbxzbFk6O4bDPEc/iAnA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@sinclair/typebox": "^0.34.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/snapshot-utils": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/snapshot-utils/-/snapshot-utils-30.2.0.tgz", - "integrity": "sha512-0aVxM3RH6DaiLcjj/b0KrIBZhSX1373Xci4l3cW5xiUWPctZ59zQ7jj4rqcJQ/Z8JuN/4wX3FpJSa3RssVvCug==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/types": "30.2.0", - "chalk": "^4.1.2", - "graceful-fs": "^4.2.11", - "natural-compare": "^1.4.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/source-map": { - "version": "30.0.1", - "resolved": "https://registry.npmjs.org/@jest/source-map/-/source-map-30.0.1.tgz", - "integrity": "sha512-MIRWMUUR3sdbP36oyNyhbThLHyJ2eEDClPCiHVbrYAe5g3CHRArIVpBw7cdSB5fr+ofSfIb2Tnsw8iEHL0PYQg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/trace-mapping": "^0.3.25", - "callsites": "^3.1.0", - "graceful-fs": "^4.2.11" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/test-result": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/test-result/-/test-result-30.2.0.tgz", - "integrity": "sha512-RF+Z+0CCHkARz5HT9mcQCBulb1wgCP3FBvl9VFokMX27acKphwyQsNuWH3c+ojd1LeWBLoTYoxF0zm6S/66mjg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/console": "30.2.0", - "@jest/types": "30.2.0", - "@types/istanbul-lib-coverage": "^2.0.6", - "collect-v8-coverage": "^1.0.2" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/test-sequencer": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/test-sequencer/-/test-sequencer-30.2.0.tgz", - "integrity": "sha512-wXKgU/lk8fKXMu/l5Hog1R61bL4q5GCdT6OJvdAFz1P+QrpoFuLU68eoKuVc4RbrTtNnTL5FByhWdLgOPSph+Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/test-result": "30.2.0", - "graceful-fs": "^4.2.11", - "jest-haste-map": "30.2.0", - "slash": "^3.0.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/transform": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/transform/-/transform-30.2.0.tgz", - "integrity": "sha512-XsauDV82o5qXbhalKxD7p4TZYYdwcaEXC77PPD2HixEFF+6YGppjrAAQurTl2ECWcEomHBMMNS9AH3kcCFx8jA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/core": "^7.27.4", - "@jest/types": "30.2.0", - "@jridgewell/trace-mapping": "^0.3.25", - "babel-plugin-istanbul": "^7.0.1", - "chalk": "^4.1.2", - "convert-source-map": "^2.0.0", - "fast-json-stable-stringify": "^2.1.0", - "graceful-fs": "^4.2.11", - "jest-haste-map": "30.2.0", - "jest-regex-util": "30.0.1", - "jest-util": "30.2.0", - "micromatch": "^4.0.8", - "pirates": "^4.0.7", - "slash": "^3.0.0", - "write-file-atomic": "^5.0.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/types": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/@jest/types/-/types-30.2.0.tgz", - 
"integrity": "sha512-H9xg1/sfVvyfU7o3zMfBEjQ1gcsdeTMgqHoYdN79tuLqfTtuu7WckRA1R5whDwOzxaZAeMKTYWqP+WCAi0CHsg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/pattern": "30.0.1", - "@jest/schemas": "30.0.5", - "@types/istanbul-lib-coverage": "^2.0.6", - "@types/istanbul-reports": "^3.0.4", - "@types/node": "*", - "@types/yargs": "^17.0.33", - "chalk": "^4.1.2" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.13", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", - "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/remapping": { - "version": "2.3.5", - "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz", - "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.5", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/resolve-uri": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", - "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.5", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", - "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", - "dev": true, - "license": "MIT" - }, - "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.31", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", - "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" - } - }, - "node_modules/@napi-rs/wasm-runtime": { - "version": "0.2.12", - "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz", - "integrity": "sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "@emnapi/core": "^1.4.3", - "@emnapi/runtime": "^1.4.3", - "@tybys/wasm-util": "^0.10.0" - } - }, - "node_modules/@napi-rs/wasm-runtime/node_modules/@tybys/wasm-util": { - "version": "0.10.1", - "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz", - "integrity": "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, - "node_modules/@next/env": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.4.tgz", - "integrity": "sha512-27SQhYp5QryzIT5uO8hq99C69eLQ7qkzkDPsk3N+GuS2XgOgoYEeOav7Pf8Tn4drECOVDsDg8oj+/DVy8qQL2A==", - "license": "MIT" - }, - "node_modules/@next/eslint-plugin-next": { - 
"version": "15.5.6", - "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-15.5.6.tgz", - "integrity": "sha512-YxDvsT2fwy1j5gMqk3ppXlsgDopHnkM4BoxSVASbvvgh5zgsK8lvWerDzPip8k3WVzsTZ1O7A7si1KNfN4OZfQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "fast-glob": "3.3.1" - } - }, - "node_modules/@next/swc-darwin-arm64": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.4.tgz", - "integrity": "sha512-nopqz+Ov6uvorej8ndRX6HlxCYWCO3AHLfKK2TYvxoSB2scETOcfm/HSS3piPqc3A+MUgyHoqE6je4wnkjfrOA==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@next/swc-darwin-x64": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.4.tgz", - "integrity": "sha512-QOTCFq8b09ghfjRJKfb68kU9k2K+2wsC4A67psOiMn849K9ZXgCSRQr0oVHfmKnoqCbEmQWG1f2h1T2vtJJ9mA==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@next/swc-linux-arm64-gnu": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.4.tgz", - "integrity": "sha512-eRD5zkts6jS3VfE/J0Kt1VxdFqTnMc3QgO5lFE5GKN3KDI/uUpSyK3CjQHmfEkYR4wCOl0R0XrsjpxfWEA++XA==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@next/swc-linux-arm64-musl": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.4.tgz", - "integrity": "sha512-TOK7iTxmXFc45UrtKqWdZ1shfxuL4tnVAOuuJK4S88rX3oyVV4ZkLjtMT85wQkfBrOOvU55aLty+MV8xmcJR8A==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@next/swc-linux-x64-gnu": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.4.tgz", - "integrity": "sha512-7HKolaj+481FSW/5lL0BcTkA4Ueam9SPYWyN/ib/WGAFZf0DGAN8frNpNZYFHtM4ZstrHZS3LY3vrwlIQfsiMA==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@next/swc-linux-x64-musl": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.4.tgz", - "integrity": "sha512-nlQQ6nfgN0nCO/KuyEUwwOdwQIGjOs4WNMjEUtpIQJPR2NUfmGpW2wkJln1d4nJ7oUzd1g4GivH5GoEPBgfsdw==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@next/swc-win32-arm64-msvc": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.4.tgz", - "integrity": "sha512-PcR2bN7FlM32XM6eumklmyWLLbu2vs+D7nJX8OAIoWy69Kef8mfiN4e8TUv2KohprwifdpFKPzIP1njuCjD0YA==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@next/swc-win32-x64-msvc": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.4.tgz", - "integrity": "sha512-1ur2tSHZj8Px/KMAthmuI9FMp/YFusMMGoRNJaRZMOlSkgvLjzosSdQI0cJAKogdHl3qXUQKL9MGaYvKwA7DXg==", - "cpu": [ - "x64" - ], - "license": "MIT", - 
"optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", - "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", - "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", - "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nolyfill/is-core-module": { - "version": "1.0.39", - "resolved": "https://registry.npmjs.org/@nolyfill/is-core-module/-/is-core-module-1.0.39.tgz", - "integrity": "sha512-nn5ozdjYQpUCZlWGuxcJY/KpxkWQs4DcbMCmKojjyrYDEAGy4Ce19NN4v5MduafTwJlbKc99UA8YhSVqq9yPZA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12.4.0" - } - }, - "node_modules/@panva/hkdf": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/@panva/hkdf/-/hkdf-1.2.1.tgz", - "integrity": "sha512-6oclG6Y3PiDFcoyk8srjLfVKyMfVCKJ27JwNPViuXziFpmdz+MZnZN/aKY0JGXgYuO/VghU0jcOAZgWXZ1Dmrw==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/panva" - } - }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", - "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "dev": true, - "license": "MIT", - "optional": true, - "engines": { - "node": ">=14" - } - }, - "node_modules/@pkgr/core": { - "version": "0.2.9", - "resolved": "https://registry.npmjs.org/@pkgr/core/-/core-0.2.9.tgz", - "integrity": "sha512-QNqXyfVS2wm9hweSYD2O7F0G06uurj9kZ96TRQE5Y9hU7+tgdZwIkbAKc5Ocy1HxEY2kuDQa6cQ1WRs/O5LFKA==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.18.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/pkgr" - } - }, - "node_modules/@radix-ui/number": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz", - "integrity": "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==", - "license": "MIT" - }, - "node_modules/@radix-ui/primitive": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", - "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", - "license": "MIT" - }, - "node_modules/@radix-ui/react-arrow": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", - "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", - "license": "MIT", - 
"dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-collapsible": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz", - "integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-collection": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz", - "integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-compose-refs": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz", - "integrity": "sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg==", - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-context": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz", - "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==", - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dialog": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.15.tgz", - "integrity": "sha512-TCglVRtzlffRNxRMEyR36DGBLJpeusFcgMVD9PZEzAKnUs1lKCgX5u9BmC2Yg+LL9MgZDugFFs1Vl+Jp4t/PGw==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - 
"@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-direction": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz", - "integrity": "sha512-1UEWRX6jnOA2y4H5WczZ44gOOjTEmlqv1uNW4GAJEO5+bauCBhv8snY65Iw5/VOS/ghKN9gr2KjnLKxrsvoMVw==", - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", - "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-escape-keydown": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-dropdown-menu": { - "version": "2.1.16", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz", - "integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-menu": "2.1.16", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-guards": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", - "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==", - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": 
"^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-focus-scope": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", - "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-id": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", - "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-menu": { - "version": "2.1.16", - "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz", - "integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popper": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", - "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", - "license": "MIT", - "dependencies": { - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-rect": "1.1.1", - "@radix-ui/react-use-size": "1.1.1", - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - 
"@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-portal": { - "version": "1.1.9", - "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", - "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-presence": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", - "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-primitive": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", - "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-slot": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-roving-focus": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz", - "integrity": "sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-select": { 
- "version": "2.2.6", - "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.6.tgz", - "integrity": "sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/number": "1.1.1", - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-visually-hidden": "1.2.3", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-separator": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz", - "integrity": "sha512-0HEb8R9E8A+jZjvmFCy/J4xhbXy3TV+9XSnGJ3KvTtjlIUy/YQ/p6UYZvi7YbeoeXdyU9+Y3scizK6hkY37baA==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-tooltip": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz", - "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-visually-hidden": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - 
"@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-callback-ref": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", - "integrity": "sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==", - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-controllable-state": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.2.2.tgz", - "integrity": "sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-effect-event": "0.0.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-effect-event": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-effect-event/-/react-use-effect-event-0.0.2.tgz", - "integrity": "sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-escape-keydown": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz", - "integrity": "sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-layout-effect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz", - "integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==", - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-previous": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-previous/-/react-use-previous-1.1.1.tgz", - "integrity": "sha512-2dHfToCj/pzca2Ck724OZ5L0EVrr3eHRNsG/b3xQJLA2hZpVCS99bLAX+hm1IHXDEnzU6by5z/5MIY794/a8NQ==", - "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - 
}, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-rect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz", - "integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==", - "license": "MIT", - "dependencies": { - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-use-size": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz", - "integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-visually-hidden": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz", - "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/rect": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz", - "integrity": "sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==", - "license": "MIT" - }, - "node_modules/@rtsao/scc": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@rtsao/scc/-/scc-1.1.0.tgz", - "integrity": "sha512-zt6OdqaDoOnJ1ZYsCYGt9YmWzDXl4vQdKTyJev62gFhRGKdx7mcT54V9KIjg+d2wi9EXsPvAPKe7i7WjfVWB8g==", - "dev": true, - "license": "MIT" - }, - "node_modules/@rushstack/eslint-patch": { - "version": "1.11.0", - "resolved": "https://registry.npmjs.org/@rushstack/eslint-patch/-/eslint-patch-1.11.0.tgz", - "integrity": "sha512-zxnHvoMQVqewTJr/W4pKjF0bMGiKJv1WX7bSrkl46Hg0QjESbzBROWK0Wg4RphzSOS5Jiy7eFimmM3UgMrMZbQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@shikijs/core": { - "version": "3.13.0", - "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-3.13.0.tgz", - "integrity": "sha512-3P8rGsg2Eh2qIHekwuQjzWhKI4jV97PhvYjYUzGqjvJfqdQPz+nMlfWahU24GZAyW1FxFI1sYjyhfh5CoLmIUA==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.13.0", - "@shikijs/vscode-textmate": "^10.0.2", - "@types/hast": "^3.0.4", - "hast-util-to-html": "^9.0.5" - } - }, - "node_modules/@shikijs/engine-javascript": { - "version": "3.13.0", - "resolved": "https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-3.13.0.tgz", - "integrity": "sha512-Ty7xv32XCp8u0eQt8rItpMs6rU9Ki6LJ1dQOW3V/56PKDcpvfHPnYFbsx5FFUP2Yim34m/UkazidamMNVR4vKg==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.13.0", - 
"@shikijs/vscode-textmate": "^10.0.2", - "oniguruma-to-es": "^4.3.3" - } - }, - "node_modules/@shikijs/engine-oniguruma": { - "version": "3.13.0", - "resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-3.13.0.tgz", - "integrity": "sha512-O42rBGr4UDSlhT2ZFMxqM7QzIU+IcpoTMzb3W7AlziI1ZF7R8eS2M0yt5Ry35nnnTX/LTLXFPUjRFCIW+Operg==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.13.0", - "@shikijs/vscode-textmate": "^10.0.2" - } - }, - "node_modules/@shikijs/langs": { - "version": "3.13.0", - "resolved": "https://registry.npmjs.org/@shikijs/langs/-/langs-3.13.0.tgz", - "integrity": "sha512-672c3WAETDYHwrRP0yLy3W1QYB89Hbpj+pO4KhxK6FzIrDI2FoEXNiNCut6BQmEApYLfuYfpgOZaqbY+E9b8wQ==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.13.0" - } - }, - "node_modules/@shikijs/themes": { - "version": "3.13.0", - "resolved": "https://registry.npmjs.org/@shikijs/themes/-/themes-3.13.0.tgz", - "integrity": "sha512-Vxw1Nm1/Od8jyA7QuAenaV78BG2nSr3/gCGdBkLpfLscddCkzkL36Q5b67SrLLfvAJTOUzW39x4FHVCFriPVgg==", - "license": "MIT", - "dependencies": { - "@shikijs/types": "3.13.0" - } - }, - "node_modules/@shikijs/types": { - "version": "3.13.0", - "resolved": "https://registry.npmjs.org/@shikijs/types/-/types-3.13.0.tgz", - "integrity": "sha512-oM9P+NCFri/mmQ8LoFGVfVyemm5Hi27330zuOBp0annwJdKH1kOLndw3zCtAVDehPLg9fKqoEx3Ht/wNZxolfw==", - "license": "MIT", - "dependencies": { - "@shikijs/vscode-textmate": "^10.0.2", - "@types/hast": "^3.0.4" - } - }, - "node_modules/@shikijs/vscode-textmate": { - "version": "10.0.2", - "resolved": "https://registry.npmjs.org/@shikijs/vscode-textmate/-/vscode-textmate-10.0.2.tgz", - "integrity": "sha512-83yeghZ2xxin3Nj8z1NMd/NCuca+gsYXswywDy5bHvwlWL8tpTQmzGeUuHd9FC3E/SBEMvzJRwWEOz5gGes9Qg==", - "license": "MIT" - }, - "node_modules/@sinclair/typebox": { - "version": "0.34.41", - "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.41.tgz", - "integrity": "sha512-6gS8pZzSXdyRHTIqoqSVknxolr1kzfy4/CeDnrzsVz8TTIWUbOBr6gnzOmTYJ3eXQNh4IYHIGi5aIL7sOZ2G/g==", - "dev": true, - "license": "MIT" - }, - "node_modules/@sinonjs/commons": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-3.0.1.tgz", - "integrity": "sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "type-detect": "4.0.8" - } - }, - "node_modules/@sinonjs/fake-timers": { - "version": "13.0.5", - "resolved": "https://registry.npmjs.org/@sinonjs/fake-timers/-/fake-timers-13.0.5.tgz", - "integrity": "sha512-36/hTbH2uaWuGVERyC6da9YwGWnzUZXuPro/F2LfsdOsLnCojz/iSH8MxUt/FD2S5XBSVPhmArFUXcpCQ2Hkiw==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "@sinonjs/commons": "^3.0.1" - } - }, - "node_modules/@swc/helpers": { - "version": "0.5.15", - "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.15.tgz", - "integrity": "sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==", - "license": "Apache-2.0", - "dependencies": { - "tslib": "^2.8.0" - } - }, - "node_modules/@tailwindcss/node": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.14.tgz", - "integrity": "sha512-hpz+8vFk3Ic2xssIA3e01R6jkmsAhvkQdXlEbRTk6S10xDAtiQiM3FyvZVGsucefq764euO/b8WUW9ysLdThHw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/remapping": "^2.3.4", - "enhanced-resolve": "^5.18.3", - "jiti": 
"^2.6.0", - "lightningcss": "1.30.1", - "magic-string": "^0.30.19", - "source-map-js": "^1.2.1", - "tailwindcss": "4.1.14" - } - }, - "node_modules/@tailwindcss/oxide": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.14.tgz", - "integrity": "sha512-23yx+VUbBwCg2x5XWdB8+1lkPajzLmALEfMb51zZUBYaYVPDQvBSD/WYDqiVyBIo2BZFa3yw1Rpy3G2Jp+K0dw==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "dependencies": { - "detect-libc": "^2.0.4", - "tar": "^7.5.1" - }, - "engines": { - "node": ">= 10" - }, - "optionalDependencies": { - "@tailwindcss/oxide-android-arm64": "4.1.14", - "@tailwindcss/oxide-darwin-arm64": "4.1.14", - "@tailwindcss/oxide-darwin-x64": "4.1.14", - "@tailwindcss/oxide-freebsd-x64": "4.1.14", - "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.14", - "@tailwindcss/oxide-linux-arm64-gnu": "4.1.14", - "@tailwindcss/oxide-linux-arm64-musl": "4.1.14", - "@tailwindcss/oxide-linux-x64-gnu": "4.1.14", - "@tailwindcss/oxide-linux-x64-musl": "4.1.14", - "@tailwindcss/oxide-wasm32-wasi": "4.1.14", - "@tailwindcss/oxide-win32-arm64-msvc": "4.1.14", - "@tailwindcss/oxide-win32-x64-msvc": "4.1.14" - } - }, - "node_modules/@tailwindcss/oxide-android-arm64": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.14.tgz", - "integrity": "sha512-a94ifZrGwMvbdeAxWoSuGcIl6/DOP5cdxagid7xJv6bwFp3oebp7y2ImYsnZBMTwjn5Ev5xESvS3FFYUGgPODQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-darwin-arm64": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.14.tgz", - "integrity": "sha512-HkFP/CqfSh09xCnrPJA7jud7hij5ahKyWomrC3oiO2U9i0UjP17o9pJbxUN0IJ471GTQQmzwhp0DEcpbp4MZTA==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-darwin-x64": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.14.tgz", - "integrity": "sha512-eVNaWmCgdLf5iv6Qd3s7JI5SEFBFRtfm6W0mphJYXgvnDEAZ5sZzqmI06bK6xo0IErDHdTA5/t7d4eTfWbWOFw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-freebsd-x64": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.14.tgz", - "integrity": "sha512-QWLoRXNikEuqtNb0dhQN6wsSVVjX6dmUFzuuiL09ZeXju25dsei2uIPl71y2Ic6QbNBsB4scwBoFnlBfabHkEw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.14.tgz", - "integrity": "sha512-VB4gjQni9+F0VCASU+L8zSIyjrLLsy03sjcR3bM0V2g4SNamo0FakZFKyUQ96ZVwGK4CaJsc9zd/obQy74o0Fw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { - "version": "4.1.14", - "resolved": 
"https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.14.tgz", - "integrity": "sha512-qaEy0dIZ6d9vyLnmeg24yzA8XuEAD9WjpM5nIM1sUgQ/Zv7cVkharPDQcmm/t/TvXoKo/0knI3me3AGfdx6w1w==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-linux-arm64-musl": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.14.tgz", - "integrity": "sha512-ISZjT44s59O8xKsPEIesiIydMG/sCXoMBCqsphDm/WcbnuWLxxb+GcvSIIA5NjUw6F8Tex7s5/LM2yDy8RqYBQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-linux-x64-gnu": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.14.tgz", - "integrity": "sha512-02c6JhLPJj10L2caH4U0zF8Hji4dOeahmuMl23stk0MU1wfd1OraE7rOloidSF8W5JTHkFdVo/O7uRUJJnUAJg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-linux-x64-musl": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.14.tgz", - "integrity": "sha512-TNGeLiN1XS66kQhxHG/7wMeQDOoL0S33x9BgmydbrWAb9Qw0KYdd8o1ifx4HOGDWhVmJ+Ul+JQ7lyknQFilO3Q==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-wasm32-wasi": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.14.tgz", - "integrity": "sha512-uZYAsaW/jS/IYkd6EWPJKW/NlPNSkWkBlaeVBi/WsFQNP05/bzkebUL8FH1pdsqx4f2fH/bWFcUABOM9nfiJkQ==", - "bundleDependencies": [ - "@napi-rs/wasm-runtime", - "@emnapi/core", - "@emnapi/runtime", - "@tybys/wasm-util", - "@emnapi/wasi-threads", - "tslib" - ], - "cpu": [ - "wasm32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "@emnapi/core": "^1.5.0", - "@emnapi/runtime": "^1.5.0", - "@emnapi/wasi-threads": "^1.1.0", - "@napi-rs/wasm-runtime": "^1.0.5", - "@tybys/wasm-util": "^0.10.1", - "tslib": "^2.4.0" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": { - "version": "1.5.0", - "dev": true, - "inBundle": true, - "license": "MIT", - "optional": true, - "dependencies": { - "@emnapi/wasi-threads": "1.1.0", - "tslib": "^2.4.0" - } - }, - "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": { - "version": "1.5.0", - "dev": true, - "inBundle": true, - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, - "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": { - "version": "1.1.0", - "dev": true, - "inBundle": true, - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, - "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": { - "version": "1.0.5", - "dev": true, - "inBundle": true, - "license": "MIT", - "optional": true, - "dependencies": { - "@emnapi/core": "^1.5.0", - "@emnapi/runtime": "^1.5.0", - "@tybys/wasm-util": "^0.10.1" - } - }, - 
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": { - "version": "0.10.1", - "dev": true, - "inBundle": true, - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, - "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": { - "version": "2.8.1", - "dev": true, - "inBundle": true, - "license": "0BSD", - "optional": true - }, - "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.14.tgz", - "integrity": "sha512-Az0RnnkcvRqsuoLH2Z4n3JfAef0wElgzHD5Aky/e+0tBUxUhIeIqFBTMNQvmMRSP15fWwmvjBxZ3Q8RhsDnxAA==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/oxide-win32-x64-msvc": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.14.tgz", - "integrity": "sha512-ttblVGHgf68kEE4om1n/n44I0yGPkCPbLsqzjvybhpwa6mKKtgFfAzy6btc3HRmuW7nHe0OOrSeNP9sQmmH9XA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">= 10" - } - }, - "node_modules/@tailwindcss/postcss": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.14.tgz", - "integrity": "sha512-BdMjIxy7HUNThK87C7BC8I1rE8BVUsfNQSI5siQ4JK3iIa3w0XyVvVL9SXLWO//CtYTcp1v7zci0fYwJOjB+Zg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@alloc/quick-lru": "^5.2.0", - "@tailwindcss/node": "4.1.14", - "@tailwindcss/oxide": "4.1.14", - "postcss": "^8.4.41", - "tailwindcss": "4.1.14" - } - }, - "node_modules/@testing-library/dom": { - "version": "10.4.1", - "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz", - "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.10.4", - "@babel/runtime": "^7.12.5", - "@types/aria-query": "^5.0.1", - "aria-query": "5.3.0", - "dom-accessibility-api": "^0.5.9", - "lz-string": "^1.5.0", - "picocolors": "1.1.1", - "pretty-format": "^27.0.2" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@testing-library/dom/node_modules/aria-query": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", - "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "dequal": "^2.0.3" - } - }, - "node_modules/@testing-library/jest-dom": { - "version": "6.8.0", - "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.8.0.tgz", - "integrity": "sha512-WgXcWzVM6idy5JaftTVC8Vs83NKRmGJz4Hqs4oyOuO2J4r/y79vvKZsb+CaGyCSEbUPI6OsewfPd0G1A0/TUZQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@adobe/css-tools": "^4.4.0", - "aria-query": "^5.0.0", - "css.escape": "^1.5.1", - "dom-accessibility-api": "^0.6.3", - "picocolors": "^1.1.1", - "redent": "^3.0.0" - }, - "engines": { - "node": ">=14", - "npm": ">=6", - "yarn": ">=1" - } - }, - "node_modules/@testing-library/jest-dom/node_modules/dom-accessibility-api": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.6.3.tgz", - "integrity": 
"sha512-7ZgogeTnjuHbo+ct10G9Ffp0mif17idi0IyWNVA/wcwcm7NPOD/WEHVP3n7n3MhXqxoIYm8d6MuZohYWIZ4T3w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@testing-library/react": { - "version": "16.3.0", - "resolved": "https://registry.npmjs.org/@testing-library/react/-/react-16.3.0.tgz", - "integrity": "sha512-kFSyxiEDwv1WLl2fgsq6pPBbw5aWKrsY2/noi1Id0TK0UParSF62oFQFGHXIyaG4pp2tEub/Zlel+fjjZILDsw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/runtime": "^7.12.5" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@testing-library/dom": "^10.0.0", - "@types/react": "^18.0.0 || ^19.0.0", - "@types/react-dom": "^18.0.0 || ^19.0.0", - "react": "^18.0.0 || ^19.0.0", - "react-dom": "^18.0.0 || ^19.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@tsconfig/node10": { - "version": "1.0.11", - "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz", - "integrity": "sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@tsconfig/node12": { - "version": "1.0.11", - "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", - "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", - "dev": true, - "license": "MIT" - }, - "node_modules/@tsconfig/node14": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", - "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", - "dev": true, - "license": "MIT" - }, - "node_modules/@tsconfig/node16": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", - "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/aria-query": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz", - "integrity": "sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/babel__core": { - "version": "7.20.5", - "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", - "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.20.7", - "@babel/types": "^7.20.7", - "@types/babel__generator": "*", - "@types/babel__template": "*", - "@types/babel__traverse": "*" - } - }, - "node_modules/@types/babel__generator": { - "version": "7.27.0", - "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.27.0.tgz", - "integrity": "sha512-ufFd2Xi92OAVPYsy+P4n7/U7e68fex0+Ee8gSG9KX7eo084CWiQ4sdxktvdl0bOPupXtVJPY19zk6EwWqUQ8lg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.0.0" - } - }, - "node_modules/@types/babel__template": { - "version": "7.4.4", - "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", - "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": 
"^7.1.0", - "@babel/types": "^7.0.0" - } - }, - "node_modules/@types/babel__traverse": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.28.0.tgz", - "integrity": "sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.28.2" - } - }, - "node_modules/@types/debug": { - "version": "4.1.12", - "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz", - "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==", - "license": "MIT", - "dependencies": { - "@types/ms": "*" - } - }, - "node_modules/@types/estree": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.7.tgz", - "integrity": "sha512-w28IoSUCJpidD/TGviZwwMJckNESJZXFu7NBZ5YJ4mEUnNraUn9Pm8HSZm/jDF1pDWYKspWE7oVphigUPRakIQ==", - "license": "MIT" - }, - "node_modules/@types/estree-jsx": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@types/estree-jsx/-/estree-jsx-1.0.5.tgz", - "integrity": "sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==", - "license": "MIT", - "dependencies": { - "@types/estree": "*" - } - }, - "node_modules/@types/hast": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", - "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", - "license": "MIT", - "dependencies": { - "@types/unist": "*" - } - }, - "node_modules/@types/istanbul-lib-coverage": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", - "integrity": "sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/istanbul-lib-report": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.3.tgz", - "integrity": "sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/istanbul-lib-coverage": "*" - } - }, - "node_modules/@types/istanbul-reports": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@types/istanbul-reports/-/istanbul-reports-3.0.4.tgz", - "integrity": "sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/istanbul-lib-report": "*" - } - }, - "node_modules/@types/jest": { - "version": "30.0.0", - "resolved": "https://registry.npmjs.org/@types/jest/-/jest-30.0.0.tgz", - "integrity": "sha512-XTYugzhuwqWjws0CVz8QpM36+T+Dz5mTEBKhNs/esGLnCIlGdRy+Dq78NRjd7ls7r8BC8ZRMOrKlkO1hU0JOwA==", - "dev": true, - "license": "MIT", - "dependencies": { - "expect": "^30.0.0", - "pretty-format": "^30.0.0" - } - }, - "node_modules/@types/jest/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - 
"node_modules/@types/jest/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/jsdom": { - "version": "21.1.7", - "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.7.tgz", - "integrity": "sha512-yOriVnggzrnQ3a9OKOCxaVuSug3w3/SbOj5i7VwXWZEyUNl3bLF9V3MfxGbZKuwqJOQyRfqXyROBB1CoZLFWzA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/node": "*", - "@types/tough-cookie": "*", - "parse5": "^7.0.0" - } - }, - "node_modules/@types/json-schema": { - "version": "7.0.15", - "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", - "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/json5": { - "version": "0.0.29", - "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", - "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/mdast": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", - "integrity": "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==", - "license": "MIT", - "dependencies": { - "@types/unist": "*" - } - }, - "node_modules/@types/ms": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", - "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", - "license": "MIT" - }, - "node_modules/@types/node": { - "version": "24.8.1", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.8.1.tgz", - "integrity": "sha512-alv65KGRadQVfVcG69MuB4IzdYVpRwMG/mq8KWOaoOdyY617P5ivaDiMCGOFDWD2sAn5Q0mR3mRtUOgm99hL9Q==", - "license": "MIT", - "dependencies": { - "undici-types": "~7.14.0" - } - }, - "node_modules/@types/node-fetch": { - "version": "2.6.12", - "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.12.tgz", - "integrity": "sha512-8nneRWKCg3rMtF69nLQJnOYUcbafYeFSjqkw3jCRLsqkWFlHaoQrr5mXmofFGOx3DKn7UfmBMyov8ySvLRVldA==", - "license": "MIT", - "dependencies": { - "@types/node": "*", - "form-data": "^4.0.0" - } - }, - "node_modules/@types/react": { - "version": "19.2.2", - "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.2.tgz", - "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==", - "license": "MIT", - "dependencies": { - "csstype": "^3.0.2" - } - }, - "node_modules/@types/react-dom": { - "version": "19.2.1", - "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.1.tgz", - "integrity": 
"sha512-/EEvYBdT3BflCWvTMO7YkYBHVE9Ci6XdqZciZANQgKpaiDRGOLIlRo91jbTNRQjgPFWVaRxcYc0luVNFitz57A==", - "devOptional": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "^19.2.0" - } - }, - "node_modules/@types/stack-utils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz", - "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/tough-cookie": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz", - "integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/unist": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", - "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", - "license": "MIT" - }, - "node_modules/@types/yargs": { - "version": "17.0.33", - "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz", - "integrity": "sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/yargs-parser": "*" - } - }, - "node_modules/@types/yargs-parser": { - "version": "21.0.3", - "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", - "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@typescript-eslint/eslint-plugin": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.40.0.tgz", - "integrity": "sha512-w/EboPlBwnmOBtRbiOvzjD+wdiZdgFeo17lkltrtn7X37vagKKWJABvyfsJXTlHe6XBzugmYgd4A4nW+k8Mixw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@eslint-community/regexpp": "^4.10.0", - "@typescript-eslint/scope-manager": "8.40.0", - "@typescript-eslint/type-utils": "8.40.0", - "@typescript-eslint/utils": "8.40.0", - "@typescript-eslint/visitor-keys": "8.40.0", - "graphemer": "^1.4.0", - "ignore": "^7.0.0", - "natural-compare": "^1.4.0", - "ts-api-utils": "^2.1.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "@typescript-eslint/parser": "^8.40.0", - "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/eslint-plugin/node_modules/ignore": { - "version": "7.0.5", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", - "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/@typescript-eslint/parser": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.40.0.tgz", - "integrity": "sha512-jCNyAuXx8dr5KJMkecGmZ8KI61KBUhkCob+SD+C+I5+Y1FWI2Y3QmY4/cxMCC5WAsZqoEtEETVhUiUMIGCf6Bw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/scope-manager": "8.40.0", - "@typescript-eslint/types": "8.40.0", - "@typescript-eslint/typescript-estree": "8.40.0", - 
"@typescript-eslint/visitor-keys": "8.40.0", - "debug": "^4.3.4" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/project-service": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.40.0.tgz", - "integrity": "sha512-/A89vz7Wf5DEXsGVvcGdYKbVM9F7DyFXj52lNYUDS1L9yJfqjW/fIp5PgMuEJL/KeqVTe2QSbXAGUZljDUpArw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.40.0", - "@typescript-eslint/types": "^8.40.0", - "debug": "^4.3.4" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/scope-manager": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.40.0.tgz", - "integrity": "sha512-y9ObStCcdCiZKzwqsE8CcpyuVMwRouJbbSrNuThDpv16dFAj429IkM6LNb1dZ2m7hK5fHyzNcErZf7CEeKXR4w==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/types": "8.40.0", - "@typescript-eslint/visitor-keys": "8.40.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.40.0.tgz", - "integrity": "sha512-jtMytmUaG9d/9kqSl/W3E3xaWESo4hFDxAIHGVW/WKKtQhesnRIJSAJO6XckluuJ6KDB5woD1EiqknriCtAmcw==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/type-utils": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.40.0.tgz", - "integrity": "sha512-eE60cK4KzAc6ZrzlJnflXdrMqOBaugeukWICO2rB0KNvwdIMaEaYiywwHMzA1qFpTxrLhN9Lp4E/00EgWcD3Ow==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/types": "8.40.0", - "@typescript-eslint/typescript-estree": "8.40.0", - "@typescript-eslint/utils": "8.40.0", - "debug": "^4.3.4", - "ts-api-utils": "^2.1.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/types": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.40.0.tgz", - "integrity": "sha512-ETdbFlgbAmXHyFPwqUIYrfc12ArvpBhEVgGAxVYSwli26dn8Ko+lIo4Su9vI9ykTZdJn+vJprs/0eZU0YMAEQg==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/typescript-estree": { - 
"version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.40.0.tgz", - "integrity": "sha512-k1z9+GJReVVOkc1WfVKs1vBrR5MIKKbdAjDTPvIK3L8De6KbFfPFt6BKpdkdk7rZS2GtC/m6yI5MYX+UsuvVYQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/project-service": "8.40.0", - "@typescript-eslint/tsconfig-utils": "8.40.0", - "@typescript-eslint/types": "8.40.0", - "@typescript-eslint/visitor-keys": "8.40.0", - "debug": "^4.3.4", - "fast-glob": "^3.3.2", - "is-glob": "^4.0.3", - "minimatch": "^9.0.4", - "semver": "^7.6.0", - "ts-api-utils": "^2.1.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" - } - }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/fast-glob": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", - "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.8" - }, - "engines": { - "node": ">=8.6.0" - } - }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/@typescript-eslint/utils": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.40.0.tgz", - "integrity": "sha512-Cgzi2MXSZyAUOY+BFwGs17s7ad/7L+gKt6Y8rAVVWS+7o6wrjeFN4nVfTpbE25MNcxyJ+iYUXflbs2xR9h4UBg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.40.0", - "@typescript-eslint/types": "8.40.0", - "@typescript-eslint/typescript-estree": "8.40.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <6.0.0" - } - }, - 
"node_modules/@typescript-eslint/visitor-keys": { - "version": "8.40.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.40.0.tgz", - "integrity": "sha512-8CZ47QwalyRjsypfwnbI3hKy5gJDPmrkLjkgMxhi0+DZZ2QNx2naS6/hWoVYUHU7LU2zleF68V9miaVZvhFfTA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@typescript-eslint/types": "8.40.0", - "eslint-visitor-keys": "^4.2.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@ungap/structured-clone": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.3.0.tgz", - "integrity": "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==", - "license": "ISC" - }, - "node_modules/@unrs/resolver-binding-android-arm-eabi": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-android-arm-eabi/-/resolver-binding-android-arm-eabi-1.11.1.tgz", - "integrity": "sha512-ppLRUgHVaGRWUx0R0Ut06Mjo9gBaBkg3v/8AxusGLhsIotbBLuRk51rAzqLC8gq6NyyAojEXglNjzf6R948DNw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ] - }, - "node_modules/@unrs/resolver-binding-android-arm64": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-android-arm64/-/resolver-binding-android-arm64-1.11.1.tgz", - "integrity": "sha512-lCxkVtb4wp1v+EoN+HjIG9cIIzPkX5OtM03pQYkG+U5O/wL53LC4QbIeazgiKqluGeVEeBlZahHalCaBvU1a2g==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ] - }, - "node_modules/@unrs/resolver-binding-darwin-arm64": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-darwin-arm64/-/resolver-binding-darwin-arm64-1.11.1.tgz", - "integrity": "sha512-gPVA1UjRu1Y/IsB/dQEsp2V1pm44Of6+LWvbLc9SDk1c2KhhDRDBUkQCYVWe6f26uJb3fOK8saWMgtX8IrMk3g==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@unrs/resolver-binding-darwin-x64": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-darwin-x64/-/resolver-binding-darwin-x64-1.11.1.tgz", - "integrity": "sha512-cFzP7rWKd3lZaCsDze07QX1SC24lO8mPty9vdP+YVa3MGdVgPmFc59317b2ioXtgCMKGiCLxJ4HQs62oz6GfRQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@unrs/resolver-binding-freebsd-x64": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-freebsd-x64/-/resolver-binding-freebsd-x64-1.11.1.tgz", - "integrity": "sha512-fqtGgak3zX4DCB6PFpsH5+Kmt/8CIi4Bry4rb1ho6Av2QHTREM+47y282Uqiu3ZRF5IQioJQ5qWRV6jduA+iGw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ] - }, - "node_modules/@unrs/resolver-binding-linux-arm-gnueabihf": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-arm-gnueabihf/-/resolver-binding-linux-arm-gnueabihf-1.11.1.tgz", - "integrity": "sha512-u92mvlcYtp9MRKmP+ZvMmtPN34+/3lMHlyMj7wXJDeXxuM0Vgzz0+PPJNsro1m3IZPYChIkn944wW8TYgGKFHw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-arm-musleabihf": { - 
"version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-arm-musleabihf/-/resolver-binding-linux-arm-musleabihf-1.11.1.tgz", - "integrity": "sha512-cINaoY2z7LVCrfHkIcmvj7osTOtm6VVT16b5oQdS4beibX2SYBwgYLmqhBjA1t51CarSaBuX5YNsWLjsqfW5Cw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-arm64-gnu": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-arm64-gnu/-/resolver-binding-linux-arm64-gnu-1.11.1.tgz", - "integrity": "sha512-34gw7PjDGB9JgePJEmhEqBhWvCiiWCuXsL9hYphDF7crW7UgI05gyBAi6MF58uGcMOiOqSJ2ybEeCvHcq0BCmQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-arm64-musl": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-arm64-musl/-/resolver-binding-linux-arm64-musl-1.11.1.tgz", - "integrity": "sha512-RyMIx6Uf53hhOtJDIamSbTskA99sPHS96wxVE/bJtePJJtpdKGXO1wY90oRdXuYOGOTuqjT8ACccMc4K6QmT3w==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-ppc64-gnu": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-ppc64-gnu/-/resolver-binding-linux-ppc64-gnu-1.11.1.tgz", - "integrity": "sha512-D8Vae74A4/a+mZH0FbOkFJL9DSK2R6TFPC9M+jCWYia/q2einCubX10pecpDiTmkJVUH+y8K3BZClycD8nCShA==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-riscv64-gnu": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-riscv64-gnu/-/resolver-binding-linux-riscv64-gnu-1.11.1.tgz", - "integrity": "sha512-frxL4OrzOWVVsOc96+V3aqTIQl1O2TjgExV4EKgRY09AJ9leZpEg8Ak9phadbuX0BA4k8U5qtvMSQQGGmaJqcQ==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-riscv64-musl": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-riscv64-musl/-/resolver-binding-linux-riscv64-musl-1.11.1.tgz", - "integrity": "sha512-mJ5vuDaIZ+l/acv01sHoXfpnyrNKOk/3aDoEdLO/Xtn9HuZlDD6jKxHlkN8ZhWyLJsRBxfv9GYM2utQ1SChKew==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-s390x-gnu": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-s390x-gnu/-/resolver-binding-linux-s390x-gnu-1.11.1.tgz", - "integrity": "sha512-kELo8ebBVtb9sA7rMe1Cph4QHreByhaZ2QEADd9NzIQsYNQpt9UkM9iqr2lhGr5afh885d/cB5QeTXSbZHTYPg==", - "cpu": [ - "s390x" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-x64-gnu": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-linux-x64-gnu/-/resolver-binding-linux-x64-gnu-1.11.1.tgz", - "integrity": "sha512-C3ZAHugKgovV5YvAMsxhq0gtXuwESUKc5MhEtjBpLoHPLYM+iuwSj3lflFwK3DPm68660rZ7G8BMcwSro7hD5w==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-linux-x64-musl": { - "version": "1.11.1", - "resolved": 
"https://registry.npmjs.org/@unrs/resolver-binding-linux-x64-musl/-/resolver-binding-linux-x64-musl-1.11.1.tgz", - "integrity": "sha512-rV0YSoyhK2nZ4vEswT/QwqzqQXw5I6CjoaYMOX0TqBlWhojUf8P94mvI7nuJTeaCkkds3QE4+zS8Ko+GdXuZtA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@unrs/resolver-binding-wasm32-wasi": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-wasm32-wasi/-/resolver-binding-wasm32-wasi-1.11.1.tgz", - "integrity": "sha512-5u4RkfxJm+Ng7IWgkzi3qrFOvLvQYnPBmjmZQ8+szTK/b31fQCnleNl1GgEt7nIsZRIf5PLhPwT0WM+q45x/UQ==", - "cpu": [ - "wasm32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "@napi-rs/wasm-runtime": "^0.2.11" - }, - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/@unrs/resolver-binding-win32-arm64-msvc": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-win32-arm64-msvc/-/resolver-binding-win32-arm64-msvc-1.11.1.tgz", - "integrity": "sha512-nRcz5Il4ln0kMhfL8S3hLkxI85BXs3o8EYoattsJNdsX4YUU89iOkVn7g0VHSRxFuVMdM4Q1jEpIId1Ihim/Uw==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@unrs/resolver-binding-win32-ia32-msvc": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-win32-ia32-msvc/-/resolver-binding-win32-ia32-msvc-1.11.1.tgz", - "integrity": "sha512-DCEI6t5i1NmAZp6pFonpD5m7i6aFrpofcp4LA2i8IIq60Jyo28hamKBxNrZcyOwVOZkgsRp9O2sXWBWP8MnvIQ==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@unrs/resolver-binding-win32-x64-msvc": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/@unrs/resolver-binding-win32-x64-msvc/-/resolver-binding-win32-x64-msvc-1.11.1.tgz", - "integrity": "sha512-lrW200hZdbfRtztbygyaq/6jP6AKE8qQN2KvPcJ+x7wiD038YtnYtZ82IMNJ69GJibV7bwL3y9FgK+5w/pYt6g==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/abort-controller": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", - "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", - "license": "MIT", - "dependencies": { - "event-target-shim": "^5.0.0" - }, - "engines": { - "node": ">=6.5" - } - }, - "node_modules/acorn": { - "version": "8.15.0", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", - "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", - "dev": true, - "license": "MIT", - "bin": { - "acorn": "bin/acorn" - }, - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/acorn-jsx": { - "version": "5.3.2", - "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", - "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" - } - }, - "node_modules/acorn-walk": { - "version": "8.3.4", - "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", - "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", - "dev": true, - "license": "MIT", - "dependencies": { - "acorn": "^8.11.0" - }, - 
"engines": { - "node": ">=0.4.0" - } - }, - "node_modules/agent-base": { - "version": "7.1.4", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", - "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/agentkeepalive": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", - "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==", - "license": "MIT", - "dependencies": { - "humanize-ms": "^1.2.1" - }, - "engines": { - "node": ">= 8.0.0" - } - }, - "node_modules/ajv": { - "version": "6.12.6", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", - "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", - "dev": true, - "license": "MIT", - "dependencies": { - "fast-deep-equal": "^3.1.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/epoberezkin" - } - }, - "node_modules/ansi-escapes": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", - "integrity": "sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "type-fest": "^0.21.3" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/anymatch": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", - "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", - "dev": true, - "license": "ISC", - "dependencies": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/arg": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", - "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", - "dev": true, - "license": "MIT" - }, - "node_modules/argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true, - "license": "Python-2.0" - }, - "node_modules/aria-hidden": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.4.tgz", - 
"integrity": "sha512-y+CcFFwelSXpLZk/7fMB2mUbGtX9lKycf1MWJ7CaTIERyitVlyQx6C+sxcROU2BAJ24OiZyK+8wj2i8AlBoS3A==", - "license": "MIT", - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/aria-query": { - "version": "5.3.2", - "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz", - "integrity": "sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/array-buffer-byte-length": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.2.tgz", - "integrity": "sha512-LHE+8BuR7RYGDKvnrmcuSq3tDcKv9OFEXQt/HpbZhY7V6h0zlUXutnAD82GiFx9rdieCMjkvtcsPqBwgUl1Iiw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "is-array-buffer": "^3.0.5" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/array-includes": { - "version": "3.1.8", - "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.8.tgz", - "integrity": "sha512-itaWrbYbqpGXkGhZPGUulwnhVf5Hpy1xiCFsGqyIGglbBxmG5vSjxQen3/WGOjPpNEv1RtBLKxbmVXm8HpJStQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.2", - "es-object-atoms": "^1.0.0", - "get-intrinsic": "^1.2.4", - "is-string": "^1.0.7" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/array.prototype.findlast": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/array.prototype.findlast/-/array.prototype.findlast-1.2.5.tgz", - "integrity": "sha512-CVvd6FHg1Z3POpBLxO6E6zr+rSKEQ9L6rZHAaY7lLfhKsWYUBBOuMs0e9o24oopj6H+geRCX0YJ+TJLBK2eHyQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.2", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.0.0", - "es-shim-unscopables": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/array.prototype.findlastindex": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.6.tgz", - "integrity": "sha512-F/TKATkzseUExPlfvmwQKGITM3DGTK+vkAsCZoDc5daVygbJBnjEUCbgkAvVFsgfXfX4YIqZ/27G3k3tdXrTxQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.4", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.9", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.1.1", - "es-shim-unscopables": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/array.prototype.flat": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.3.3.tgz", - "integrity": "sha512-rwG/ja1neyLqCuGZ5YYrznA62D4mZXg0i1cIskIUKSiqF3Cje9/wXAls9B9s1Wa2fomMsIv8czB8jZcPmxCXFg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.5", - "es-shim-unscopables": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - 
"node_modules/array.prototype.flatmap": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.3.tgz", - "integrity": "sha512-Y7Wt51eKJSyi80hFrJCePGGNo5ktJCslFuboqJsbf57CCPcm5zztluPlc4/aD8sWsKvlwatezpV4U1efk8kpjg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.5", - "es-shim-unscopables": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/array.prototype.tosorted": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/array.prototype.tosorted/-/array.prototype.tosorted-1.1.4.tgz", - "integrity": "sha512-p6Fx8B7b7ZhL/gmUsAy0D15WhvDccw3mnGNbZpi3pmeJdxtWsj2jEaI4Y6oo3XiHfzuSgPwKc04MYt6KgvC/wA==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.3", - "es-errors": "^1.3.0", - "es-shim-unscopables": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/arraybuffer.prototype.slice": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.4.tgz", - "integrity": "sha512-BNoCY6SXXPQ7gF2opIP4GBE+Xw7U+pHMYKuzjgCN3GwiaIR09UUeKfheyIry77QtrCBlC0KK0q5/TER/tYh3PQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "array-buffer-byte-length": "^1.0.1", - "call-bind": "^1.0.8", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.5", - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6", - "is-array-buffer": "^3.0.4" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/ast-types-flow": { - "version": "0.0.8", - "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz", - "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/async-function": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/async-function/-/async-function-1.0.0.tgz", - "integrity": "sha512-hsU18Ae8CDTR6Kgu9DYf0EbCr/a5iGL0rytQDobUcdpYOKokk8LEjVphnXkDkgpi0wYVsqrXuP0bZxJaTqdgoA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", - "license": "MIT" - }, - "node_modules/available-typed-arrays": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", - "integrity": "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "possible-typed-array-names": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/axe-core": { - "version": "4.10.3", - "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.10.3.tgz", - "integrity": "sha512-Xm7bpRXnDSX2YE2YFfBk2FnF0ep6tmG7xPh8iHee8MIcrgq762Nkce856dYtJYLkuIoYZvGfTs/PbZhideTcEg==", - "dev": true, - "license": "MPL-2.0", - "engines": { - "node": ">=4" - } - }, - "node_modules/axobject-query": { - "version": "4.1.0", - 
"resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-4.1.0.tgz", - "integrity": "sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/babel-jest": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-30.2.0.tgz", - "integrity": "sha512-0YiBEOxWqKkSQWL9nNGGEgndoeL0ZpWrbLMNL5u/Kaxrli3Eaxlt3ZtIDktEvXt4L/R9r3ODr2zKwGM/2BjxVw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/transform": "30.2.0", - "@types/babel__core": "^7.20.5", - "babel-plugin-istanbul": "^7.0.1", - "babel-preset-jest": "30.2.0", - "chalk": "^4.1.2", - "graceful-fs": "^4.2.11", - "slash": "^3.0.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "@babel/core": "^7.11.0 || ^8.0.0-0" - } - }, - "node_modules/babel-plugin-istanbul": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/babel-plugin-istanbul/-/babel-plugin-istanbul-7.0.1.tgz", - "integrity": "sha512-D8Z6Qm8jCvVXtIRkBnqNHX0zJ37rQcFJ9u8WOS6tkYOsRdHBzypCstaxWiu5ZIlqQtviRYbgnRLSoCEvjqcqbA==", - "dev": true, - "license": "BSD-3-Clause", - "workspaces": [ - "test/babel-8" - ], - "dependencies": { - "@babel/helper-plugin-utils": "^7.0.0", - "@istanbuljs/load-nyc-config": "^1.0.0", - "@istanbuljs/schema": "^0.1.3", - "istanbul-lib-instrument": "^6.0.2", - "test-exclude": "^6.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/babel-plugin-jest-hoist": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-30.2.0.tgz", - "integrity": "sha512-ftzhzSGMUnOzcCXd6WHdBGMyuwy15Wnn0iyyWGKgBDLxf9/s5ABuraCSpBX2uG0jUg4rqJnxsLc5+oYBqoxVaA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/babel__core": "^7.20.5" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/babel-preset-current-node-syntax": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/babel-preset-current-node-syntax/-/babel-preset-current-node-syntax-1.2.0.tgz", - "integrity": "sha512-E/VlAEzRrsLEb2+dv8yp3bo4scof3l9nR4lrld+Iy5NyVqgVYUJnDAmunkhPMisRI32Qc4iRiz425d8vM++2fg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/plugin-syntax-async-generators": "^7.8.4", - "@babel/plugin-syntax-bigint": "^7.8.3", - "@babel/plugin-syntax-class-properties": "^7.12.13", - "@babel/plugin-syntax-class-static-block": "^7.14.5", - "@babel/plugin-syntax-import-attributes": "^7.24.7", - "@babel/plugin-syntax-import-meta": "^7.10.4", - "@babel/plugin-syntax-json-strings": "^7.8.3", - "@babel/plugin-syntax-logical-assignment-operators": "^7.10.4", - "@babel/plugin-syntax-nullish-coalescing-operator": "^7.8.3", - "@babel/plugin-syntax-numeric-separator": "^7.10.4", - "@babel/plugin-syntax-object-rest-spread": "^7.8.3", - "@babel/plugin-syntax-optional-catch-binding": "^7.8.3", - "@babel/plugin-syntax-optional-chaining": "^7.8.3", - "@babel/plugin-syntax-private-property-in-object": "^7.14.5", - "@babel/plugin-syntax-top-level-await": "^7.14.5" - }, - "peerDependencies": { - "@babel/core": "^7.0.0 || ^8.0.0-0" - } - }, - "node_modules/babel-preset-jest": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-30.2.0.tgz", - "integrity": "sha512-US4Z3NOieAQumwFnYdUWKvUKh8+YSnS/gB3t6YBiz0bskpu7Pine8pPCheNxlPEW4wnUkma2a94YuW2q3guvCQ==", - 
"dev": true, - "license": "MIT", - "dependencies": { - "babel-plugin-jest-hoist": "30.2.0", - "babel-preset-current-node-syntax": "^1.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "@babel/core": "^7.11.0 || ^8.0.0-beta.1" - } - }, - "node_modules/bail": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz", - "integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true, - "license": "MIT" - }, - "node_modules/baseline-browser-mapping": { - "version": "2.8.18", - "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.18.tgz", - "integrity": "sha512-UYmTpOBwgPScZpS4A+YbapwWuBwasxvO/2IOHArSsAhL/+ZdmATBXTex3t+l2hXwLVYK382ibr/nKoY9GKe86w==", - "dev": true, - "license": "Apache-2.0", - "bin": { - "baseline-browser-mapping": "dist/cli.js" - } - }, - "node_modules/brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, - "node_modules/braces": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", - "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", - "dev": true, - "license": "MIT", - "dependencies": { - "fill-range": "^7.1.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/browserslist": { - "version": "4.26.3", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.26.3.tgz", - "integrity": "sha512-lAUU+02RFBuCKQPj/P6NgjlbCnLBMp4UtgTx7vNHd3XSIJF87s9a5rA3aH2yw3GS9DqZAUbOtZdCCiZeVRqt0w==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "baseline-browser-mapping": "^2.8.9", - "caniuse-lite": "^1.0.30001746", - "electron-to-chromium": "^1.5.227", - "node-releases": "^2.0.21", - "update-browserslist-db": "^1.1.3" - }, - "bin": { - "browserslist": "cli.js" - }, - "engines": { - "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" - } - }, - "node_modules/bser": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/bser/-/bser-2.1.1.tgz", - "integrity": "sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "node-int64": "^0.4.0" - } - }, - "node_modules/buffer-from": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", - "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", - "dev": true, - 
"license": "MIT" - }, - "node_modules/call-bind": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.8.tgz", - "integrity": "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.0", - "es-define-property": "^1.0.0", - "get-intrinsic": "^1.2.4", - "set-function-length": "^1.2.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/call-bind-apply-helpers": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", - "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/call-bound": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", - "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.2", - "get-intrinsic": "^1.3.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/callsites": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", - "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/camelcase": { - "version": "5.3.1", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", - "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/caniuse-lite": { - "version": "1.0.30001751", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001751.tgz", - "integrity": "sha512-A0QJhug0Ly64Ii3eIqHu5X51ebln3k4yTUkY1j8drqpWHVreg/VLijN48cZ1bYPiqOQuqpkIKnzr/Ul8V+p6Cw==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/caniuse-lite" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "CC-BY-4.0" - }, - "node_modules/ccount": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", - "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/chalk": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", - "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/char-regex": { - "version": "1.0.2", - "resolved": 
"https://registry.npmjs.org/char-regex/-/char-regex-1.0.2.tgz", - "integrity": "sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - } - }, - "node_modules/character-entities": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-2.0.2.tgz", - "integrity": "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/character-entities-html4": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/character-entities-html4/-/character-entities-html4-2.1.0.tgz", - "integrity": "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/character-entities-legacy": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-3.0.0.tgz", - "integrity": "sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/character-reference-invalid": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz", - "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/chownr": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", - "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==", - "dev": true, - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=18" - } - }, - "node_modules/ci-info": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-4.3.1.tgz", - "integrity": "sha512-Wdy2Igu8OcBpI2pZePZ5oWjPC38tmDVx5WKUXKwlLYkA0ozo85sLsLvkBbBn/sZaSCMFOGZJ14fvW9t5/d7kdA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/sibiraj-s" - } - ], - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/cjs-module-lexer": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-2.1.0.tgz", - "integrity": "sha512-UX0OwmYRYQQetfrLEZeewIFFI+wSTofC+pMBLNuH3RUuu/xzG1oz84UCEDOSoQlN3fZ4+AzmV50ZYvGqkMh9yA==", - "dev": true, - "license": "MIT" - }, - "node_modules/class-variance-authority": { - "version": "0.7.1", - "resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.7.1.tgz", - "integrity": "sha512-Ka+9Trutv7G8M6WT6SeiRWz792K5qEqIGEGzXKhAE6xOWAY6pPH8U+9IY3oCMv6kqTmLsv7Xh/2w2RigkePMsg==", - "license": "Apache-2.0", - "dependencies": { - "clsx": "^2.1.1" - }, - "funding": { - "url": "https://polar.sh/cva" - } - }, - "node_modules/client-only": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz", - "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==", - "license": "MIT" - }, - 
"node_modules/cliui": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", - "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.1", - "wrap-ansi": "^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/cliui/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/cliui/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/cliui/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/cliui/node_modules/wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/clsx": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", - "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/co": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", - "integrity": "sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==", - "dev": true, - "license": "MIT", - "engines": { - "iojs": ">= 1.0.0", - "node": ">= 0.12.0" - } - }, - "node_modules/collect-v8-coverage": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.3.tgz", - "integrity": "sha512-1L5aqIkwPfiodaMgQunkF1zRhNqifHBmtbbbxcr6yVxxBnliw4TDOW6NxpO8DJLgJ16OT+Y4ztZqP6p/FtXnAw==", - "dev": true, - "license": "MIT" - }, - "node_modules/color": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz", - "integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==", - "license": "MIT", - "optional": true, - "dependencies": { - "color-convert": "^2.0.1", - "color-string": "^1.9.0" - }, - "engines": { - "node": ">=12.5.0" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": 
"sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "devOptional": true, - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "devOptional": true, - "license": "MIT" - }, - "node_modules/color-string": { - "version": "1.9.1", - "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", - "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", - "license": "MIT", - "optional": true, - "dependencies": { - "color-name": "^1.0.0", - "simple-swizzle": "^0.2.2" - } - }, - "node_modules/combined-stream": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", - "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "license": "MIT", - "dependencies": { - "delayed-stream": "~1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/comma-separated-tokens": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", - "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", - "dev": true, - "license": "MIT" - }, - "node_modules/convert-source-map": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", - "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", - "dev": true, - "license": "MIT" - }, - "node_modules/cookie": { - "version": "0.7.2", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", - "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/create-require": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", - "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/cross-spawn": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", - "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/css.escape": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/css.escape/-/css.escape-1.5.1.tgz", - "integrity": "sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg==", - "dev": true, - "license": 
"MIT" - }, - "node_modules/cssstyle": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.6.0.tgz", - "integrity": "sha512-2z+rWdzbbSZv6/rhtvzvqeZQHrBaqgogqt85sqFNbabZOuFbCVFb8kPeEtZjiKkbrm395irpNKiYeFeLiQnFPg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@asamuzakjp/css-color": "^3.2.0", - "rrweb-cssom": "^0.8.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/csstype": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", - "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "license": "MIT" - }, - "node_modules/damerau-levenshtein": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz", - "integrity": "sha512-sdQSFB7+llfUcQHUQO3+B8ERRj0Oa4w9POWMI/puGtuf7gFywGmkaLCElnudfTiKZV+NvHqL0ifzdrI8Ro7ESA==", - "dev": true, - "license": "BSD-2-Clause" - }, - "node_modules/data-urls": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-5.0.0.tgz", - "integrity": "sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==", - "dev": true, - "license": "MIT", - "dependencies": { - "whatwg-mimetype": "^4.0.0", - "whatwg-url": "^14.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/data-urls/node_modules/tr46": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.1.1.tgz", - "integrity": "sha512-hdF5ZgjTqgAntKkklYw0R03MG2x/bSzTtkxmIRw/sTNV8YXsCJ1tfLAX23lhxhHJlEf3CRCOCGGWw3vI3GaSPw==", - "dev": true, - "license": "MIT", - "dependencies": { - "punycode": "^2.3.1" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/data-urls/node_modules/webidl-conversions": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", - "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=12" - } - }, - "node_modules/data-urls/node_modules/whatwg-url": { - "version": "14.2.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.2.0.tgz", - "integrity": "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==", - "dev": true, - "license": "MIT", - "dependencies": { - "tr46": "^5.1.0", - "webidl-conversions": "^7.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/data-view-buffer": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/data-view-buffer/-/data-view-buffer-1.0.2.tgz", - "integrity": "sha512-EmKO5V3OLXh1rtK2wgXRansaK1/mtVdTUEiEI0W8RkvgT05kfxaH29PliLnpLP73yYO6142Q72QNa8Wx/A5CqQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "es-errors": "^1.3.0", - "is-data-view": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/data-view-byte-length": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/data-view-byte-length/-/data-view-byte-length-1.0.2.tgz", - "integrity": "sha512-tuhGbE6CfTM9+5ANGf+oQb72Ky/0+s3xKUpHvShfiz2RxMFgFPjsXuRLBVMtvMs15awe45SRb83D6wH4ew6wlQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "es-errors": "^1.3.0", - "is-data-view": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - 
"url": "https://github.com/sponsors/inspect-js" - } - }, - "node_modules/data-view-byte-offset": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/data-view-byte-offset/-/data-view-byte-offset-1.0.1.tgz", - "integrity": "sha512-BS8PfmtDGnrgYdOonGZQdLZslWIeCGFP9tpan0hi1Co2Zr2NKADsvGYA8XxuG/4UWgJ6Cjtv+YJnB6MM69QGlQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "es-errors": "^1.3.0", - "is-data-view": "^1.0.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/debug": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", - "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/decimal.js": { - "version": "10.6.0", - "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz", - "integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==", - "dev": true, - "license": "MIT" - }, - "node_modules/decode-named-character-reference": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.2.0.tgz", - "integrity": "sha512-c6fcElNV6ShtZXmsgNgFFV5tVX2PaV4g+MOAkb8eXHvn6sryJBrZa9r0zV6+dtTyoCKxtDy5tyQ5ZwQuidtd+Q==", - "license": "MIT", - "dependencies": { - "character-entities": "^2.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/dedent": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.7.0.tgz", - "integrity": "sha512-HGFtf8yhuhGhqO07SV79tRp+br4MnbdjeVxotpn1QBl30pcLLCQjX5b2295ll0fv8RKDKsmWYrl05usHM9CewQ==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "babel-plugin-macros": "^3.1.0" - }, - "peerDependenciesMeta": { - "babel-plugin-macros": { - "optional": true - } - } - }, - "node_modules/deep-is": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", - "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/deepmerge": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", - "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/define-data-property": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", - "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", - "dev": true, - "license": "MIT", - "dependencies": { - "es-define-property": "^1.0.0", - "es-errors": "^1.3.0", - "gopd": "^1.0.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/define-properties": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz", - "integrity": 
"sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==", - "dev": true, - "license": "MIT", - "dependencies": { - "define-data-property": "^1.0.1", - "has-property-descriptors": "^1.0.0", - "object-keys": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/delayed-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", - "license": "MIT", - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/dequal": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", - "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/detect-libc": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.4.tgz", - "integrity": "sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==", - "devOptional": true, - "license": "Apache-2.0", - "engines": { - "node": ">=8" - } - }, - "node_modules/detect-newline": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz", - "integrity": "sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/detect-node-es": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz", - "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==", - "license": "MIT" - }, - "node_modules/devlop": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", - "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", - "license": "MIT", - "dependencies": { - "dequal": "^2.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/diff": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", - "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.3.1" - } - }, - "node_modules/doctrine": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", - "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "esutils": "^2.0.2" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/dom-accessibility-api": { - "version": "0.5.16", - "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz", - "integrity": "sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==", - "dev": true, - "license": "MIT" - }, - "node_modules/dunder-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", - "integrity": 
"sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.1", - "es-errors": "^1.3.0", - "gopd": "^1.2.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/eastasianwidth": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", - "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "dev": true, - "license": "MIT" - }, - "node_modules/electron-to-chromium": { - "version": "1.5.237", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.237.tgz", - "integrity": "sha512-icUt1NvfhGLar5lSWH3tHNzablaA5js3HVHacQimfP8ViEBOQv+L7DKEuHdbTZ0SKCO1ogTJTIL1Gwk9S6Qvcg==", - "dev": true, - "license": "ISC" - }, - "node_modules/emittery": { - "version": "0.13.1", - "resolved": "https://registry.npmjs.org/emittery/-/emittery-0.13.1.tgz", - "integrity": "sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sindresorhus/emittery?sponsor=1" - } - }, - "node_modules/emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true, - "license": "MIT" - }, - "node_modules/enhanced-resolve": { - "version": "5.18.3", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.3.tgz", - "integrity": "sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==", - "dev": true, - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.4", - "tapable": "^2.2.0" - }, - "engines": { - "node": ">=10.13.0" - } - }, - "node_modules/entities": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", - "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, - "node_modules/error-ex": { - "version": "1.3.4", - "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.4.tgz", - "integrity": "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-arrayish": "^0.2.1" - } - }, - "node_modules/error-ex/node_modules/is-arrayish": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", - "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", - "dev": true, - "license": "MIT" - }, - "node_modules/es-abstract": { - "version": "1.23.9", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.9.tgz", - "integrity": "sha512-py07lI0wjxAC/DcfK1S6G7iANonniZwTISvdPzk9hzeH0IZIshbuuFxLIU96OyF89Yb9hiqWn8M/bY83KY5vzA==", - "dev": true, - "license": "MIT", - "dependencies": { - "array-buffer-byte-length": "^1.0.2", - "arraybuffer.prototype.slice": "^1.0.4", - "available-typed-arrays": "^1.0.7", - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "data-view-buffer": "^1.0.2", - 
"data-view-byte-length": "^1.0.2", - "data-view-byte-offset": "^1.0.1", - "es-define-property": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.0.0", - "es-set-tostringtag": "^2.1.0", - "es-to-primitive": "^1.3.0", - "function.prototype.name": "^1.1.8", - "get-intrinsic": "^1.2.7", - "get-proto": "^1.0.0", - "get-symbol-description": "^1.1.0", - "globalthis": "^1.0.4", - "gopd": "^1.2.0", - "has-property-descriptors": "^1.0.2", - "has-proto": "^1.2.0", - "has-symbols": "^1.1.0", - "hasown": "^2.0.2", - "internal-slot": "^1.1.0", - "is-array-buffer": "^3.0.5", - "is-callable": "^1.2.7", - "is-data-view": "^1.0.2", - "is-regex": "^1.2.1", - "is-shared-array-buffer": "^1.0.4", - "is-string": "^1.1.1", - "is-typed-array": "^1.1.15", - "is-weakref": "^1.1.0", - "math-intrinsics": "^1.1.0", - "object-inspect": "^1.13.3", - "object-keys": "^1.1.1", - "object.assign": "^4.1.7", - "own-keys": "^1.0.1", - "regexp.prototype.flags": "^1.5.3", - "safe-array-concat": "^1.1.3", - "safe-push-apply": "^1.0.0", - "safe-regex-test": "^1.1.0", - "set-proto": "^1.0.0", - "string.prototype.trim": "^1.2.10", - "string.prototype.trimend": "^1.0.9", - "string.prototype.trimstart": "^1.0.8", - "typed-array-buffer": "^1.0.3", - "typed-array-byte-length": "^1.0.3", - "typed-array-byte-offset": "^1.0.4", - "typed-array-length": "^1.0.7", - "unbox-primitive": "^1.1.0", - "which-typed-array": "^1.1.18" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/es-define-property": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", - "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-errors": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", - "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-iterator-helpers": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.2.1.tgz", - "integrity": "sha512-uDn+FE1yrDzyC0pCo961B2IHbdM8y/ACZsKD4dG6WqrjV53BADjwa7D+1aom2rsNVfLyDgU/eigvlJGJ08OQ4w==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.6", - "es-errors": "^1.3.0", - "es-set-tostringtag": "^2.0.3", - "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.6", - "globalthis": "^1.0.4", - "gopd": "^1.2.0", - "has-property-descriptors": "^1.0.2", - "has-proto": "^1.2.0", - "has-symbols": "^1.1.0", - "internal-slot": "^1.1.0", - "iterator.prototype": "^1.1.4", - "safe-array-concat": "^1.1.3" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-object-atoms": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", - "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-set-tostringtag": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", - "integrity": 
"sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6", - "has-tostringtag": "^1.0.2", - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-shim-unscopables": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/es-shim-unscopables/-/es-shim-unscopables-1.1.0.tgz", - "integrity": "sha512-d9T8ucsEhh8Bi1woXCf+TIKDIROLG5WCkxg8geBCbvk22kzwC5G2OnXVMO6FUsvQlgUUXQ2itephWDLqDzbeCw==", - "dev": true, - "license": "MIT", - "dependencies": { - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-to-primitive": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.3.0.tgz", - "integrity": "sha512-w+5mJ3GuFL+NjVtJlvydShqE1eN3h3PbI7/5LAsYJP/2qtuMXjfL2LpHSRqo4b4eSF5K/DH1JXKUAHSB2UW50g==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-callable": "^1.2.7", - "is-date-object": "^1.0.5", - "is-symbol": "^1.0.4" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/escalade": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", - "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/escape-string-regexp": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", - "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/eslint": { - "version": "9.37.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.37.0.tgz", - "integrity": "sha512-XyLmROnACWqSxiGYArdef1fItQd47weqB7iwtfr9JHwRrqIXZdcFMvvEcL9xHCmL0SNsOvF0c42lWyM1U5dgig==", - "dev": true, - "license": "MIT", - "dependencies": { - "@eslint-community/eslint-utils": "^4.8.0", - "@eslint-community/regexpp": "^4.12.1", - "@eslint/config-array": "^0.21.0", - "@eslint/config-helpers": "^0.4.0", - "@eslint/core": "^0.16.0", - "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.37.0", - "@eslint/plugin-kit": "^0.4.0", - "@humanfs/node": "^0.16.6", - "@humanwhocodes/module-importer": "^1.0.1", - "@humanwhocodes/retry": "^0.4.2", - "@types/estree": "^1.0.6", - "@types/json-schema": "^7.0.15", - "ajv": "^6.12.4", - "chalk": "^4.0.0", - "cross-spawn": "^7.0.6", - "debug": "^4.3.2", - "escape-string-regexp": "^4.0.0", - "eslint-scope": "^8.4.0", - "eslint-visitor-keys": "^4.2.1", - "espree": "^10.4.0", - "esquery": "^1.5.0", - "esutils": "^2.0.2", - "fast-deep-equal": "^3.1.3", - "file-entry-cache": "^8.0.0", - "find-up": "^5.0.0", - "glob-parent": "^6.0.2", - "ignore": "^5.2.0", - "imurmurhash": "^0.1.4", - "is-glob": "^4.0.0", - "json-stable-stringify-without-jsonify": "^1.0.1", - "lodash.merge": "^4.6.2", - "minimatch": "^3.1.2", - "natural-compare": "^1.4.0", - "optionator": "^0.9.3" - }, - "bin": { - "eslint": "bin/eslint.js" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://eslint.org/donate" - }, - "peerDependencies": { - "jiti": "*" - }, - "peerDependenciesMeta": { - "jiti": { - 
"optional": true - } - } - }, - "node_modules/eslint-config-next": { - "version": "15.5.6", - "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-15.5.6.tgz", - "integrity": "sha512-cGr3VQlPsZBEv8rtYp4BpG1KNXDqGvPo9VC1iaCgIA11OfziC/vczng+TnAS3WpRIR3Q5ye/6yl+CRUuZ1fPGg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@next/eslint-plugin-next": "15.5.6", - "@rushstack/eslint-patch": "^1.10.3", - "@typescript-eslint/eslint-plugin": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0", - "@typescript-eslint/parser": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0", - "eslint-import-resolver-node": "^0.3.6", - "eslint-import-resolver-typescript": "^3.5.2", - "eslint-plugin-import": "^2.31.0", - "eslint-plugin-jsx-a11y": "^6.10.0", - "eslint-plugin-react": "^7.37.0", - "eslint-plugin-react-hooks": "^5.0.0" - }, - "peerDependencies": { - "eslint": "^7.23.0 || ^8.0.0 || ^9.0.0", - "typescript": ">=3.3.1" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/eslint-config-prettier": { - "version": "10.1.8", - "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-10.1.8.tgz", - "integrity": "sha512-82GZUjRS0p/jganf6q1rEO25VSoHH0hKPCTrgillPjdI/3bgBhAE1QzHrHTizjpRvy6pGAvKjDJtk2pF9NDq8w==", - "dev": true, - "license": "MIT", - "bin": { - "eslint-config-prettier": "bin/cli.js" - }, - "funding": { - "url": "https://opencollective.com/eslint-config-prettier" - }, - "peerDependencies": { - "eslint": ">=7.0.0" - } - }, - "node_modules/eslint-import-resolver-node": { - "version": "0.3.9", - "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.9.tgz", - "integrity": "sha512-WFj2isz22JahUv+B788TlO3N6zL3nNJGU8CcZbPZvVEkBPaJdCV4vy5wyghty5ROFbCRnm132v8BScu5/1BQ8g==", - "dev": true, - "license": "MIT", - "dependencies": { - "debug": "^3.2.7", - "is-core-module": "^2.13.0", - "resolve": "^1.22.4" - } - }, - "node_modules/eslint-import-resolver-node/node_modules/debug": { - "version": "3.2.7", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", - "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ms": "^2.1.1" - } - }, - "node_modules/eslint-import-resolver-typescript": { - "version": "3.10.1", - "resolved": "https://registry.npmjs.org/eslint-import-resolver-typescript/-/eslint-import-resolver-typescript-3.10.1.tgz", - "integrity": "sha512-A1rHYb06zjMGAxdLSkN2fXPBwuSaQ0iO5M/hdyS0Ajj1VBaRp0sPD3dn1FhME3c/JluGFbwSxyCfqdSbtQLAHQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "@nolyfill/is-core-module": "1.0.39", - "debug": "^4.4.0", - "get-tsconfig": "^4.10.0", - "is-bun-module": "^2.0.0", - "stable-hash": "^0.0.5", - "tinyglobby": "^0.2.13", - "unrs-resolver": "^1.6.2" - }, - "engines": { - "node": "^14.18.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint-import-resolver-typescript" - }, - "peerDependencies": { - "eslint": "*", - "eslint-plugin-import": "*", - "eslint-plugin-import-x": "*" - }, - "peerDependenciesMeta": { - "eslint-plugin-import": { - "optional": true - }, - "eslint-plugin-import-x": { - "optional": true - } - } - }, - "node_modules/eslint-module-utils": { - "version": "2.12.0", - "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.12.0.tgz", - "integrity": "sha512-wALZ0HFoytlyh/1+4wuZ9FJCD/leWHQzzrxJ8+rebyReSLk7LApMyd3WJaLVoN+D5+WIdJyDK1c6JnE65V4Zyg==", 
- "dev": true, - "license": "MIT", - "dependencies": { - "debug": "^3.2.7" - }, - "engines": { - "node": ">=4" - }, - "peerDependenciesMeta": { - "eslint": { - "optional": true - } - } - }, - "node_modules/eslint-module-utils/node_modules/debug": { - "version": "3.2.7", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", - "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ms": "^2.1.1" - } - }, - "node_modules/eslint-plugin-import": { - "version": "2.31.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.31.0.tgz", - "integrity": "sha512-ixmkI62Rbc2/w8Vfxyh1jQRTdRTF52VxwRVHl/ykPAmqG+Nb7/kNn+byLP0LxPgI7zWA16Jt82SybJInmMia3A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@rtsao/scc": "^1.1.0", - "array-includes": "^3.1.8", - "array.prototype.findlastindex": "^1.2.5", - "array.prototype.flat": "^1.3.2", - "array.prototype.flatmap": "^1.3.2", - "debug": "^3.2.7", - "doctrine": "^2.1.0", - "eslint-import-resolver-node": "^0.3.9", - "eslint-module-utils": "^2.12.0", - "hasown": "^2.0.2", - "is-core-module": "^2.15.1", - "is-glob": "^4.0.3", - "minimatch": "^3.1.2", - "object.fromentries": "^2.0.8", - "object.groupby": "^1.0.3", - "object.values": "^1.2.0", - "semver": "^6.3.1", - "string.prototype.trimend": "^1.0.8", - "tsconfig-paths": "^3.15.0" - }, - "engines": { - "node": ">=4" - }, - "peerDependencies": { - "eslint": "^2 || ^3 || ^4 || ^5 || ^6 || ^7.2.0 || ^8 || ^9" - } - }, - "node_modules/eslint-plugin-import/node_modules/debug": { - "version": "3.2.7", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", - "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ms": "^2.1.1" - } - }, - "node_modules/eslint-plugin-import/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/eslint-plugin-jsx-a11y": { - "version": "6.10.2", - "resolved": "https://registry.npmjs.org/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.10.2.tgz", - "integrity": "sha512-scB3nz4WmG75pV8+3eRUQOHZlNSUhFNq37xnpgRkCCELU3XMvXAxLk1eqWWyE22Ki4Q01Fnsw9BA3cJHDPgn2Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "aria-query": "^5.3.2", - "array-includes": "^3.1.8", - "array.prototype.flatmap": "^1.3.2", - "ast-types-flow": "^0.0.8", - "axe-core": "^4.10.0", - "axobject-query": "^4.1.0", - "damerau-levenshtein": "^1.0.8", - "emoji-regex": "^9.2.2", - "hasown": "^2.0.2", - "jsx-ast-utils": "^3.3.5", - "language-tags": "^1.0.9", - "minimatch": "^3.1.2", - "object.fromentries": "^2.0.8", - "safe-regex-test": "^1.0.3", - "string.prototype.includes": "^2.0.1" - }, - "engines": { - "node": ">=4.0" - }, - "peerDependencies": { - "eslint": "^3 || ^4 || ^5 || ^6 || ^7 || ^8 || ^9" - } - }, - "node_modules/eslint-plugin-prettier": { - "version": "5.5.4", - "resolved": "https://registry.npmjs.org/eslint-plugin-prettier/-/eslint-plugin-prettier-5.5.4.tgz", - "integrity": "sha512-swNtI95SToIz05YINMA6Ox5R057IMAmWZ26GqPxusAp1TZzj+IdY9tXNWWD3vkF/wEqydCONcwjTFpxybBqZsg==", - "dev": true, - "license": "MIT", - "dependencies": { - 
"prettier-linter-helpers": "^1.0.0", - "synckit": "^0.11.7" - }, - "engines": { - "node": "^14.18.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint-plugin-prettier" - }, - "peerDependencies": { - "@types/eslint": ">=8.0.0", - "eslint": ">=8.0.0", - "eslint-config-prettier": ">= 7.0.0 <10.0.0 || >=10.1.0", - "prettier": ">=3.0.0" - }, - "peerDependenciesMeta": { - "@types/eslint": { - "optional": true - }, - "eslint-config-prettier": { - "optional": true - } - } - }, - "node_modules/eslint-plugin-react": { - "version": "7.37.5", - "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.37.5.tgz", - "integrity": "sha512-Qteup0SqU15kdocexFNAJMvCJEfa2xUKNV4CC1xsVMrIIqEy3SQ/rqyxCWNzfrd3/ldy6HMlD2e0JDVpDg2qIA==", - "dev": true, - "license": "MIT", - "dependencies": { - "array-includes": "^3.1.8", - "array.prototype.findlast": "^1.2.5", - "array.prototype.flatmap": "^1.3.3", - "array.prototype.tosorted": "^1.1.4", - "doctrine": "^2.1.0", - "es-iterator-helpers": "^1.2.1", - "estraverse": "^5.3.0", - "hasown": "^2.0.2", - "jsx-ast-utils": "^2.4.1 || ^3.0.0", - "minimatch": "^3.1.2", - "object.entries": "^1.1.9", - "object.fromentries": "^2.0.8", - "object.values": "^1.2.1", - "prop-types": "^15.8.1", - "resolve": "^2.0.0-next.5", - "semver": "^6.3.1", - "string.prototype.matchall": "^4.0.12", - "string.prototype.repeat": "^1.0.0" - }, - "engines": { - "node": ">=4" - }, - "peerDependencies": { - "eslint": "^3 || ^4 || ^5 || ^6 || ^7 || ^8 || ^9.7" - } - }, - "node_modules/eslint-plugin-react-hooks": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-5.2.0.tgz", - "integrity": "sha512-+f15FfK64YQwZdJNELETdn5ibXEUQmW1DZL6KXhNnc2heoy/sg9VJJeT7n8TlMWouzWqSWavFkIhHyIbIAEapg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0" - } - }, - "node_modules/eslint-plugin-react/node_modules/resolve": { - "version": "2.0.0-next.5", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.5.tgz", - "integrity": "sha512-U7WjGVG9sH8tvjW5SmGbQuui75FiyjAX72HX15DwBBwF9dNiQZRQAg9nnPhYy+TUnE0+VcrttuvNI8oSxZcocA==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-core-module": "^2.13.0", - "path-parse": "^1.0.7", - "supports-preserve-symlinks-flag": "^1.0.0" - }, - "bin": { - "resolve": "bin/resolve" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/eslint-plugin-react/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/eslint-scope": { - "version": "8.4.0", - "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz", - "integrity": "sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg==", - "dev": true, - "license": "BSD-2-Clause", - "dependencies": { - "esrecurse": "^4.3.0", - "estraverse": "^5.2.0" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/eslint-visitor-keys": { - "version": "4.2.1", - "resolved": 
"https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz", - "integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/espree": { - "version": "10.4.0", - "resolved": "https://registry.npmjs.org/espree/-/espree-10.4.0.tgz", - "integrity": "sha512-j6PAQ2uUr79PZhBjP5C5fhl8e39FmRnOjsD5lGnWrFU8i2G776tBK7+nP8KuQUTTyAZUwfQqXAgrVH5MbH9CYQ==", - "dev": true, - "license": "BSD-2-Clause", - "dependencies": { - "acorn": "^8.15.0", - "acorn-jsx": "^5.3.2", - "eslint-visitor-keys": "^4.2.1" - }, - "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - "node_modules/esprima": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, - "license": "BSD-2-Clause", - "bin": { - "esparse": "bin/esparse.js", - "esvalidate": "bin/esvalidate.js" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/esquery": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.6.0.tgz", - "integrity": "sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "estraverse": "^5.1.0" - }, - "engines": { - "node": ">=0.10" - } - }, - "node_modules/esrecurse": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", - "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", - "dev": true, - "license": "BSD-2-Clause", - "dependencies": { - "estraverse": "^5.2.0" - }, - "engines": { - "node": ">=4.0" - } - }, - "node_modules/estraverse": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", - "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=4.0" - } - }, - "node_modules/estree-util-is-identifier-name": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/estree-util-is-identifier-name/-/estree-util-is-identifier-name-3.0.0.tgz", - "integrity": "sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==", - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/esutils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/event-target-shim": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", - "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/execa": { - "version": "5.1.1", - "resolved": 
"https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", - "integrity": "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==", - "dev": true, - "license": "MIT", - "dependencies": { - "cross-spawn": "^7.0.3", - "get-stream": "^6.0.0", - "human-signals": "^2.1.0", - "is-stream": "^2.0.0", - "merge-stream": "^2.0.0", - "npm-run-path": "^4.0.1", - "onetime": "^5.1.2", - "signal-exit": "^3.0.3", - "strip-final-newline": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sindresorhus/execa?sponsor=1" - } - }, - "node_modules/execa/node_modules/signal-exit": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", - "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/exit-x": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/exit-x/-/exit-x-0.2.2.tgz", - "integrity": "sha512-+I6B/IkJc1o/2tiURyz/ivu/O0nKNEArIUB5O7zBrlDVJr22SCLH3xTeEry428LvFhRzIA1g8izguxJ/gbNcVQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/expect": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/expect/-/expect-30.2.0.tgz", - "integrity": "sha512-u/feCi0GPsI+988gU2FLcsHyAHTU0MX1Wg68NhAnN7z/+C5wqG+CY8J53N9ioe8RXgaoz0nBR/TYMf3AycUuPw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/expect-utils": "30.2.0", - "@jest/get-type": "30.1.0", - "jest-matcher-utils": "30.2.0", - "jest-message-util": "30.2.0", - "jest-mock": "30.2.0", - "jest-util": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/extend": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", - "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", - "license": "MIT" - }, - "node_modules/fast-deep-equal": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", - "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true, - "license": "MIT" - }, - "node_modules/fast-diff": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/fast-diff/-/fast-diff-1.3.0.tgz", - "integrity": "sha512-VxPP4NqbUjj6MaAOafWeUn2cXWLcCtljklUtZf0Ind4XQ+QPtmA0b18zZy0jIQx+ExRVCR/ZQpBmik5lXshNsw==", - "dev": true, - "license": "Apache-2.0" - }, - "node_modules/fast-glob": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz", - "integrity": "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.4" - }, - "engines": { - "node": ">=8.6.0" - } - }, - "node_modules/fast-glob/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/fast-json-stable-stringify": { - "version": 
"2.1.0", - "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", - "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", - "dev": true, - "license": "MIT" - }, - "node_modules/fast-levenshtein": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", - "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", - "dev": true, - "license": "MIT" - }, - "node_modules/fastq": { - "version": "1.19.1", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz", - "integrity": "sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "reusify": "^1.0.4" - } - }, - "node_modules/fb-watchman": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/fb-watchman/-/fb-watchman-2.0.2.tgz", - "integrity": "sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "bser": "2.1.1" - } - }, - "node_modules/file-entry-cache": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", - "integrity": "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "flat-cache": "^4.0.0" - }, - "engines": { - "node": ">=16.0.0" - } - }, - "node_modules/fill-range": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", - "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", - "dev": true, - "license": "MIT", - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/find-up": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", - "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", - "dev": true, - "license": "MIT", - "dependencies": { - "locate-path": "^6.0.0", - "path-exists": "^4.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/flat-cache": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-4.0.1.tgz", - "integrity": "sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw==", - "dev": true, - "license": "MIT", - "dependencies": { - "flatted": "^3.2.9", - "keyv": "^4.5.4" - }, - "engines": { - "node": ">=16" - } - }, - "node_modules/flatted": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.3.tgz", - "integrity": "sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==", - "dev": true, - "license": "ISC" - }, - "node_modules/for-each": { - "version": "0.3.5", - "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.5.tgz", - "integrity": "sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-callable": "^1.2.7" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, 
- "node_modules/foreground-child": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", - "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", - "dev": true, - "license": "ISC", - "dependencies": { - "cross-spawn": "^7.0.6", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/form-data": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz", - "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", - "license": "MIT", - "dependencies": { - "asynckit": "^0.4.0", - "combined-stream": "^1.0.8", - "es-set-tostringtag": "^2.1.0", - "hasown": "^2.0.2", - "mime-types": "^2.1.12" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/form-data-encoder": { - "version": "1.7.2", - "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", - "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==", - "license": "MIT" - }, - "node_modules/form-data/node_modules/mime-db": { - "version": "1.52.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", - "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/form-data/node_modules/mime-types": { - "version": "2.1.35", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", - "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", - "license": "MIT", - "dependencies": { - "mime-db": "1.52.0" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/formdata-node": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", - "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", - "license": "MIT", - "dependencies": { - "node-domexception": "1.0.0", - "web-streams-polyfill": "4.0.0-beta.3" - }, - "engines": { - "node": ">= 12.20" - } - }, - "node_modules/framer-motion": { - "version": "12.23.24", - "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-12.23.24.tgz", - "integrity": "sha512-HMi5HRoRCTou+3fb3h9oTLyJGBxHfW+HnNE25tAXOvVx/IvwMHK0cx7IR4a2ZU6sh3IX1Z+4ts32PcYBOqka8w==", - "license": "MIT", - "dependencies": { - "motion-dom": "^12.23.23", - "motion-utils": "^12.23.6", - "tslib": "^2.4.0" - }, - "peerDependencies": { - "@emotion/is-prop-valid": "*", - "react": "^18.0.0 || ^19.0.0", - "react-dom": "^18.0.0 || ^19.0.0" - }, - "peerDependenciesMeta": { - "@emotion/is-prop-valid": { - "optional": true - }, - "react": { - "optional": true - }, - "react-dom": { - "optional": true - } - } - }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", - "dev": true, - "license": "ISC" - }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": 
"sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/function-bind": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", - "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/function.prototype.name": { - "version": "1.1.8", - "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.8.tgz", - "integrity": "sha512-e5iwyodOHhbMr/yNrc7fDYG4qlbIvI5gajyzPnb5TCwyhjApznQh1BMFou9b30SevY43gCJKXycoCBjMbsuW0Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "define-properties": "^1.2.1", - "functions-have-names": "^1.2.3", - "hasown": "^2.0.2", - "is-callable": "^1.2.7" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/functions-have-names": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/functions-have-names/-/functions-have-names-1.2.3.tgz", - "integrity": "sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/gensync": { - "version": "1.0.0-beta.2", - "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", - "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", - "dev": true, - "license": "ISC", - "engines": { - "node": "6.* || 8.* || >= 10.*" - } - }, - "node_modules/get-intrinsic": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", - "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.2", - "es-define-property": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.1.1", - "function-bind": "^1.1.2", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "has-symbols": "^1.1.0", - "hasown": "^2.0.2", - "math-intrinsics": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/get-nonce": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz", - "integrity": "sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/get-package-type": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz", - "integrity": 
"sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8.0.0" - } - }, - "node_modules/get-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", - "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "license": "MIT", - "dependencies": { - "dunder-proto": "^1.0.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/get-stream": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", - "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/get-symbol-description": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/get-symbol-description/-/get-symbol-description-1.1.0.tgz", - "integrity": "sha512-w9UMqWwJxHNOvoNzSJ2oPF5wvYcvP7jUvYzhp67yEhTi17ZDBBC1z9pTdGuzjD+EFIqLSYRweZjqfiPzQ06Ebg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/get-tsconfig": { - "version": "4.10.0", - "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz", - "integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==", - "dev": true, - "license": "MIT", - "dependencies": { - "resolve-pkg-maps": "^1.0.0" - }, - "funding": { - "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" - } - }, - "node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "dev": true, - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/glob-parent": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", - "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", - "dev": true, - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.3" - }, - "engines": { - "node": ">=10.13.0" - } - }, - "node_modules/glob/node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/glob/node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "dev": true, - "license": "ISC", - 
"dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/globals": { - "version": "14.0.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-14.0.0.tgz", - "integrity": "sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/globalthis": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz", - "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "define-properties": "^1.2.1", - "gopd": "^1.0.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/gopd": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", - "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/graceful-fs": { - "version": "4.2.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", - "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/graphemer": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz", - "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==", - "dev": true, - "license": "MIT" - }, - "node_modules/has-bigints": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.1.0.tgz", - "integrity": "sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-flag": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/has-property-descriptors": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", - "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", - "dev": true, - "license": "MIT", - "dependencies": { - "es-define-property": "^1.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-proto": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.2.0.tgz", - "integrity": "sha512-KIL7eQPfHQRC8+XluaIw7BHUwwqL19bQn4hzNgdr+1wXoU0KKj6rufu47lhY7KbJR2C6T6+PfyN0Ea7wkSS+qQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "dunder-proto": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - 
"node_modules/has-symbols": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", - "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-tostringtag": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", - "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", - "license": "MIT", - "dependencies": { - "has-symbols": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "license": "MIT", - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/hast-util-to-html": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/hast-util-to-html/-/hast-util-to-html-9.0.5.tgz", - "integrity": "sha512-OguPdidb+fbHQSU4Q4ZiLKnzWo8Wwsf5bZfbvu7//a9oTYoqD/fWpe96NuHkoS9h0ccGOTe0C4NGXdtS0iObOw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/unist": "^3.0.0", - "ccount": "^2.0.0", - "comma-separated-tokens": "^2.0.0", - "hast-util-whitespace": "^3.0.0", - "html-void-elements": "^3.0.0", - "mdast-util-to-hast": "^13.0.0", - "property-information": "^7.0.0", - "space-separated-tokens": "^2.0.0", - "stringify-entities": "^4.0.0", - "zwitch": "^2.0.4" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-to-jsx-runtime": { - "version": "2.3.6", - "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz", - "integrity": "sha512-zl6s8LwNyo1P9uw+XJGvZtdFF1GdAkOg8ujOw+4Pyb76874fLps4ueHXDhXWdk6YHQ6OgUtinliG7RsYvCbbBg==", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "@types/hast": "^3.0.0", - "@types/unist": "^3.0.0", - "comma-separated-tokens": "^2.0.0", - "devlop": "^1.0.0", - "estree-util-is-identifier-name": "^3.0.0", - "hast-util-whitespace": "^3.0.0", - "mdast-util-mdx-expression": "^2.0.0", - "mdast-util-mdx-jsx": "^3.0.0", - "mdast-util-mdxjs-esm": "^2.0.0", - "property-information": "^7.0.0", - "space-separated-tokens": "^2.0.0", - "style-to-js": "^1.0.0", - "unist-util-position": "^5.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/hast-util-whitespace": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", - "integrity": "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/html-encoding-sniffer": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz", - "integrity": 
"sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "whatwg-encoding": "^3.1.1" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/html-escaper": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", - "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", - "dev": true, - "license": "MIT" - }, - "node_modules/html-url-attributes": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz", - "integrity": "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==", - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/html-void-elements": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz", - "integrity": "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/http-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", - "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "dev": true, - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.0", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/https-proxy-agent": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", - "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", - "dev": true, - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/human-signals": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", - "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=10.17.0" - } - }, - "node_modules/humanize-ms": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", - "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", - "license": "MIT", - "dependencies": { - "ms": "^2.0.0" - } - }, - "node_modules/iconv-lite": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", - "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "dev": true, - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/ignore": { - "version": "5.3.2", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", - "integrity": "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/import-fresh": { - "version": "3.3.1", - "resolved": 
"https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", - "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "parent-module": "^1.0.0", - "resolve-from": "^4.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/import-local": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.2.0.tgz", - "integrity": "sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA==", - "dev": true, - "license": "MIT", - "dependencies": { - "pkg-dir": "^4.2.0", - "resolve-cwd": "^3.0.0" - }, - "bin": { - "import-local-fixture": "fixtures/cli.js" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/imurmurhash": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", - "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.8.19" - } - }, - "node_modules/indent-string": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", - "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", - "deprecated": "This module is not supported, and leaks memory. Do not use it. 
Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", - "dev": true, - "license": "ISC", - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" - } - }, - "node_modules/inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/inline-style-parser": { - "version": "0.2.4", - "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.4.tgz", - "integrity": "sha512-0aO8FkhNZlj/ZIbNi7Lxxr12obT7cL1moPfE4tg1LkX7LlLfC6DeX4l2ZEud1ukP9jNQyNnfzQVqwbwmAATY4Q==", - "license": "MIT" - }, - "node_modules/internal-slot": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.1.0.tgz", - "integrity": "sha512-4gd7VpWNQNB4UKKCFFVcp1AVv+FMOgs9NKzjHKusc8jTMhd5eL1NqQqOpE0KzMds804/yHlglp3uxgluOqAPLw==", - "dev": true, - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "hasown": "^2.0.2", - "side-channel": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/is-alphabetical": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-2.0.1.tgz", - "integrity": "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/is-alphanumerical": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-2.0.1.tgz", - "integrity": "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==", - "license": "MIT", - "dependencies": { - "is-alphabetical": "^2.0.0", - "is-decimal": "^2.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/is-array-buffer": { - "version": "3.0.5", - "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.5.tgz", - "integrity": "sha512-DDfANUiiG2wC1qawP66qlTugJeL5HyzMpfr8lLK+jMQirGzNod0B12cFB/9q838Ru27sBwfw78/rdoU7RERz6A==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "get-intrinsic": "^1.2.6" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-arrayish": { - "version": "0.3.4", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.4.tgz", - "integrity": "sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA==", - "license": "MIT", - "optional": true - }, - "node_modules/is-async-function": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-async-function/-/is-async-function-2.1.1.tgz", - "integrity": "sha512-9dgM/cZBnNvjzaMYHVoxxfPj2QXt22Ev7SuuPrs+xav0ukGB0S6d4ydZdEiM48kLx5kDV+QBPrpVnFyefL8kkQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "async-function": "^1.0.0", - "call-bound": "^1.0.3", - "get-proto": "^1.0.1", - "has-tostringtag": "^1.0.2", - "safe-regex-test": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-bigint": { - "version": "1.1.0", - "resolved": 
"https://registry.npmjs.org/is-bigint/-/is-bigint-1.1.0.tgz", - "integrity": "sha512-n4ZT37wG78iz03xPRKJrHTdZbe3IicyucEtdRsV5yglwc3GyUfbAfpSeD0FJ41NbUNSt5wbhqfp1fS+BgnvDFQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "has-bigints": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-boolean-object": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.2.2.tgz", - "integrity": "sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-bun-module": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-bun-module/-/is-bun-module-2.0.0.tgz", - "integrity": "sha512-gNCGbnnnnFAUGKeZ9PdbyeGYJqewpmc2aKHUEMO5nQPWU9lOmv7jcmQIv+qHD8fXW6W7qfuCwX4rY9LNRjXrkQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "semver": "^7.7.1" - } - }, - "node_modules/is-callable": { - "version": "1.2.7", - "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz", - "integrity": "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-core-module": { - "version": "2.16.1", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz", - "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==", - "dev": true, - "license": "MIT", - "dependencies": { - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-data-view": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-data-view/-/is-data-view-1.0.2.tgz", - "integrity": "sha512-RKtWF8pGmS87i2D6gqQu/l7EYRlVdfzemCJN/P3UOs//x1QE7mfhvzHIApBTRf7axvT6DMGwSwBXYCT0nfB9xw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "get-intrinsic": "^1.2.6", - "is-typed-array": "^1.1.13" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-date-object": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.1.0.tgz", - "integrity": "sha512-PwwhEakHVKTdRNVOw+/Gyh0+MzlCl4R6qKvkhuvLtPMggI1WAHt9sOwZxQLSGpUaDnrdyDsomoRgNnCfKNSXXg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-decimal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-2.0.1.tgz", - "integrity": "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": 
"sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-finalizationregistry": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-finalizationregistry/-/is-finalizationregistry-1.1.1.tgz", - "integrity": "sha512-1pC6N8qWJbWoPtEjgcL2xyhQOP491EQjeUo3qTKcmV8YSDDJrOepfG8pcC7h/QgnQHYSv0mJ3Z/ZWxmatVrysg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/is-generator-fn": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-generator-fn/-/is-generator-fn-2.1.0.tgz", - "integrity": "sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/is-generator-function": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.1.0.tgz", - "integrity": "sha512-nPUB5km40q9e8UfN/Zc24eLlzdSf9OfKByBw9CIdw4H1giPMeA0OIJvbchsCu4npfI2QcMVBsGEBHKZ7wLTWmQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "get-proto": "^1.0.0", - "has-tostringtag": "^1.0.2", - "safe-regex-test": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-glob": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", - "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-extglob": "^2.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-hexadecimal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz", - "integrity": "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/is-map": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz", - "integrity": "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/is-number-object": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.1.1.tgz", - "integrity": 
"sha512-lZhclumE1G6VYD8VHe35wFaIif+CTy5SJIi5+3y4psDgWu4wPDoBhF8NxUOinEc7pHgiTsT6MaBb92rKhhD+Xw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-plain-obj": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz", - "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-potential-custom-element-name": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", - "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/is-regex": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.2.1.tgz", - "integrity": "sha512-MjYsKHO5O7mCsmRGxWcLWheFqN9DJ/2TmngvjKXihe6efViPqc274+Fx/4fYj/r03+ESvBdTXK0V6tA3rgez1g==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "gopd": "^1.2.0", - "has-tostringtag": "^1.0.2", - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-set": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/is-set/-/is-set-2.0.3.tgz", - "integrity": "sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-shared-array-buffer": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/is-shared-array-buffer/-/is-shared-array-buffer-1.0.4.tgz", - "integrity": "sha512-ISWac8drv4ZGfwKl5slpHG9OwPNty4jOWPRIhBpxOoD+hqITiwuipOQ2bNthAzwA3B4fIjO4Nln74N0S9byq8A==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-stream": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", - "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-string": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.1.1.tgz", - "integrity": "sha512-BtEeSsoaQjlSPBemMQIrY1MY0uM6vnS1g5fmufYOtnxLGUZM2178PKbhsk7Ffv58IX+ZtcvoGwccYsh0PglkAA==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-symbol": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.1.1.tgz", - "integrity": "sha512-9gGx6GTtCQM73BgmHQXfDmLtfjjTUDSyoxTCbp5WtoixAhfgsDirWIcVQ/IHpvI5Vgd5i/J5F7B9cN/WlVbC/w==", - "dev": true, - "license": "MIT", - 
"dependencies": { - "call-bound": "^1.0.2", - "has-symbols": "^1.1.0", - "safe-regex-test": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-typed-array": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.15.tgz", - "integrity": "sha512-p3EcsicXjit7SaskXHs1hA91QxgTw46Fv6EFKKGS5DRFLD8yKnohjF3hxoju94b/OcMZoQukzpPpBE9uLVKzgQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "which-typed-array": "^1.1.16" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-weakmap": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", - "integrity": "sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-weakref": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-weakref/-/is-weakref-1.1.1.tgz", - "integrity": "sha512-6i9mGWSlqzNMEqpCp93KwRS1uUOodk2OJ6b+sq7ZPDSy2WuI5NFIxp/254TytR8ftefexkWn5xNiHUNpPOfSew==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-weakset": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/is-weakset/-/is-weakset-2.0.4.tgz", - "integrity": "sha512-mfcwb6IzQyOKTs84CQMrOwW4gQcaTOAWJ0zzJCl2WSPDrWk/OzDaImWFH3djXhb24g4eudZfLRozAvPGw4d9hQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "get-intrinsic": "^1.2.6" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/isarray": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-2.0.5.tgz", - "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==", - "dev": true, - "license": "MIT" - }, - "node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true, - "license": "ISC" - }, - "node_modules/istanbul-lib-coverage": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", - "integrity": "sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=8" - } - }, - "node_modules/istanbul-lib-instrument": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-6.0.3.tgz", - "integrity": "sha512-Vtgk7L/R2JHyyGW07spoFlB8/lpjiOLTjMdms6AFMraYt3BaJauod/NGrfnVG/y4Ix1JEuMRPDPEj2ua+zz1/Q==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "@babel/core": "^7.23.9", - "@babel/parser": "^7.23.9", - "@istanbuljs/schema": "^0.1.3", - "istanbul-lib-coverage": "^3.2.0", - "semver": "^7.5.4" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/istanbul-lib-report": { - "version": "3.0.1", - "resolved": 
"https://registry.npmjs.org/istanbul-lib-report/-/istanbul-lib-report-3.0.1.tgz", - "integrity": "sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "istanbul-lib-coverage": "^3.0.0", - "make-dir": "^4.0.0", - "supports-color": "^7.1.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/istanbul-lib-source-maps": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/istanbul-lib-source-maps/-/istanbul-lib-source-maps-5.0.6.tgz", - "integrity": "sha512-yg2d+Em4KizZC5niWhQaIomgf5WlL4vOOjZ5xGCmF8SnPE/mDWWXgvRExdcpCgh9lLRRa1/fSYp2ymmbJ1pI+A==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "@jridgewell/trace-mapping": "^0.3.23", - "debug": "^4.1.1", - "istanbul-lib-coverage": "^3.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/istanbul-reports": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/istanbul-reports/-/istanbul-reports-3.2.0.tgz", - "integrity": "sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "html-escaper": "^2.0.0", - "istanbul-lib-report": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/iterator.prototype": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/iterator.prototype/-/iterator.prototype-1.1.5.tgz", - "integrity": "sha512-H0dkQoCa3b2VEeKQBOxFph+JAbcrQdE7KC0UkqwpLmv2EC4P41QXP+rqo9wYodACiG5/WM5s9oDApTU8utwj9g==", - "dev": true, - "license": "MIT", - "dependencies": { - "define-data-property": "^1.1.4", - "es-object-atoms": "^1.0.0", - "get-intrinsic": "^1.2.6", - "get-proto": "^1.0.0", - "has-symbols": "^1.1.0", - "set-function-name": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/jackspeak": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", - "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", - "dev": true, - "license": "BlueOak-1.0.0", - "dependencies": { - "@isaacs/cliui": "^8.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - }, - "optionalDependencies": { - "@pkgjs/parseargs": "^0.11.0" - } - }, - "node_modules/jest": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest/-/jest-30.2.0.tgz", - "integrity": "sha512-F26gjC0yWN8uAA5m5Ss8ZQf5nDHWGlN/xWZIh8S5SRbsEKBovwZhxGd6LJlbZYxBgCYOtreSUyb8hpXyGC5O4A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/core": "30.2.0", - "@jest/types": "30.2.0", - "import-local": "^3.2.0", - "jest-cli": "30.2.0" - }, - "bin": { - "jest": "bin/jest.js" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" - }, - "peerDependenciesMeta": { - "node-notifier": { - "optional": true - } - } - }, - "node_modules/jest-changed-files": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-changed-files/-/jest-changed-files-30.2.0.tgz", - "integrity": "sha512-L8lR1ChrRnSdfeOvTrwZMlnWV8G/LLjQ0nG9MBclwWZidA2N5FviRki0Bvh20WRMOX31/JYvzdqTJrk5oBdydQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "execa": "^5.1.1", - "jest-util": "30.2.0", - "p-limit": "^3.1.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-circus": { - "version": "30.2.0", - 
"resolved": "https://registry.npmjs.org/jest-circus/-/jest-circus-30.2.0.tgz", - "integrity": "sha512-Fh0096NC3ZkFx05EP2OXCxJAREVxj1BcW/i6EWqqymcgYKWjyyDpral3fMxVcHXg6oZM7iULer9wGRFvfpl+Tg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/environment": "30.2.0", - "@jest/expect": "30.2.0", - "@jest/test-result": "30.2.0", - "@jest/types": "30.2.0", - "@types/node": "*", - "chalk": "^4.1.2", - "co": "^4.6.0", - "dedent": "^1.6.0", - "is-generator-fn": "^2.1.0", - "jest-each": "30.2.0", - "jest-matcher-utils": "30.2.0", - "jest-message-util": "30.2.0", - "jest-runtime": "30.2.0", - "jest-snapshot": "30.2.0", - "jest-util": "30.2.0", - "p-limit": "^3.1.0", - "pretty-format": "30.2.0", - "pure-rand": "^7.0.0", - "slash": "^3.0.0", - "stack-utils": "^2.0.6" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-circus/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-circus/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-circus/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-cli": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-cli/-/jest-cli-30.2.0.tgz", - "integrity": "sha512-Os9ukIvADX/A9sLt6Zse3+nmHtHaE6hqOsjQtNiugFTbKRHYIYtZXNGNK9NChseXy7djFPjndX1tL0sCTlfpAA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/core": "30.2.0", - "@jest/test-result": "30.2.0", - "@jest/types": "30.2.0", - "chalk": "^4.1.2", - "exit-x": "^0.2.2", - "import-local": "^3.2.0", - "jest-config": "30.2.0", - "jest-util": "30.2.0", - "jest-validate": "30.2.0", - "yargs": "^17.7.2" - }, - "bin": { - "jest": "bin/jest.js" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" - }, - "peerDependenciesMeta": { - "node-notifier": { - "optional": true - } - } - }, - "node_modules/jest-config": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-config/-/jest-config-30.2.0.tgz", - "integrity": "sha512-g4WkyzFQVWHtu6uqGmQR4CQxz/CH3yDSlhzXMWzNjDx843gYjReZnMRanjRCq5XZFuQrGDxgUaiYWE8BRfVckA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/core": "^7.27.4", - "@jest/get-type": "30.1.0", - "@jest/pattern": "30.0.1", - "@jest/test-sequencer": "30.2.0", - "@jest/types": "30.2.0", - "babel-jest": "30.2.0", - "chalk": "^4.1.2", - "ci-info": "^4.2.0", - "deepmerge": "^4.3.1", - "glob": "^10.3.10", - "graceful-fs": "^4.2.11", - "jest-circus": 
"30.2.0", - "jest-docblock": "30.2.0", - "jest-environment-node": "30.2.0", - "jest-regex-util": "30.0.1", - "jest-resolve": "30.2.0", - "jest-runner": "30.2.0", - "jest-util": "30.2.0", - "jest-validate": "30.2.0", - "micromatch": "^4.0.8", - "parse-json": "^5.2.0", - "pretty-format": "30.2.0", - "slash": "^3.0.0", - "strip-json-comments": "^3.1.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "@types/node": "*", - "esbuild-register": ">=3.4.0", - "ts-node": ">=9.0.0" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - }, - "esbuild-register": { - "optional": true - }, - "ts-node": { - "optional": true - } - } - }, - "node_modules/jest-config/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-config/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-config/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-diff": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-30.2.0.tgz", - "integrity": "sha512-dQHFo3Pt4/NLlG5z4PxZ/3yZTZ1C7s9hveiOj+GCN+uT109NC2QgsoVZsVOAvbJ3RgKkvyLGXZV9+piDpWbm6A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/diff-sequences": "30.0.1", - "@jest/get-type": "30.1.0", - "chalk": "^4.1.2", - "pretty-format": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-diff/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-diff/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-diff/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": 
"sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-docblock": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-30.2.0.tgz", - "integrity": "sha512-tR/FFgZKS1CXluOQzZvNH3+0z9jXr3ldGSD8bhyuxvlVUwbeLOGynkunvlTMxchC5urrKndYiwCFC0DLVjpOCA==", - "dev": true, - "license": "MIT", - "dependencies": { - "detect-newline": "^3.1.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-each": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-30.2.0.tgz", - "integrity": "sha512-lpWlJlM7bCUf1mfmuqTA8+j2lNURW9eNafOy99knBM01i5CQeY5UH1vZjgT9071nDJac1M4XsbyI44oNOdhlDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/get-type": "30.1.0", - "@jest/types": "30.2.0", - "chalk": "^4.1.2", - "jest-util": "30.2.0", - "pretty-format": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-each/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-each/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-each/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-environment-jsdom": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-environment-jsdom/-/jest-environment-jsdom-30.2.0.tgz", - "integrity": "sha512-zbBTiqr2Vl78pKp/laGBREYzbZx9ZtqPjOK4++lL4BNDhxRnahg51HtoDrk9/VjIy9IthNEWdKVd7H5bqBhiWQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/environment": "30.2.0", - "@jest/environment-jsdom-abstract": "30.2.0", - "@types/jsdom": "^21.1.7", - "@types/node": "*", - "jsdom": "^26.1.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "peerDependencies": { - "canvas": "^3.0.0" - }, - "peerDependenciesMeta": { - "canvas": { - "optional": true - } - } - }, - "node_modules/jest-environment-node": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-30.2.0.tgz", - "integrity": "sha512-ElU8v92QJ9UrYsKrxDIKCxu6PfNj4Hdcktcn0JX12zqNdqWHB0N+hwOnnBBXvjLd2vApZtuLUGs1QSY+MsXoNA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/environment": "30.2.0", - "@jest/fake-timers": "30.2.0", - "@jest/types": "30.2.0", - "@types/node": "*", - "jest-mock": "30.2.0", - "jest-util": "30.2.0", - "jest-validate": "30.2.0" - }, - "engines": { - "node": 
"^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-haste-map": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-haste-map/-/jest-haste-map-30.2.0.tgz", - "integrity": "sha512-sQA/jCb9kNt+neM0anSj6eZhLZUIhQgwDt7cPGjumgLM4rXsfb9kpnlacmvZz3Q5tb80nS+oG/if+NBKrHC+Xw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/types": "30.2.0", - "@types/node": "*", - "anymatch": "^3.1.3", - "fb-watchman": "^2.0.2", - "graceful-fs": "^4.2.11", - "jest-regex-util": "30.0.1", - "jest-util": "30.2.0", - "jest-worker": "30.2.0", - "micromatch": "^4.0.8", - "walker": "^1.0.8" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - }, - "optionalDependencies": { - "fsevents": "^2.3.3" - } - }, - "node_modules/jest-leak-detector": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-leak-detector/-/jest-leak-detector-30.2.0.tgz", - "integrity": "sha512-M6jKAjyzjHG0SrQgwhgZGy9hFazcudwCNovY/9HPIicmNSBuockPSedAP9vlPK6ONFJ1zfyH/M2/YYJxOz5cdQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/get-type": "30.1.0", - "pretty-format": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-leak-detector/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-leak-detector/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-leak-detector/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-matcher-utils": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-30.2.0.tgz", - "integrity": "sha512-dQ94Nq4dbzmUWkQ0ANAWS9tBRfqCrn0bV9AMYdOi/MHW726xn7eQmMeRTpX2ViC00bpNaWXq+7o4lIQ3AX13Hg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/get-type": "30.1.0", - "chalk": "^4.1.2", - "jest-diff": "30.2.0", - "pretty-format": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-matcher-utils/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-matcher-utils/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": 
"https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-matcher-utils/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-message-util": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-30.2.0.tgz", - "integrity": "sha512-y4DKFLZ2y6DxTWD4cDe07RglV88ZiNEdlRfGtqahfbIjfsw1nMCPx49Uev4IA/hWn3sDKyAnSPwoYSsAEdcimw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@jest/types": "30.2.0", - "@types/stack-utils": "^2.0.3", - "chalk": "^4.1.2", - "graceful-fs": "^4.2.11", - "micromatch": "^4.0.8", - "pretty-format": "30.2.0", - "slash": "^3.0.0", - "stack-utils": "^2.0.6" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-message-util/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-message-util/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-message-util/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-mock": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-30.2.0.tgz", - "integrity": "sha512-JNNNl2rj4b5ICpmAcq+WbLH83XswjPbjH4T7yvGzfAGCPh1rw+xVNbtk+FnRslvt9lkCcdn9i1oAoKUuFsOxRw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/types": "30.2.0", - "@types/node": "*", - "jest-util": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-pnp-resolver": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/jest-pnp-resolver/-/jest-pnp-resolver-1.2.3.tgz", - "integrity": "sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - }, - "peerDependencies": { - "jest-resolve": "*" - }, - "peerDependenciesMeta": { - "jest-resolve": { - "optional": true - } - } - }, - 
"node_modules/jest-regex-util": { - "version": "30.0.1", - "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-30.0.1.tgz", - "integrity": "sha512-jHEQgBXAgc+Gh4g0p3bCevgRCVRkB4VB70zhoAE48gxeSr1hfUOsM/C2WoJgVL7Eyg//hudYENbm3Ne+/dRVVA==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-resolve": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-resolve/-/jest-resolve-30.2.0.tgz", - "integrity": "sha512-TCrHSxPlx3tBY3hWNtRQKbtgLhsXa1WmbJEqBlTBrGafd5fiQFByy2GNCEoGR+Tns8d15GaL9cxEzKOO3GEb2A==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^4.1.2", - "graceful-fs": "^4.2.11", - "jest-haste-map": "30.2.0", - "jest-pnp-resolver": "^1.2.3", - "jest-util": "30.2.0", - "jest-validate": "30.2.0", - "slash": "^3.0.0", - "unrs-resolver": "^1.7.11" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-resolve-dependencies": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-resolve-dependencies/-/jest-resolve-dependencies-30.2.0.tgz", - "integrity": "sha512-xTOIGug/0RmIe3mmCqCT95yO0vj6JURrn1TKWlNbhiAefJRWINNPgwVkrVgt/YaerPzY3iItufd80v3lOrFJ2w==", - "dev": true, - "license": "MIT", - "dependencies": { - "jest-regex-util": "30.0.1", - "jest-snapshot": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-runner": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-runner/-/jest-runner-30.2.0.tgz", - "integrity": "sha512-PqvZ2B2XEyPEbclp+gV6KO/F1FIFSbIwewRgmROCMBo/aZ6J1w8Qypoj2pEOcg3G2HzLlaP6VUtvwCI8dM3oqQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/console": "30.2.0", - "@jest/environment": "30.2.0", - "@jest/test-result": "30.2.0", - "@jest/transform": "30.2.0", - "@jest/types": "30.2.0", - "@types/node": "*", - "chalk": "^4.1.2", - "emittery": "^0.13.1", - "exit-x": "^0.2.2", - "graceful-fs": "^4.2.11", - "jest-docblock": "30.2.0", - "jest-environment-node": "30.2.0", - "jest-haste-map": "30.2.0", - "jest-leak-detector": "30.2.0", - "jest-message-util": "30.2.0", - "jest-resolve": "30.2.0", - "jest-runtime": "30.2.0", - "jest-util": "30.2.0", - "jest-watcher": "30.2.0", - "jest-worker": "30.2.0", - "p-limit": "^3.1.0", - "source-map-support": "0.5.13" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-runtime": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-runtime/-/jest-runtime-30.2.0.tgz", - "integrity": "sha512-p1+GVX/PJqTucvsmERPMgCPvQJpFt4hFbM+VN3n8TMo47decMUcJbt+rgzwrEme0MQUA/R+1de2axftTHkKckg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/environment": "30.2.0", - "@jest/fake-timers": "30.2.0", - "@jest/globals": "30.2.0", - "@jest/source-map": "30.0.1", - "@jest/test-result": "30.2.0", - "@jest/transform": "30.2.0", - "@jest/types": "30.2.0", - "@types/node": "*", - "chalk": "^4.1.2", - "cjs-module-lexer": "^2.1.0", - "collect-v8-coverage": "^1.0.2", - "glob": "^10.3.10", - "graceful-fs": "^4.2.11", - "jest-haste-map": "30.2.0", - "jest-message-util": "30.2.0", - "jest-mock": "30.2.0", - "jest-regex-util": "30.0.1", - "jest-resolve": "30.2.0", - "jest-snapshot": "30.2.0", - "jest-util": "30.2.0", - "slash": "^3.0.0", - "strip-bom": "^4.0.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - 
"node_modules/jest-runtime/node_modules/strip-bom": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-4.0.0.tgz", - "integrity": "sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/jest-snapshot": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-30.2.0.tgz", - "integrity": "sha512-5WEtTy2jXPFypadKNpbNkZ72puZCa6UjSr/7djeecHWOu7iYhSXSnHScT8wBz3Rn8Ena5d5RYRcsyKIeqG1IyA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/core": "^7.27.4", - "@babel/generator": "^7.27.5", - "@babel/plugin-syntax-jsx": "^7.27.1", - "@babel/plugin-syntax-typescript": "^7.27.1", - "@babel/types": "^7.27.3", - "@jest/expect-utils": "30.2.0", - "@jest/get-type": "30.1.0", - "@jest/snapshot-utils": "30.2.0", - "@jest/transform": "30.2.0", - "@jest/types": "30.2.0", - "babel-preset-current-node-syntax": "^1.2.0", - "chalk": "^4.1.2", - "expect": "30.2.0", - "graceful-fs": "^4.2.11", - "jest-diff": "30.2.0", - "jest-matcher-utils": "30.2.0", - "jest-message-util": "30.2.0", - "jest-util": "30.2.0", - "pretty-format": "30.2.0", - "semver": "^7.7.2", - "synckit": "^0.11.8" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-snapshot/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-snapshot/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-snapshot/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-util": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-30.2.0.tgz", - "integrity": "sha512-QKNsM0o3Xe6ISQU869e+DhG+4CK/48aHYdJZGlFQVTjnbvgpcKyxpzk29fGiO7i/J8VENZ+d2iGnSsvmuHywlA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/types": "30.2.0", - "@types/node": "*", - "chalk": "^4.1.2", - "ci-info": "^4.2.0", - "graceful-fs": "^4.2.11", - "picomatch": "^4.0.2" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-util/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" 
- } - }, - "node_modules/jest-validate": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-30.2.0.tgz", - "integrity": "sha512-FBGWi7dP2hpdi8nBoWxSsLvBFewKAg0+uSQwBaof4Y4DPgBabXgpSYC5/lR7VmnIlSpASmCi/ntRWPbv7089Pw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/get-type": "30.1.0", - "@jest/types": "30.2.0", - "camelcase": "^6.3.0", - "chalk": "^4.1.2", - "leven": "^3.1.0", - "pretty-format": "30.2.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-validate/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/jest-validate/node_modules/camelcase": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", - "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/jest-validate/node_modules/pretty-format": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", - "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.0.5", - "ansi-styles": "^5.2.0", - "react-is": "^18.3.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-validate/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/jest-watcher": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-watcher/-/jest-watcher-30.2.0.tgz", - "integrity": "sha512-PYxa28dxJ9g777pGm/7PrbnMeA0Jr7osHP9bS7eJy9DuAjMgdGtxgf0uKMyoIsTWAkIbUW5hSDdJ3urmgXBqxg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/test-result": "30.2.0", - "@jest/types": "30.2.0", - "@types/node": "*", - "ansi-escapes": "^4.3.2", - "chalk": "^4.1.2", - "emittery": "^0.13.1", - "jest-util": "30.2.0", - "string-length": "^4.0.2" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-worker": { - "version": "30.2.0", - "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-30.2.0.tgz", - "integrity": "sha512-0Q4Uk8WF7BUwqXHuAjc23vmopWJw5WH7w2tqBoUOZpOjW/ZnR44GXXd1r82RvnmI2GZge3ivrYXk/BE2+VtW2g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/node": "*", - "@ungap/structured-clone": "^1.3.0", - "jest-util": "30.2.0", - "merge-stream": "^2.0.0", - "supports-color": "^8.1.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/jest-worker/node_modules/supports-color": { - "version": "8.1.1", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", - "integrity": 
"sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/supports-color?sponsor=1" - } - }, - "node_modules/jiti": { - "version": "2.6.1", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", - "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==", - "dev": true, - "license": "MIT", - "bin": { - "jiti": "lib/jiti-cli.mjs" - } - }, - "node_modules/jose": { - "version": "4.15.9", - "resolved": "https://registry.npmjs.org/jose/-/jose-4.15.9.tgz", - "integrity": "sha512-1vUQX+IdDMVPj4k8kOxgUqlcK518yluMuGZwqlr44FS1ppZB/5GWh4rZG89erpOBOJjU/OBsnCVFfapsRz6nEA==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/panva" - } - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", - "dev": true, - "license": "MIT", - "dependencies": { - "argparse": "^2.0.1" - }, - "bin": { - "js-yaml": "bin/js-yaml.js" - } - }, - "node_modules/jsdom": { - "version": "26.1.0", - "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-26.1.0.tgz", - "integrity": "sha512-Cvc9WUhxSMEo4McES3P7oK3QaXldCfNWp7pl2NNeiIFlCoLr3kfq9kb1fxftiwk1FLV7CvpvDfonxtzUDeSOPg==", - "dev": true, - "license": "MIT", - "dependencies": { - "cssstyle": "^4.2.1", - "data-urls": "^5.0.0", - "decimal.js": "^10.5.0", - "html-encoding-sniffer": "^4.0.0", - "http-proxy-agent": "^7.0.2", - "https-proxy-agent": "^7.0.6", - "is-potential-custom-element-name": "^1.0.1", - "nwsapi": "^2.2.16", - "parse5": "^7.2.1", - "rrweb-cssom": "^0.8.0", - "saxes": "^6.0.0", - "symbol-tree": "^3.2.4", - "tough-cookie": "^5.1.1", - "w3c-xmlserializer": "^5.0.0", - "webidl-conversions": "^7.0.0", - "whatwg-encoding": "^3.1.1", - "whatwg-mimetype": "^4.0.0", - "whatwg-url": "^14.1.1", - "ws": "^8.18.0", - "xml-name-validator": "^5.0.0" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "canvas": "^3.0.0" - }, - "peerDependenciesMeta": { - "canvas": { - "optional": true - } - } - }, - "node_modules/jsdom/node_modules/tr46": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.1.1.tgz", - "integrity": "sha512-hdF5ZgjTqgAntKkklYw0R03MG2x/bSzTtkxmIRw/sTNV8YXsCJ1tfLAX23lhxhHJlEf3CRCOCGGWw3vI3GaSPw==", - "dev": true, - "license": "MIT", - "dependencies": { - "punycode": "^2.3.1" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/jsdom/node_modules/webidl-conversions": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", - "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=12" - } - }, - "node_modules/jsdom/node_modules/whatwg-url": { - "version": "14.2.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.2.0.tgz", - "integrity": 
"sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==", - "dev": true, - "license": "MIT", - "dependencies": { - "tr46": "^5.1.0", - "webidl-conversions": "^7.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/jsesc": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", - "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", - "dev": true, - "license": "MIT", - "bin": { - "jsesc": "bin/jsesc" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/json-buffer": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", - "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/json-parse-even-better-errors": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", - "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", - "dev": true, - "license": "MIT" - }, - "node_modules/json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true, - "license": "MIT" - }, - "node_modules/json-stable-stringify-without-jsonify": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", - "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", - "dev": true, - "license": "MIT" - }, - "node_modules/json5": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", - "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==", - "dev": true, - "license": "MIT", - "dependencies": { - "minimist": "^1.2.0" - }, - "bin": { - "json5": "lib/cli.js" - } - }, - "node_modules/jsx-ast-utils": { - "version": "3.3.5", - "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz", - "integrity": "sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "array-includes": "^3.1.6", - "array.prototype.flat": "^1.3.1", - "object.assign": "^4.1.4", - "object.values": "^1.1.6" - }, - "engines": { - "node": ">=4.0" - } - }, - "node_modules/keyv": { - "version": "4.5.4", - "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", - "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", - "dev": true, - "license": "MIT", - "dependencies": { - "json-buffer": "3.0.1" - } - }, - "node_modules/language-subtag-registry": { - "version": "0.3.23", - "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.23.tgz", - "integrity": "sha512-0K65Lea881pHotoGEa5gDlMxt3pctLi2RplBb7Ezh4rRdLEOtgi7n4EwK9lamnUCkKBqaeKRVebTq6BAxSkpXQ==", - "dev": true, - "license": "CC0-1.0" - }, - "node_modules/language-tags": { - "version": "1.0.9", - "resolved": "https://registry.npmjs.org/language-tags/-/language-tags-1.0.9.tgz", - "integrity": 
"sha512-MbjN408fEndfiQXbFQ1vnd+1NoLDsnQW41410oQBXiyXDMYH5z505juWa4KUE1LqxRC7DgOgZDbKLxHIwm27hA==", - "dev": true, - "license": "MIT", - "dependencies": { - "language-subtag-registry": "^0.3.20" - }, - "engines": { - "node": ">=0.10" - } - }, - "node_modules/leven": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", - "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/levn": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", - "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "prelude-ls": "^1.2.1", - "type-check": "~0.4.0" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/lightningcss": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.30.1.tgz", - "integrity": "sha512-xi6IyHML+c9+Q3W0S4fCQJOym42pyurFiJUHEcEyHS0CeKzia4yZDEsLlqOFykxOdHpNy0NmvVO31vcSqAxJCg==", - "dev": true, - "license": "MPL-2.0", - "dependencies": { - "detect-libc": "^2.0.3" - }, - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - }, - "optionalDependencies": { - "lightningcss-darwin-arm64": "1.30.1", - "lightningcss-darwin-x64": "1.30.1", - "lightningcss-freebsd-x64": "1.30.1", - "lightningcss-linux-arm-gnueabihf": "1.30.1", - "lightningcss-linux-arm64-gnu": "1.30.1", - "lightningcss-linux-arm64-musl": "1.30.1", - "lightningcss-linux-x64-gnu": "1.30.1", - "lightningcss-linux-x64-musl": "1.30.1", - "lightningcss-win32-arm64-msvc": "1.30.1", - "lightningcss-win32-x64-msvc": "1.30.1" - } - }, - "node_modules/lightningcss-darwin-arm64": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.30.1.tgz", - "integrity": "sha512-c8JK7hyE65X1MHMN+Viq9n11RRC7hgin3HhYKhrMyaXflk5GVplZ60IxyoVtzILeKr+xAJwg6zK6sjTBJ0FKYQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-darwin-x64": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.30.1.tgz", - "integrity": "sha512-k1EvjakfumAQoTfcXUcHQZhSpLlkAuEkdMBsI/ivWw9hL+7FtilQc0Cy3hrx0AAQrVtQAbMI7YjCgYgvn37PzA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-freebsd-x64": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.30.1.tgz", - "integrity": "sha512-kmW6UGCGg2PcyUE59K5r0kWfKPAVy4SltVeut+umLCFoJ53RdCUWxcRDzO1eTaxf/7Q2H7LTquFHPL5R+Gjyig==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-linux-arm-gnueabihf": { - "version": 
"1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.30.1.tgz", - "integrity": "sha512-MjxUShl1v8pit+6D/zSPq9S9dQ2NPFSQwGvxBCYaBYLPlCWuPh9/t1MRS8iUaR8i+a6w7aps+B4N0S1TYP/R+Q==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-linux-arm64-gnu": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.30.1.tgz", - "integrity": "sha512-gB72maP8rmrKsnKYy8XUuXi/4OctJiuQjcuqWNlJQ6jZiWqtPvqFziskH3hnajfvKB27ynbVCucKSm2rkQp4Bw==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-linux-arm64-musl": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.30.1.tgz", - "integrity": "sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-linux-x64-gnu": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.30.1.tgz", - "integrity": "sha512-piWx3z4wN8J8z3+O5kO74+yr6ze/dKmPnI7vLqfSqI8bccaTGY5xiSGVIJBDd5K5BHlvVLpUB3S2YCfelyJ1bw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-linux-x64-musl": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.30.1.tgz", - "integrity": "sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-win32-arm64-msvc": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.30.1.tgz", - "integrity": "sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MPL-2.0", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lightningcss-win32-x64-msvc": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.30.1.tgz", - "integrity": "sha512-PVqXh48wh4T53F/1CCu8PIPCxLzWyCnn/9T5W1Jpmdy5h9Cwd+0YQS6/LwhHXSafuc61/xg9Lv5OrCby6a++jg==", - "cpu": [ - "x64" 
-      ],
-      "dev": true,
-      "license": "MPL-2.0",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">= 12.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/parcel"
-      }
-    },
-    "node_modules/lines-and-columns": {
-      "version": "1.2.4",
-      "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
-      "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
-      "dev": true,
-      "license": "MIT"
-    },
-    "node_modules/llama-stack-client": {
-      "version": "0.3.0",
-      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.0.tgz",
-      "integrity": "sha512-76K/t1doaGmlBbDxCADaral9Vccvys9P8pqAMIhwBhMAqWudCEORrMMhUSg+pjhamWmEKj3wa++d4zeOGbfN/w==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/node": "^18.11.18",
-        "@types/node-fetch": "^2.6.4",
-        "abort-controller": "^3.0.0",
-        "agentkeepalive": "^4.2.1",
-        "form-data-encoder": "1.7.2",
-        "formdata-node": "^4.3.2",
-        "node-fetch": "^2.6.7"
-      }
-    },
-    "node_modules/llama-stack-client/node_modules/@types/node": {
-      "version": "18.19.100",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.100.tgz",
-      "integrity": "sha512-ojmMP8SZBKprc3qGrGk8Ujpo80AXkrP7G2tOT4VWr5jlr5DHjsJF+emXJz+Wm0glmy4Js62oKMdZZ6B9Y+tEcA==",
-      "license": "MIT",
-      "dependencies": {
-        "undici-types": "~5.26.4"
-      }
-    },
-    "node_modules/llama-stack-client/node_modules/undici-types": {
-      "version": "5.26.5",
-      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
-      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
-      "license": "MIT"
-    },
-    "node_modules/locate-path": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
-      "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "p-locate": "^5.0.0"
-      },
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/lodash.merge": {
-      "version": "4.6.2",
-      "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
-      "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==",
-      "dev": true,
-      "license": "MIT"
-    },
-    "node_modules/longest-streak": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz",
-      "integrity": "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==",
-      "license": "MIT",
-      "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/wooorm"
-      }
-    },
-    "node_modules/loose-envify": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
-      "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "js-tokens": "^3.0.0 || ^4.0.0"
-      },
-      "bin": {
-        "loose-envify": "cli.js"
-      }
-    },
-    "node_modules/lru-cache": {
-      "version": "5.1.1",
-      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
-      "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==",
-      "dev": true,
-      "license": "ISC",
-      "dependencies": {
-        "yallist": "^3.0.2"
-      }
-    },
-    "node_modules/lru-cache/node_modules/yallist": {
-      "version": "3.1.1",
-      "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz",
-      "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==",
-      "dev": true,
-      "license": "ISC"
-    },
-    "node_modules/lucide-react": {
-      "version": "0.545.0",
-      "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.545.0.tgz",
-      "integrity": "sha512-7r1/yUuflQDSt4f1bpn5ZAocyIxcTyVyBBChSVtBKn5M+392cPmI5YJMWOJKk/HUWGm5wg83chlAZtCcGbEZtw==",
-      "license": "ISC",
-      "peerDependencies": {
-        "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
-      }
-    },
-    "node_modules/lz-string": {
-      "version": "1.5.0",
-      "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz",
-      "integrity": "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==",
-      "dev": true,
-      "license": "MIT",
-      "bin": {
-        "lz-string": "bin/bin.js"
-      }
-    },
-    "node_modules/magic-string": {
-      "version": "0.30.19",
-      "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.19.tgz",
-      "integrity": "sha512-2N21sPY9Ws53PZvsEpVtNuSW+ScYbQdp4b9qUaL+9QkHUrGFKo56Lg9Emg5s9V/qrtNBmiR01sYhUOwu3H+VOw==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "@jridgewell/sourcemap-codec": "^1.5.5"
-      }
-    },
-    "node_modules/make-dir": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-4.0.0.tgz",
-      "integrity": "sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "semver": "^7.5.3"
-      },
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/make-error": {
-      "version": "1.3.6",
-      "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz",
-      "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==",
-      "dev": true,
-      "license": "ISC"
-    },
-    "node_modules/makeerror": {
-      "version": "1.0.12",
-      "resolved": "https://registry.npmjs.org/makeerror/-/makeerror-1.0.12.tgz",
-      "integrity": "sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==",
-      "dev": true,
-      "license": "BSD-3-Clause",
-      "dependencies": {
-        "tmpl": "1.0.5"
-      }
-    },
-    "node_modules/markdown-table": {
-      "version": "3.0.4",
-      "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.4.tgz",
-      "integrity": "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==",
-      "license": "MIT",
-      "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/wooorm"
-      }
-    },
-    "node_modules/math-intrinsics": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
-      "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/mdast-util-find-and-replace": {
-      "version": "3.0.2",
-      "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.2.tgz",
-      "integrity": "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/mdast": "^4.0.0",
-        "escape-string-regexp": "^5.0.0",
-        "unist-util-is": "^6.0.0",
-        "unist-util-visit-parents": "^6.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-find-and-replace/node_modules/escape-string-regexp": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz",
-      "integrity": "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/mdast-util-from-markdown": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-2.0.2.tgz",
-      "integrity": "sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/mdast": "^4.0.0",
-        "@types/unist": "^3.0.0",
-        "decode-named-character-reference": "^1.0.0",
-        "devlop": "^1.0.0",
-        "mdast-util-to-string": "^4.0.0",
-        "micromark": "^4.0.0",
-        "micromark-util-decode-numeric-character-reference": "^2.0.0",
-        "micromark-util-decode-string": "^2.0.0",
-        "micromark-util-normalize-identifier": "^2.0.0",
-        "micromark-util-symbol": "^2.0.0",
-        "micromark-util-types": "^2.0.0",
-        "unist-util-stringify-position": "^4.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-gfm": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.1.0.tgz",
-      "integrity": "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==",
-      "license": "MIT",
-      "dependencies": {
-        "mdast-util-from-markdown": "^2.0.0",
-        "mdast-util-gfm-autolink-literal": "^2.0.0",
-        "mdast-util-gfm-footnote": "^2.0.0",
-        "mdast-util-gfm-strikethrough": "^2.0.0",
-        "mdast-util-gfm-table": "^2.0.0",
-        "mdast-util-gfm-task-list-item": "^2.0.0",
-        "mdast-util-to-markdown": "^2.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-gfm-autolink-literal": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-2.0.1.tgz",
-      "integrity": "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/mdast": "^4.0.0",
-        "ccount": "^2.0.0",
-        "devlop": "^1.0.0",
-        "mdast-util-find-and-replace": "^3.0.0",
-        "micromark-util-character": "^2.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-gfm-footnote": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/mdast-util-gfm-footnote/-/mdast-util-gfm-footnote-2.1.0.tgz",
-      "integrity": "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/mdast": "^4.0.0",
-        "devlop": "^1.1.0",
-        "mdast-util-from-markdown": "^2.0.0",
-        "mdast-util-to-markdown": "^2.0.0",
-        "micromark-util-normalize-identifier": "^2.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-gfm-strikethrough": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-2.0.0.tgz",
-      "integrity": "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/mdast": "^4.0.0",
-        "mdast-util-from-markdown": "^2.0.0",
-        "mdast-util-to-markdown": "^2.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-gfm-table": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/mdast-util-gfm-table/-/mdast-util-gfm-table-2.0.0.tgz",
-      "integrity": "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/mdast": "^4.0.0",
-        "devlop": "^1.0.0",
-        "markdown-table": "^3.0.0",
-        "mdast-util-from-markdown": "^2.0.0",
-        "mdast-util-to-markdown": "^2.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-gfm-task-list-item": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-2.0.0.tgz",
-      "integrity": "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/mdast": "^4.0.0",
-        "devlop": "^1.0.0",
-        "mdast-util-from-markdown": "^2.0.0",
-        "mdast-util-to-markdown": "^2.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-mdx-expression": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.1.tgz",
-      "integrity": "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/estree-jsx": "^1.0.0",
-        "@types/hast": "^3.0.0",
-        "@types/mdast": "^4.0.0",
-        "devlop": "^1.0.0",
-        "mdast-util-from-markdown": "^2.0.0",
-        "mdast-util-to-markdown": "^2.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-mdx-jsx": {
-      "version": "3.2.0",
-      "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.2.0.tgz",
-      "integrity": "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/estree-jsx": "^1.0.0",
-        "@types/hast": "^3.0.0",
-        "@types/mdast": "^4.0.0",
-        "@types/unist": "^3.0.0",
-        "ccount": "^2.0.0",
-        "devlop": "^1.1.0",
-        "mdast-util-from-markdown": "^2.0.0",
-        "mdast-util-to-markdown": "^2.0.0",
-        "parse-entities": "^4.0.0",
-        "stringify-entities": "^4.0.0",
-        "unist-util-stringify-position": "^4.0.0",
-        "vfile-message": "^4.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-mdxjs-esm": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/mdast-util-mdxjs-esm/-/mdast-util-mdxjs-esm-2.0.1.tgz",
-      "integrity": "sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/estree-jsx": "^1.0.0",
-        "@types/hast": "^3.0.0",
-        "@types/mdast": "^4.0.0",
-        "devlop": "^1.0.0",
-        "mdast-util-from-markdown": "^2.0.0",
-        "mdast-util-to-markdown": "^2.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/unified"
-      }
-    },
-    "node_modules/mdast-util-phrasing": {
"version": "4.1.0", - "resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz", - "integrity": "sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "unist-util-is": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-to-hast": { - "version": "13.2.0", - "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.0.tgz", - "integrity": "sha512-QGYKEuUsYT9ykKBCMOEDLsU5JRObWQusAolFMeko/tYPufNkRffBAQjIE+99jbA87xv6FgmjLtwjh9wBWajwAA==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "@ungap/structured-clone": "^1.0.0", - "devlop": "^1.0.0", - "micromark-util-sanitize-uri": "^2.0.0", - "trim-lines": "^3.0.0", - "unist-util-position": "^5.0.0", - "unist-util-visit": "^5.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-to-markdown": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-2.1.2.tgz", - "integrity": "sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "@types/unist": "^3.0.0", - "longest-streak": "^3.0.0", - "mdast-util-phrasing": "^4.0.0", - "mdast-util-to-string": "^4.0.0", - "micromark-util-classify-character": "^2.0.0", - "micromark-util-decode-string": "^2.0.0", - "unist-util-visit": "^5.0.0", - "zwitch": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/mdast-util-to-string": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-4.0.0.tgz", - "integrity": "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/merge-stream": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", - "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", - "dev": true, - "license": "MIT" - }, - "node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/micromark": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.2.tgz", - "integrity": "sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "@types/debug": "^4.0.0", - "debug": "^4.0.0", - "decode-named-character-reference": "^1.0.0", - "devlop": "^1.0.0", - "micromark-core-commonmark": "^2.0.0", - "micromark-factory-space": 
"^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-chunked": "^2.0.0", - "micromark-util-combine-extensions": "^2.0.0", - "micromark-util-decode-numeric-character-reference": "^2.0.0", - "micromark-util-encode": "^2.0.0", - "micromark-util-normalize-identifier": "^2.0.0", - "micromark-util-resolve-all": "^2.0.0", - "micromark-util-sanitize-uri": "^2.0.0", - "micromark-util-subtokenize": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-core-commonmark": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-2.0.3.tgz", - "integrity": "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "decode-named-character-reference": "^1.0.0", - "devlop": "^1.0.0", - "micromark-factory-destination": "^2.0.0", - "micromark-factory-label": "^2.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-factory-title": "^2.0.0", - "micromark-factory-whitespace": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-chunked": "^2.0.0", - "micromark-util-classify-character": "^2.0.0", - "micromark-util-html-tag-name": "^2.0.0", - "micromark-util-normalize-identifier": "^2.0.0", - "micromark-util-resolve-all": "^2.0.0", - "micromark-util-subtokenize": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-extension-gfm": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz", - "integrity": "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==", - "license": "MIT", - "dependencies": { - "micromark-extension-gfm-autolink-literal": "^2.0.0", - "micromark-extension-gfm-footnote": "^2.0.0", - "micromark-extension-gfm-strikethrough": "^2.0.0", - "micromark-extension-gfm-table": "^2.0.0", - "micromark-extension-gfm-tagfilter": "^2.0.0", - "micromark-extension-gfm-task-list-item": "^2.0.0", - "micromark-util-combine-extensions": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-autolink-literal": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-2.1.0.tgz", - "integrity": "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==", - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-sanitize-uri": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-footnote": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-footnote/-/micromark-extension-gfm-footnote-2.1.0.tgz", - "integrity": "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==", - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-core-commonmark": "^2.0.0", - 
"micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-normalize-identifier": "^2.0.0", - "micromark-util-sanitize-uri": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-strikethrough": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-2.1.0.tgz", - "integrity": "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw==", - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-util-chunked": "^2.0.0", - "micromark-util-classify-character": "^2.0.0", - "micromark-util-resolve-all": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-table": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-table/-/micromark-extension-gfm-table-2.1.1.tgz", - "integrity": "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg==", - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-tagfilter": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-2.0.0.tgz", - "integrity": "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==", - "license": "MIT", - "dependencies": { - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-extension-gfm-task-list-item": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-2.1.0.tgz", - "integrity": "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw==", - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/micromark-factory-destination": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.1.tgz", - "integrity": "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-factory-label": { - "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.1.tgz", - "integrity": "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-factory-space": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.1.tgz", - "integrity": "sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-factory-title": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-2.0.1.tgz", - "integrity": "sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-factory-whitespace": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.1.tgz", - "integrity": "sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-factory-space": "^2.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-character": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.1.tgz", - "integrity": "sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-chunked": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-2.0.1.tgz", - "integrity": "sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": 
"https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-classify-character": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-2.0.1.tgz", - "integrity": "sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-combine-extensions": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-2.0.1.tgz", - "integrity": "sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-chunked": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-decode-numeric-character-reference": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-2.0.2.tgz", - "integrity": "sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-decode-string": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-2.0.1.tgz", - "integrity": "sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "decode-named-character-reference": "^1.0.0", - "micromark-util-character": "^2.0.0", - "micromark-util-decode-numeric-character-reference": "^2.0.0", - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-encode": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-2.0.1.tgz", - "integrity": "sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/micromark-util-html-tag-name": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-2.0.1.tgz", - "integrity": 
"sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/micromark-util-normalize-identifier": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-2.0.1.tgz", - "integrity": "sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-resolve-all": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-2.0.1.tgz", - "integrity": "sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-sanitize-uri": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-2.0.1.tgz", - "integrity": "sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "micromark-util-character": "^2.0.0", - "micromark-util-encode": "^2.0.0", - "micromark-util-symbol": "^2.0.0" - } - }, - "node_modules/micromark-util-subtokenize": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-2.1.0.tgz", - "integrity": "sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT", - "dependencies": { - "devlop": "^1.0.0", - "micromark-util-chunked": "^2.0.0", - "micromark-util-symbol": "^2.0.0", - "micromark-util-types": "^2.0.0" - } - }, - "node_modules/micromark-util-symbol": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-2.0.1.tgz", - "integrity": "sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==", - "funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/micromark-util-types": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-2.0.2.tgz", - "integrity": "sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA==", - 
"funding": [ - { - "type": "GitHub Sponsors", - "url": "https://github.com/sponsors/unifiedjs" - }, - { - "type": "OpenCollective", - "url": "https://opencollective.com/unified" - } - ], - "license": "MIT" - }, - "node_modules/micromatch": { - "version": "4.0.8", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", - "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", - "dev": true, - "license": "MIT", - "dependencies": { - "braces": "^3.0.3", - "picomatch": "^2.3.1" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/mimic-fn": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", - "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/min-indent": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", - "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": "^1.1.7" - }, - "engines": { - "node": "*" - } - }, - "node_modules/minimist": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/minizlib": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", - "integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==", - "dev": true, - "license": "MIT", - "dependencies": { - "minipass": "^7.1.2" - }, - "engines": { - "node": ">= 18" - } - }, - "node_modules/motion-dom": { - "version": "12.23.23", - "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.23.tgz", - "integrity": "sha512-n5yolOs0TQQBRUFImrRfs/+6X4p3Q4n1dUEqt/H58Vx7OW6RF+foWEgmTVDhIWJIMXOuNNL0apKH2S16en9eiA==", - "license": "MIT", - "dependencies": { - "motion-utils": "^12.23.6" - } - }, - "node_modules/motion-utils": { - "version": "12.23.6", - "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.23.6.tgz", - "integrity": "sha512-eAWoPgr4eFEOFfg2WjIsMoqJTW6Z8MTUCgn/GZ3VRpClWBdnbjryiA3ZSNLyxCTmCQx4RmYX6jX1iWHbenUPNQ==", - "license": "MIT" - }, - "node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/nanoid": { - "version": "3.3.11", - 
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", - "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, - "node_modules/napi-postinstall": { - "version": "0.3.4", - "resolved": "https://registry.npmjs.org/napi-postinstall/-/napi-postinstall-0.3.4.tgz", - "integrity": "sha512-PHI5f1O0EP5xJ9gQmFGMS6IZcrVvTjpXjz7Na41gTE7eE2hK11lg04CECCYEEjdc17EV4DO+fkGEtt7TpTaTiQ==", - "dev": true, - "license": "MIT", - "bin": { - "napi-postinstall": "lib/cli.js" - }, - "engines": { - "node": "^12.20.0 || ^14.18.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/napi-postinstall" - } - }, - "node_modules/natural-compare": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", - "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", - "dev": true, - "license": "MIT" - }, - "node_modules/next": { - "version": "15.5.4", - "resolved": "https://registry.npmjs.org/next/-/next-15.5.4.tgz", - "integrity": "sha512-xH4Yjhb82sFYQfY3vbkJfgSDgXvBB6a8xPs9i35k6oZJRoQRihZH+4s9Yo2qsWpzBmZ3lPXaJ2KPXLfkvW4LnA==", - "license": "MIT", - "dependencies": { - "@next/env": "15.5.4", - "@swc/helpers": "0.5.15", - "caniuse-lite": "^1.0.30001579", - "postcss": "8.4.31", - "styled-jsx": "5.1.6" - }, - "bin": { - "next": "dist/bin/next" - }, - "engines": { - "node": "^18.18.0 || ^19.8.0 || >= 20.0.0" - }, - "optionalDependencies": { - "@next/swc-darwin-arm64": "15.5.4", - "@next/swc-darwin-x64": "15.5.4", - "@next/swc-linux-arm64-gnu": "15.5.4", - "@next/swc-linux-arm64-musl": "15.5.4", - "@next/swc-linux-x64-gnu": "15.5.4", - "@next/swc-linux-x64-musl": "15.5.4", - "@next/swc-win32-arm64-msvc": "15.5.4", - "@next/swc-win32-x64-msvc": "15.5.4", - "sharp": "^0.34.3" - }, - "peerDependencies": { - "@opentelemetry/api": "^1.1.0", - "@playwright/test": "^1.51.1", - "babel-plugin-react-compiler": "*", - "react": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0", - "react-dom": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0", - "sass": "^1.3.0" - }, - "peerDependenciesMeta": { - "@opentelemetry/api": { - "optional": true - }, - "@playwright/test": { - "optional": true - }, - "babel-plugin-react-compiler": { - "optional": true - }, - "sass": { - "optional": true - } - } - }, - "node_modules/next-auth": { - "version": "4.24.11", - "resolved": "https://registry.npmjs.org/next-auth/-/next-auth-4.24.11.tgz", - "integrity": "sha512-pCFXzIDQX7xmHFs4KVH4luCjaCbuPRtZ9oBUjUhOk84mZ9WVPf94n87TxYI4rSRf9HmfHEF8Yep3JrYDVOo3Cw==", - "license": "ISC", - "dependencies": { - "@babel/runtime": "^7.20.13", - "@panva/hkdf": "^1.0.2", - "cookie": "^0.7.0", - "jose": "^4.15.5", - "oauth": "^0.9.15", - "openid-client": "^5.4.0", - "preact": "^10.6.3", - "preact-render-to-string": "^5.1.19", - "uuid": "^8.3.2" - }, - "peerDependencies": { - "@auth/core": "0.34.2", - "next": "^12.2.5 || ^13 || ^14 || ^15", - "nodemailer": "^6.6.5", - "react": "^17.0.2 || ^18 || ^19", - "react-dom": "^17.0.2 || ^18 || ^19" - }, - "peerDependenciesMeta": { - "@auth/core": { - "optional": true - }, - "nodemailer": { - "optional": true - } - } - }, - "node_modules/next-themes": { - "version": "0.4.6", - "resolved": 
"https://registry.npmjs.org/next-themes/-/next-themes-0.4.6.tgz", - "integrity": "sha512-pZvgD5L0IEvX5/9GWyHMf3m8BKiVQwsCMHfoFosXtXBMnaS0ZnIJ9ST4b4NqLVKDEm8QBxoNNGNaBv2JNF6XNA==", - "license": "MIT", - "peerDependencies": { - "react": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc" - } - }, - "node_modules/next/node_modules/postcss": { - "version": "8.4.31", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz", - "integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/postcss" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "nanoid": "^3.3.6", - "picocolors": "^1.0.0", - "source-map-js": "^1.0.2" - }, - "engines": { - "node": "^10 || ^12 || >=14" - } - }, - "node_modules/node-domexception": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", - "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", - "deprecated": "Use your platform's native DOMException instead", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/jimmywarting" - }, - { - "type": "github", - "url": "https://paypal.me/jimmywarting" - } - ], - "license": "MIT", - "engines": { - "node": ">=10.5.0" - } - }, - "node_modules/node-fetch": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", - "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", - "license": "MIT", - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - }, - "node_modules/node-int64": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", - "integrity": "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==", - "dev": true, - "license": "MIT" - }, - "node_modules/node-releases": { - "version": "2.0.25", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.25.tgz", - "integrity": "sha512-4auku8B/vw5psvTiiN9j1dAOsXvMoGqJuKJcR+dTdqiXEK20mMTk1UEo3HS16LeGQsVG6+qKTPM9u/qQ2LqATA==", - "dev": true, - "license": "MIT" - }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/npm-run-path": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", - "integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==", - "dev": true, - "license": "MIT", - "dependencies": { - "path-key": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/nwsapi": { - "version": "2.2.22", - "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.22.tgz", - "integrity": 
"sha512-ujSMe1OWVn55euT1ihwCI1ZcAaAU3nxUiDwfDQldc51ZXaB9m2AyOn6/jh1BLe2t/G8xd6uKG1UBF2aZJeg2SQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/oauth": { - "version": "0.9.15", - "resolved": "https://registry.npmjs.org/oauth/-/oauth-0.9.15.tgz", - "integrity": "sha512-a5ERWK1kh38ExDEfoO6qUHJb32rd7aYmPHuyCu3Fta/cnICvYmgd2uhuKXvPD+PXB+gCEYYEaQdIRAjCOwAKNA==", - "license": "MIT" - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-hash": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-2.2.0.tgz", - "integrity": "sha512-gScRMn0bS5fH+IuwyIFgnh9zBdo4DV+6GhygmWM9HyNJSgS0hScp1f5vjtm7oIIOiT9trXrShAkLFSc2IqKNgw==", - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/object-inspect": { - "version": "1.13.4", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", - "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/object-keys": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", - "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/object.assign": { - "version": "4.1.7", - "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.7.tgz", - "integrity": "sha512-nK28WOo+QIjBkDduTINE4JkF/UJJKyf2EJxvJKfblDpyg0Q+pkOHNTL0Qwy6NP6FhE/EnzV73BxxqcJaXY9anw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "define-properties": "^1.2.1", - "es-object-atoms": "^1.0.0", - "has-symbols": "^1.1.0", - "object-keys": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/object.entries": { - "version": "1.1.9", - "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.9.tgz", - "integrity": "sha512-8u/hfXFRBD1O0hPUjioLhoWFHRmt6tKA4/vZPyckBr18l1KE9uHrFaFaUi8MDRTpi4uak2goyPTSNJLXX2k2Hw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.4", - "define-properties": "^1.2.1", - "es-object-atoms": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/object.fromentries": { - "version": "2.0.8", - "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.8.tgz", - "integrity": "sha512-k6E21FzySsSK5a21KRADBd/NGneRegFO5pLHfdQLpRDETUNJueLXs3WCzyQ3tFRDYgbq3KHGXfTbi2bs8WQ6rQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.2", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/object.groupby": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.3.tgz", - "integrity": 
"sha512-+Lhy3TQTuzXI5hevh8sBGqbmurHbbIjAi0Z4S63nthVLmLxfbj4T54a4CfZrXIrt9iP4mVAPYMo/v99taj3wjQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/object.values": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.2.1.tgz", - "integrity": "sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "define-properties": "^1.2.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/oidc-token-hash": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/oidc-token-hash/-/oidc-token-hash-5.1.0.tgz", - "integrity": "sha512-y0W+X7Ppo7oZX6eovsRkuzcSM40Bicg2JEJkDJ4irIt1wsYAP5MLSNv+QAogO8xivMffw/9OvV3um1pxXgt1uA==", - "license": "MIT", - "engines": { - "node": "^10.13.0 || >=12.0.0" - } - }, - "node_modules/once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", - "dev": true, - "license": "ISC", - "dependencies": { - "wrappy": "1" - } - }, - "node_modules/onetime": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", - "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", - "dev": true, - "license": "MIT", - "dependencies": { - "mimic-fn": "^2.1.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/oniguruma-parser": { - "version": "0.12.1", - "resolved": "https://registry.npmjs.org/oniguruma-parser/-/oniguruma-parser-0.12.1.tgz", - "integrity": "sha512-8Unqkvk1RYc6yq2WBYRj4hdnsAxVze8i7iPfQr8e4uSP3tRv0rpZcbGUDvxfQQcdwHt/e9PrMvGCsa8OqG9X3w==", - "license": "MIT" - }, - "node_modules/oniguruma-to-es": { - "version": "4.3.3", - "resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-4.3.3.tgz", - "integrity": "sha512-rPiZhzC3wXwE59YQMRDodUwwT9FZ9nNBwQQfsd1wfdtlKEyCdRV0avrTcSZ5xlIvGRVPd/cx6ZN45ECmS39xvg==", - "license": "MIT", - "dependencies": { - "oniguruma-parser": "^0.12.1", - "regex": "^6.0.1", - "regex-recursion": "^6.0.2" - } - }, - "node_modules/openid-client": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-5.7.1.tgz", - "integrity": "sha512-jDBPgSVfTnkIh71Hg9pRvtJc6wTwqjRkN88+gCFtYWrlP4Yx2Dsrow8uPi3qLr/aeymPF3o2+dS+wOpglK04ew==", - "license": "MIT", - "dependencies": { - "jose": "^4.15.9", - "lru-cache": "^6.0.0", - "object-hash": "^2.2.0", - "oidc-token-hash": "^5.0.3" - }, - "funding": { - "url": "https://github.com/sponsors/panva" - } - }, - "node_modules/openid-client/node_modules/lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "license": "ISC", - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/openid-client/node_modules/yallist": { - "version": "4.0.0", - "resolved": 
"https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", - "license": "ISC" - }, - "node_modules/optionator": { - "version": "0.9.4", - "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", - "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", - "dev": true, - "license": "MIT", - "dependencies": { - "deep-is": "^0.1.3", - "fast-levenshtein": "^2.0.6", - "levn": "^0.4.1", - "prelude-ls": "^1.2.1", - "type-check": "^0.4.0", - "word-wrap": "^1.2.5" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/own-keys": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/own-keys/-/own-keys-1.0.1.tgz", - "integrity": "sha512-qFOyK5PjiWZd+QQIh+1jhdb9LpxTF0qs7Pm8o5QHYZ0M3vKqSqzsZaEB6oWlxZ+q2sJBMI/Ktgd2N5ZwQoRHfg==", - "dev": true, - "license": "MIT", - "dependencies": { - "get-intrinsic": "^1.2.6", - "object-keys": "^1.1.1", - "safe-push-apply": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/p-limit": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", - "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "yocto-queue": "^0.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-locate": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", - "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-limit": "^3.0.2" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-try": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", - "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/package-json-from-dist": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", - "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "dev": true, - "license": "BlueOak-1.0.0" - }, - "node_modules/parent-module": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", - "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", - "dev": true, - "license": "MIT", - "dependencies": { - "callsites": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/parse-entities": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-4.0.2.tgz", - "integrity": "sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^2.0.0", - "character-entities-legacy": "^3.0.0", - "character-reference-invalid": "^2.0.0", - "decode-named-character-reference": "^1.0.0", - "is-alphanumerical": 
"^2.0.0", - "is-decimal": "^2.0.0", - "is-hexadecimal": "^2.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/parse-entities/node_modules/@types/unist": { - "version": "2.0.11", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", - "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", - "license": "MIT" - }, - "node_modules/parse-json": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", - "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.0.0", - "error-ex": "^1.3.1", - "json-parse-even-better-errors": "^2.3.0", - "lines-and-columns": "^1.1.6" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/parse5": { - "version": "7.3.0", - "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", - "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", - "dev": true, - "license": "MIT", - "dependencies": { - "entities": "^6.0.0" - }, - "funding": { - "url": "https://github.com/inikulin/parse5?sponsor=1" - } - }, - "node_modules/path-exists": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", - "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/path-parse": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", - "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", - "dev": true, - "license": "MIT" - }, - "node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "dev": true, - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/path-scurry/node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/picocolors": { - "version": "1.1.1", - "resolved": 
"https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", - "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", - "license": "ISC" - }, - "node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/pirates": { - "version": "4.0.7", - "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz", - "integrity": "sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6" - } - }, - "node_modules/pkg-dir": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz", - "integrity": "sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "find-up": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/pkg-dir/node_modules/find-up": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", - "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", - "dev": true, - "license": "MIT", - "dependencies": { - "locate-path": "^5.0.0", - "path-exists": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/pkg-dir/node_modules/locate-path": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", - "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-locate": "^4.1.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/pkg-dir/node_modules/p-limit": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", - "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-try": "^2.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/pkg-dir/node_modules/p-locate": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", - "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", - "dev": true, - "license": "MIT", - "dependencies": { - "p-limit": "^2.2.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/possible-typed-array-names": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz", - "integrity": "sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/postcss": { - "version": "8.5.3", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.3.tgz", - "integrity": "sha512-dle9A3yYxlBSrt8Fu+IpjGT8SY8hN0mlaA6GY8t0P5PjIOZemULz/E2Bnm/2dcUOena75OTNkHI76uZBNUUq3A==", - "dev": true, - "funding": [ - { - "type": 
"opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/postcss" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "nanoid": "^3.3.8", - "picocolors": "^1.1.1", - "source-map-js": "^1.2.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" - } - }, - "node_modules/preact": { - "version": "10.26.9", - "resolved": "https://registry.npmjs.org/preact/-/preact-10.26.9.tgz", - "integrity": "sha512-SSjF9vcnF27mJK1XyFMNJzFd5u3pQiATFqoaDy03XuN00u4ziveVVEGt5RKJrDR8MHE/wJo9Nnad56RLzS2RMA==", - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/preact" - } - }, - "node_modules/preact-render-to-string": { - "version": "5.2.6", - "resolved": "https://registry.npmjs.org/preact-render-to-string/-/preact-render-to-string-5.2.6.tgz", - "integrity": "sha512-JyhErpYOvBV1hEPwIxc/fHWXPfnEGdRKxc8gFdAZ7XV4tlzyzG847XAyEZqoDnynP88akM4eaHcSOzNcLWFguw==", - "license": "MIT", - "dependencies": { - "pretty-format": "^3.8.0" - }, - "peerDependencies": { - "preact": ">=10" - } - }, - "node_modules/preact-render-to-string/node_modules/pretty-format": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-3.8.0.tgz", - "integrity": "sha512-WuxUnVtlWL1OfZFQFuqvnvs6MiAGk9UNsBostyBOB0Is9wb5uRESevA6rnl/rkksXaGX3GzZhPup5d6Vp1nFew==", - "license": "MIT" - }, - "node_modules/prelude-ls": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", - "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/prettier": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz", - "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==", - "dev": true, - "license": "MIT", - "bin": { - "prettier": "bin/prettier.cjs" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/prettier/prettier?sponsor=1" - } - }, - "node_modules/prettier-linter-helpers": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/prettier-linter-helpers/-/prettier-linter-helpers-1.0.0.tgz", - "integrity": "sha512-GbK2cP9nraSSUF9N2XwUwqfzlAFlMNYYl+ShE/V+H8a9uNl/oUqB1w2EL54Jh0OlyRSd8RfWYJ3coVS4TROP2w==", - "dev": true, - "license": "MIT", - "dependencies": { - "fast-diff": "^1.1.2" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/pretty-format": { - "version": "27.5.1", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz", - "integrity": "sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1", - "ansi-styles": "^5.0.0", - "react-is": "^17.0.1" - }, - "engines": { - "node": "^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0" - } - }, - "node_modules/pretty-format/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": 
"https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/pretty-format/node_modules/react-is": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", - "integrity": "sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==", - "dev": true, - "license": "MIT" - }, - "node_modules/prop-types": { - "version": "15.8.1", - "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", - "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", - "dev": true, - "license": "MIT", - "dependencies": { - "loose-envify": "^1.4.0", - "object-assign": "^4.1.1", - "react-is": "^16.13.1" - } - }, - "node_modules/property-information": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/property-information/-/property-information-7.1.0.tgz", - "integrity": "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/punycode": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", - "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/pure-rand": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-7.0.1.tgz", - "integrity": "sha512-oTUZM/NAZS8p7ANR3SHh30kXB+zK2r2BPcEn/awJIbOvq82WoMN4p62AWWp3Hhw50G0xMsw1mhIBLqHw64EcNQ==", - "dev": true, - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/dubzzz" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/fast-check" - } - ], - "license": "MIT" - }, - "node_modules/queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/react": { - "version": "19.2.0", - "resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz", - "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react-dom": { - "version": "19.2.0", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz", - "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==", - "license": "MIT", - "dependencies": { - "scheduler": "^0.27.0" - }, - "peerDependencies": { - "react": "^19.2.0" - } - }, - "node_modules/react-is": { - "version": "16.13.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/react-markdown": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-10.1.0.tgz", - 
"integrity": "sha512-qKxVopLT/TyA6BX3Ue5NwabOsAzm0Q7kAPwq6L+wWDwisYs7R8vZ0nRXqq6rkueboxpkjvLGU9fWifiX/ZZFxQ==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "devlop": "^1.0.0", - "hast-util-to-jsx-runtime": "^2.0.0", - "html-url-attributes": "^3.0.0", - "mdast-util-to-hast": "^13.0.0", - "remark-parse": "^11.0.0", - "remark-rehype": "^11.0.0", - "unified": "^11.0.0", - "unist-util-visit": "^5.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - }, - "peerDependencies": { - "@types/react": ">=18", - "react": ">=18" - } - }, - "node_modules/react-remove-scroll": { - "version": "2.6.3", - "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.6.3.tgz", - "integrity": "sha512-pnAi91oOk8g8ABQKGF5/M9qxmmOPxaAnopyTHYfqYEwJhyFrbbBtHuSgtKEoH0jpcxx5o3hXqH1mNd9/Oi+8iQ==", - "license": "MIT", - "dependencies": { - "react-remove-scroll-bar": "^2.3.7", - "react-style-singleton": "^2.2.3", - "tslib": "^2.1.0", - "use-callback-ref": "^1.3.3", - "use-sidecar": "^1.1.3" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-remove-scroll-bar": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.8.tgz", - "integrity": "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==", - "license": "MIT", - "dependencies": { - "react-style-singleton": "^2.2.2", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-style-singleton": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.3.tgz", - "integrity": "sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==", - "license": "MIT", - "dependencies": { - "get-nonce": "^1.0.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/redent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz", - "integrity": "sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==", - "dev": true, - "license": "MIT", - "dependencies": { - "indent-string": "^4.0.0", - "strip-indent": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/reflect.getprototypeof": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.10.tgz", - "integrity": "sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.9", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.0.0", - "get-intrinsic": "^1.2.7", - "get-proto": "^1.0.1", - "which-builtin-type": "^1.2.1" - }, - "engines": { - 
"node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/regex": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/regex/-/regex-6.0.1.tgz", - "integrity": "sha512-uorlqlzAKjKQZ5P+kTJr3eeJGSVroLKoHmquUj4zHWuR+hEyNqlXsSKlYYF5F4NI6nl7tWCs0apKJ0lmfsXAPA==", - "license": "MIT", - "dependencies": { - "regex-utilities": "^2.3.0" - } - }, - "node_modules/regex-recursion": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/regex-recursion/-/regex-recursion-6.0.2.tgz", - "integrity": "sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg==", - "license": "MIT", - "dependencies": { - "regex-utilities": "^2.3.0" - } - }, - "node_modules/regex-utilities": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/regex-utilities/-/regex-utilities-2.3.0.tgz", - "integrity": "sha512-8VhliFJAWRaUiVvREIiW2NXXTmHs4vMNnSzuJVhscgmGav3g9VDxLrQndI3dZZVVdp0ZO/5v0xmX516/7M9cng==", - "license": "MIT" - }, - "node_modules/regexp.prototype.flags": { - "version": "1.5.4", - "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz", - "integrity": "sha512-dYqgNSZbDwkaJ2ceRd9ojCGjBq+mOm9LmtXnAnEGyHhN/5R7iDW2TRw3h+o/jCFxus3P2LfWIIiwowAjANm7IA==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "define-properties": "^1.2.1", - "es-errors": "^1.3.0", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "set-function-name": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/remark-gfm": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.1.tgz", - "integrity": "sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-gfm": "^3.0.0", - "micromark-extension-gfm": "^3.0.0", - "remark-parse": "^11.0.0", - "remark-stringify": "^11.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-parse": { - "version": "11.0.0", - "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz", - "integrity": "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==", - "license": "MIT", - "dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-from-markdown": "^2.0.0", - "micromark-util-types": "^2.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-rehype": { - "version": "11.1.2", - "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-11.1.2.tgz", - "integrity": "sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw==", - "license": "MIT", - "dependencies": { - "@types/hast": "^3.0.0", - "@types/mdast": "^4.0.0", - "mdast-util-to-hast": "^13.0.0", - "unified": "^11.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remark-stringify": { - "version": "11.0.0", - "resolved": "https://registry.npmjs.org/remark-stringify/-/remark-stringify-11.0.0.tgz", - "integrity": "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==", - "license": "MIT", - 
"dependencies": { - "@types/mdast": "^4.0.0", - "mdast-util-to-markdown": "^2.0.0", - "unified": "^11.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/remeda": { - "version": "2.32.0", - "resolved": "https://registry.npmjs.org/remeda/-/remeda-2.32.0.tgz", - "integrity": "sha512-BZx9DsT4FAgXDTOdgJIc5eY6ECIXMwtlSPQoPglF20ycSWigttDDe88AozEsPPT4OWk5NujroGSBC1phw5uU+w==", - "license": "MIT", - "dependencies": { - "type-fest": "^4.41.0" - } - }, - "node_modules/remeda/node_modules/type-fest": { - "version": "4.41.0", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", - "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==", - "license": "(MIT OR CC0-1.0)", - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/resolve": { - "version": "1.22.10", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.10.tgz", - "integrity": "sha512-NPRy+/ncIMeDlTAsuqwKIiferiawhefFJtkNSW0qZJEqMEb+qBt/77B/jGeeek+F0uOeN05CDa6HXbbIgtVX4w==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-core-module": "^2.16.0", - "path-parse": "^1.0.7", - "supports-preserve-symlinks-flag": "^1.0.0" - }, - "bin": { - "resolve": "bin/resolve" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve-cwd": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz", - "integrity": "sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==", - "dev": true, - "license": "MIT", - "dependencies": { - "resolve-from": "^5.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/resolve-cwd/node_modules/resolve-from": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", - "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/resolve-from": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", - "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/resolve-pkg-maps": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", - "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" - } - }, - "node_modules/reusify": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", - "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", - "dev": true, - "license": "MIT", - 
"engines": { - "iojs": ">=1.0.0", - "node": ">=0.10.0" - } - }, - "node_modules/rrweb-cssom": { - "version": "0.8.0", - "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.8.0.tgz", - "integrity": "sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==", - "dev": true, - "license": "MIT" - }, - "node_modules/run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "queue-microtask": "^1.2.2" - } - }, - "node_modules/safe-array-concat": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.1.3.tgz", - "integrity": "sha512-AURm5f0jYEOydBj7VQlVvDrjeFgthDdEF5H1dP+6mNpoXOMo1quQqJ4wvJDyRZ9+pO3kGWoOdmV08cSv2aJV6Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.2", - "get-intrinsic": "^1.2.6", - "has-symbols": "^1.1.0", - "isarray": "^2.0.5" - }, - "engines": { - "node": ">=0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/safe-push-apply": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/safe-push-apply/-/safe-push-apply-1.0.0.tgz", - "integrity": "sha512-iKE9w/Z7xCzUMIZqdBsp6pEQvwuEebH4vdpjcDWnyzaI6yl6O9FHvVpmGelvEHNsoY6wGblkxR6Zty/h00WiSA==", - "dev": true, - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "isarray": "^2.0.5" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/safe-regex-test": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.1.0.tgz", - "integrity": "sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "es-errors": "^1.3.0", - "is-regex": "^1.2.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true, - "license": "MIT" - }, - "node_modules/saxes": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", - "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", - "dev": true, - "license": "ISC", - "dependencies": { - "xmlchars": "^2.2.0" - }, - "engines": { - "node": ">=v12.22.7" - } - }, - "node_modules/scheduler": { - "version": "0.27.0", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", - "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==", - "license": "MIT" - }, - "node_modules/semver": { - "version": "7.7.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz", - "integrity": 
"sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==", - "devOptional": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/set-function-length": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", - "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", - "dev": true, - "license": "MIT", - "dependencies": { - "define-data-property": "^1.1.4", - "es-errors": "^1.3.0", - "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.4", - "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/set-function-name": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.2.tgz", - "integrity": "sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "define-data-property": "^1.1.4", - "es-errors": "^1.3.0", - "functions-have-names": "^1.2.3", - "has-property-descriptors": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/set-proto": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/set-proto/-/set-proto-1.0.0.tgz", - "integrity": "sha512-RJRdvCo6IAnPdsvP/7m6bsQqNnn1FCBX5ZNtFL98MmFF/4xAIJTIg1YbHW5DC2W5SKZanrC6i4HsJqlajw/dZw==", - "dev": true, - "license": "MIT", - "dependencies": { - "dunder-proto": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/sharp": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.3.tgz", - "integrity": "sha512-eX2IQ6nFohW4DbvHIOLRB3MHFpYqaqvXd3Tp5e/T/dSH83fxaNJQRvDMhASmkNTsNTVF2/OOopzRCt7xokgPfg==", - "hasInstallScript": true, - "license": "Apache-2.0", - "optional": true, - "dependencies": { - "color": "^4.2.3", - "detect-libc": "^2.0.4", - "semver": "^7.7.2" - }, - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-darwin-arm64": "0.34.3", - "@img/sharp-darwin-x64": "0.34.3", - "@img/sharp-libvips-darwin-arm64": "1.2.0", - "@img/sharp-libvips-darwin-x64": "1.2.0", - "@img/sharp-libvips-linux-arm": "1.2.0", - "@img/sharp-libvips-linux-arm64": "1.2.0", - "@img/sharp-libvips-linux-ppc64": "1.2.0", - "@img/sharp-libvips-linux-s390x": "1.2.0", - "@img/sharp-libvips-linux-x64": "1.2.0", - "@img/sharp-libvips-linuxmusl-arm64": "1.2.0", - "@img/sharp-libvips-linuxmusl-x64": "1.2.0", - "@img/sharp-linux-arm": "0.34.3", - "@img/sharp-linux-arm64": "0.34.3", - "@img/sharp-linux-ppc64": "0.34.3", - "@img/sharp-linux-s390x": "0.34.3", - "@img/sharp-linux-x64": "0.34.3", - "@img/sharp-linuxmusl-arm64": "0.34.3", - "@img/sharp-linuxmusl-x64": "0.34.3", - "@img/sharp-wasm32": "0.34.3", - "@img/sharp-win32-arm64": "0.34.3", - "@img/sharp-win32-ia32": "0.34.3", - "@img/sharp-win32-x64": "0.34.3" - } - }, - "node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, - "license": "MIT", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" 
- } - }, - "node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/shiki": { - "version": "3.13.0", - "resolved": "https://registry.npmjs.org/shiki/-/shiki-3.13.0.tgz", - "integrity": "sha512-aZW4l8Og16CokuCLf8CF8kq+KK2yOygapU5m3+hoGw0Mdosc6fPitjM+ujYarppj5ZIKGyPDPP1vqmQhr+5/0g==", - "license": "MIT", - "dependencies": { - "@shikijs/core": "3.13.0", - "@shikijs/engine-javascript": "3.13.0", - "@shikijs/engine-oniguruma": "3.13.0", - "@shikijs/langs": "3.13.0", - "@shikijs/themes": "3.13.0", - "@shikijs/types": "3.13.0", - "@shikijs/vscode-textmate": "^10.0.2", - "@types/hast": "^3.0.4" - } - }, - "node_modules/side-channel": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", - "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", - "dev": true, - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "object-inspect": "^1.13.3", - "side-channel-list": "^1.0.0", - "side-channel-map": "^1.0.1", - "side-channel-weakmap": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/side-channel-list": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", - "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", - "dev": true, - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "object-inspect": "^1.13.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/side-channel-map": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", - "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.5", - "object-inspect": "^1.13.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/side-channel-weakmap": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", - "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.5", - "object-inspect": "^1.13.3", - "side-channel-map": "^1.0.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/signal-exit": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", - "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/simple-swizzle": { - "version": "0.2.4", - "resolved": 
"https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.4.tgz", - "integrity": "sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==", - "license": "MIT", - "optional": true, - "dependencies": { - "is-arrayish": "^0.3.1" - } - }, - "node_modules/slash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", - "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/sonner": { - "version": "2.0.7", - "resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.7.tgz", - "integrity": "sha512-W6ZN4p58k8aDKA4XPcx2hpIQXBRAgyiWVkYhT7CvK6D3iAu7xjvVyhQHg2/iaKJZ1XVJ4r7XuwGL+WGEK37i9w==", - "license": "MIT", - "peerDependencies": { - "react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc", - "react-dom": "^18.0.0 || ^19.0.0 || ^19.0.0-rc" - } - }, - "node_modules/source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/source-map-js": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/source-map-support": { - "version": "0.5.13", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.13.tgz", - "integrity": "sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==", - "dev": true, - "license": "MIT", - "dependencies": { - "buffer-from": "^1.0.0", - "source-map": "^0.6.0" - } - }, - "node_modules/space-separated-tokens": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", - "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/sprintf-js": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", - "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", - "dev": true, - "license": "BSD-3-Clause" - }, - "node_modules/stable-hash": { - "version": "0.0.5", - "resolved": "https://registry.npmjs.org/stable-hash/-/stable-hash-0.0.5.tgz", - "integrity": "sha512-+L3ccpzibovGXFK+Ap/f8LOS0ahMrHTf3xu7mMLSpEGU0EO9ucaysSylKo9eRDFNhWve/y275iPmIZ4z39a9iA==", - "dev": true, - "license": "MIT" - }, - "node_modules/stack-utils": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.6.tgz", - "integrity": "sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "escape-string-regexp": "^2.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/stack-utils/node_modules/escape-string-regexp": { - "version": "2.0.0", - "resolved": 
"https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz", - "integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/string-length": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/string-length/-/string-length-4.0.2.tgz", - "integrity": "sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "char-regex": "^1.0.2", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/string-length/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", - "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", - "dev": true, - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^9.2.2", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/string-width-cjs": { - "name": "string-width", - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/string-width-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string.prototype.includes": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/string.prototype.includes/-/string.prototype.includes-2.0.1.tgz", - "integrity": "sha512-o7+c9bW6zpAdJHTtujeePODAhkuicdAryFsfVKwA+wGw89wJ4GTY484WTucM9hLtDEOpOvI+aHnzqnC5lHp4Rg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.3" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/string.prototype.matchall": { - "version": "4.0.12", - "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.12.tgz", - "integrity": "sha512-6CC9uyBL+/48dYizRf7H7VAYCMCNTBeM78x/VTUe9bFEaxBepPJDa1Ow99LqI/1yF7kuy7Q3cQsYMrcjGUcskA==", - "dev": 
true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.3", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.6", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.0.0", - "get-intrinsic": "^1.2.6", - "gopd": "^1.2.0", - "has-symbols": "^1.1.0", - "internal-slot": "^1.1.0", - "regexp.prototype.flags": "^1.5.3", - "set-function-name": "^2.0.2", - "side-channel": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/string.prototype.repeat": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/string.prototype.repeat/-/string.prototype.repeat-1.0.0.tgz", - "integrity": "sha512-0u/TldDbKD8bFCQ/4f5+mNRrXwZ8hg2w7ZR8wa16e8z9XpePWl3eGEcUD0OXpEH/VJH/2G3gjUtR3ZOiBe2S/w==", - "dev": true, - "license": "MIT", - "dependencies": { - "define-properties": "^1.1.3", - "es-abstract": "^1.17.5" - } - }, - "node_modules/string.prototype.trim": { - "version": "1.2.10", - "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.10.tgz", - "integrity": "sha512-Rs66F0P/1kedk5lyYyH9uBzuiI/kNRmwJAR9quK6VOtIpZ2G+hMZd+HQbbv25MgCA6gEffoMZYxlTod4WcdrKA==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.2", - "define-data-property": "^1.1.4", - "define-properties": "^1.2.1", - "es-abstract": "^1.23.5", - "es-object-atoms": "^1.0.0", - "has-property-descriptors": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/string.prototype.trimend": { - "version": "1.0.9", - "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.9.tgz", - "integrity": "sha512-G7Ok5C6E/j4SGfyLCloXTrngQIQU3PWtXGst3yM7Bea9FRURf1S42ZHlZZtsNque2FN2PoUhfZXYLNWwEr4dLQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "call-bound": "^1.0.2", - "define-properties": "^1.2.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/string.prototype.trimstart": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.8.tgz", - "integrity": "sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "define-properties": "^1.2.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/stringify-entities": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", - "integrity": "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==", - "license": "MIT", - "dependencies": { - "character-entities-html4": "^2.0.0", - "character-entities-legacy": "^3.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/strip-ansi": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz", - "integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.0.1" - }, - "engines": { 
- "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/strip-ansi-cjs": { - "name": "strip-ansi", - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi/node_modules/ansi-regex": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", - "integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/strip-final-newline": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz", - "integrity": "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/strip-indent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", - "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "min-indent": "^1.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-json-comments": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", - "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/style-to-js": { - "version": "1.1.17", - "resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.17.tgz", - "integrity": "sha512-xQcBGDxJb6jjFCTzvQtfiPn6YvvP2O8U1MDIPNfJQlWMYfktPy+iGsHE7cssjs7y84d9fQaK4UF3RIJaAHSoYA==", - "license": "MIT", - "dependencies": { - "style-to-object": "1.0.9" - } - }, - "node_modules/style-to-object": { - "version": "1.0.9", - "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.9.tgz", - "integrity": "sha512-G4qppLgKu/k6FwRpHiGiKPaPTFcG3g4wNVX/Qsfu+RqQM30E7Tyu/TEgxcL9PNLF5pdRLwQdE3YKKf+KF2Dzlw==", - "license": "MIT", - "dependencies": { - "inline-style-parser": "0.2.4" - } - }, - "node_modules/styled-jsx": { - "version": "5.1.6", - "resolved": "https://registry.npmjs.org/styled-jsx/-/styled-jsx-5.1.6.tgz", - "integrity": "sha512-qSVyDTeMotdvQYoHWLNGwRFJHC+i+ZvdBRYosOFgC+Wg1vx4frN2/RG/NA7SYqqvKNLf39P2LSRA2pu6n0XYZA==", - "license": "MIT", - "dependencies": { - "client-only": "0.0.1" - }, - "engines": { - "node": ">= 12.0.0" - }, - "peerDependencies": { - "react": ">= 16.8.0 || 17.x.x || ^18.0.0-0 || ^19.0.0-0" - }, - "peerDependenciesMeta": { - "@babel/core": { - 
"optional": true - }, - "babel-plugin-macros": { - "optional": true - } - } - }, - "node_modules/supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, - "license": "MIT", - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/supports-preserve-symlinks-flag": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", - "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/symbol-tree": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", - "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", - "dev": true, - "license": "MIT" - }, - "node_modules/synckit": { - "version": "0.11.11", - "resolved": "https://registry.npmjs.org/synckit/-/synckit-0.11.11.tgz", - "integrity": "sha512-MeQTA1r0litLUf0Rp/iisCaL8761lKAZHaimlbGK4j0HysC4PLfqygQj9srcs0m2RdtDYnF8UuYyKpbjHYp7Jw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@pkgr/core": "^0.2.9" - }, - "engines": { - "node": "^14.18.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/synckit" - } - }, - "node_modules/tailwind-merge": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.3.1.tgz", - "integrity": "sha512-gBXpgUm/3rp1lMZZrM/w7D8GKqshif0zAymAhbCyIt8KMe+0v9DQ7cdYLR4FHH/cKpdTXb+A/tKKU3eolfsI+g==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/dcastil" - } - }, - "node_modules/tailwindcss": { - "version": "4.1.14", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.14.tgz", - "integrity": "sha512-b7pCxjGO98LnxVkKjaZSDeNuljC4ueKUddjENJOADtubtdo8llTaJy7HwBMeLNSSo2N5QIAgklslK1+Ir8r6CA==", - "dev": true, - "license": "MIT" - }, - "node_modules/tapable": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz", - "integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/webpack" - } - }, - "node_modules/tar": { - "version": "7.5.1", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.1.tgz", - "integrity": "sha512-nlGpxf+hv0v7GkWBK2V9spgactGOp0qvfWRxUMjqHyzrt3SgwE48DIv/FhqPHJYLHpgW1opq3nERbz5Anq7n1g==", - "dev": true, - "license": "ISC", - "dependencies": { - "@isaacs/fs-minipass": "^4.0.0", - "chownr": "^3.0.0", - "minipass": "^7.1.2", - "minizlib": "^3.1.0", - "yallist": "^5.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/test-exclude": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", - "integrity": "sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==", - "dev": true, - "license": "ISC", - "dependencies": { - "@istanbuljs/schema": "^0.1.2", - "glob": "^7.1.4", - "minimatch": "^3.0.4" - }, - 
"engines": { - "node": ">=8" - } - }, - "node_modules/test-exclude/node_modules/glob": { - "version": "7.2.3", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", - "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", - "deprecated": "Glob versions prior to v9 are no longer supported", - "dev": true, - "license": "ISC", - "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.1.1", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" - }, - "engines": { - "node": "*" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/tinyglobby": { - "version": "0.2.13", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.13.tgz", - "integrity": "sha512-mEwzpUgrLySlveBwEVDMKk5B57bhLPYovRfPAXD5gA/98Opn0rCDj3GtLwFvCvH5RK9uPCExUROW5NjDwvqkxw==", - "dev": true, - "license": "MIT", - "dependencies": { - "fdir": "^6.4.4", - "picomatch": "^4.0.2" - }, - "engines": { - "node": ">=12.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/SuperchupuDev" - } - }, - "node_modules/tinyglobby/node_modules/fdir": { - "version": "6.4.4", - "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.4.tgz", - "integrity": "sha512-1NZP+GK4GfuAv3PqKvxQRDMjdSRZjnkq7KfhlNrCNNlZ0ygQFpebfrnfnq/W7fpUnAv9aGWmY1zKx7FYL3gwhg==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "picomatch": "^3 || ^4" - }, - "peerDependenciesMeta": { - "picomatch": { - "optional": true - } - } - }, - "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz", - "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/tldts": { - "version": "6.1.86", - "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.86.tgz", - "integrity": "sha512-WMi/OQ2axVTf/ykqCQgXiIct+mSQDFdH2fkwhPwgEwvJ1kSzZRiinb0zF2Xb8u4+OqPChmyI6MEu4EezNJz+FQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "tldts-core": "^6.1.86" - }, - "bin": { - "tldts": "bin/cli.js" - } - }, - "node_modules/tldts-core": { - "version": "6.1.86", - "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.86.tgz", - "integrity": "sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==", - "dev": true, - "license": "MIT" - }, - "node_modules/tmpl": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", - "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==", - "dev": true, - "license": "BSD-3-Clause" - }, - "node_modules/to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-number": "^7.0.0" - }, - "engines": { - "node": ">=8.0" - } - }, - "node_modules/tough-cookie": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-5.1.2.tgz", - "integrity": "sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==", - "dev": true, - 
"license": "BSD-3-Clause", - "dependencies": { - "tldts": "^6.1.32" - }, - "engines": { - "node": ">=16" - } - }, - "node_modules/tr46": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", - "license": "MIT" - }, - "node_modules/trim-lines": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", - "integrity": "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/trough": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz", - "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - }, - "node_modules/ts-api-utils": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.1.0.tgz", - "integrity": "sha512-CUgTZL1irw8u29bzrOD/nH85jqyc74D6SshFgujOIA7osm2Rz7dYH77agkx7H4FBNxDq7Cjf+IjaX/8zwFW+ZQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18.12" - }, - "peerDependencies": { - "typescript": ">=4.8.4" - } - }, - "node_modules/ts-node": { - "version": "10.9.2", - "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", - "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@cspotcode/source-map-support": "^0.8.0", - "@tsconfig/node10": "^1.0.7", - "@tsconfig/node12": "^1.0.7", - "@tsconfig/node14": "^1.0.0", - "@tsconfig/node16": "^1.0.2", - "acorn": "^8.4.1", - "acorn-walk": "^8.1.1", - "arg": "^4.1.0", - "create-require": "^1.1.0", - "diff": "^4.0.1", - "make-error": "^1.1.1", - "v8-compile-cache-lib": "^3.0.1", - "yn": "3.1.1" - }, - "bin": { - "ts-node": "dist/bin.js", - "ts-node-cwd": "dist/bin-cwd.js", - "ts-node-esm": "dist/bin-esm.js", - "ts-node-script": "dist/bin-script.js", - "ts-node-transpile-only": "dist/bin-transpile.js", - "ts-script": "dist/bin-script-deprecated.js" - }, - "peerDependencies": { - "@swc/core": ">=1.2.50", - "@swc/wasm": ">=1.2.50", - "@types/node": "*", - "typescript": ">=2.7" - }, - "peerDependenciesMeta": { - "@swc/core": { - "optional": true - }, - "@swc/wasm": { - "optional": true - } - } - }, - "node_modules/tsconfig-paths": { - "version": "3.15.0", - "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.15.0.tgz", - "integrity": "sha512-2Ac2RgzDe/cn48GvOe3M+o82pEFewD3UPbyoUHHdKasHwJKjds4fLXWf/Ux5kATBKN20oaFGu+jbElp1pos0mg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/json5": "^0.0.29", - "json5": "^1.0.2", - "minimist": "^1.2.6", - "strip-bom": "^3.0.0" - } - }, - "node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, - "node_modules/tw-animate-css": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.4.0.tgz", - "integrity": "sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==", - 
"dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/Wombosvideo" - } - }, - "node_modules/type-check": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", - "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", - "dev": true, - "license": "MIT", - "dependencies": { - "prelude-ls": "^1.2.1" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/type-detect": { - "version": "4.0.8", - "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", - "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/type-fest": { - "version": "0.21.3", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", - "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", - "dev": true, - "license": "(MIT OR CC0-1.0)", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/typed-array-buffer": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.3.tgz", - "integrity": "sha512-nAYYwfY3qnzX30IkA6AQZjVbtK6duGontcQm1WSG1MD94YLqK0515GNApXkoxKOWMusVssAHWLh9SeaoefYFGw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "es-errors": "^1.3.0", - "is-typed-array": "^1.1.14" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/typed-array-byte-length": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.3.tgz", - "integrity": "sha512-BaXgOuIxz8n8pIq3e7Atg/7s+DpiYrxn4vdot3w9KbnBhcRQq6o3xemQdIfynqSeXeDrF32x+WvfzmOjPiY9lg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.8", - "for-each": "^0.3.3", - "gopd": "^1.2.0", - "has-proto": "^1.2.0", - "is-typed-array": "^1.1.14" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/typed-array-byte-offset": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.4.tgz", - "integrity": "sha512-bTlAFB/FBYMcuX81gbL4OcpH5PmlFHqlCCpAl8AlEzMz5k53oNDvN8p1PNOWLEmI2x4orp3raOFB51tv9X+MFQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "available-typed-arrays": "^1.0.7", - "call-bind": "^1.0.8", - "for-each": "^0.3.3", - "gopd": "^1.2.0", - "has-proto": "^1.2.0", - "is-typed-array": "^1.1.15", - "reflect.getprototypeof": "^1.0.9" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/typed-array-length": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.7.tgz", - "integrity": "sha512-3KS2b+kL7fsuk/eJZ7EQdnEmQoaho/r6KUef7hxvltNA5DR8NAUM+8wJMbJyZ4G9/7i3v5zPBIMN5aybAh2/Jg==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bind": "^1.0.7", - "for-each": "^0.3.3", - "gopd": "^1.0.1", - "is-typed-array": "^1.1.13", - "possible-typed-array-names": "^1.0.0", - "reflect.getprototypeof": "^1.0.6" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/typescript": { - "version": 
"5.9.2", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz", - "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==", - "dev": true, - "license": "Apache-2.0", - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, - "node_modules/unbox-primitive": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.1.0.tgz", - "integrity": "sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.3", - "has-bigints": "^1.0.2", - "has-symbols": "^1.1.0", - "which-boxed-primitive": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/undici-types": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.14.0.tgz", - "integrity": "sha512-QQiYxHuyZ9gQUIrmPo3IA+hUl4KYk8uSA7cHrcKd/l3p1OTpZcM0Tbp9x7FAtXdAYhlasd60ncPpgu6ihG6TOA==", - "license": "MIT" - }, - "node_modules/unified": { - "version": "11.0.5", - "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz", - "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "bail": "^2.0.0", - "devlop": "^1.0.0", - "extend": "^3.0.0", - "is-plain-obj": "^4.0.0", - "trough": "^2.0.0", - "vfile": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-is": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz", - "integrity": "sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-position": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-5.0.0.tgz", - "integrity": "sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-stringify-position": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", - "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unist-util-visit": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz", - "integrity": "sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-is": "^6.0.0", - "unist-util-visit-parents": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - 
}, - "node_modules/unist-util-visit-parents": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.1.tgz", - "integrity": "sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-is": "^6.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/unrs-resolver": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/unrs-resolver/-/unrs-resolver-1.11.1.tgz", - "integrity": "sha512-bSjt9pjaEBnNiGgc9rUiHGKv5l4/TGzDmYw3RhnkJGtLhbnnA/5qJj7x3dNDCRx/PJxu774LlH8lCOlB4hEfKg==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "dependencies": { - "napi-postinstall": "^0.3.0" - }, - "funding": { - "url": "https://opencollective.com/unrs-resolver" - }, - "optionalDependencies": { - "@unrs/resolver-binding-android-arm-eabi": "1.11.1", - "@unrs/resolver-binding-android-arm64": "1.11.1", - "@unrs/resolver-binding-darwin-arm64": "1.11.1", - "@unrs/resolver-binding-darwin-x64": "1.11.1", - "@unrs/resolver-binding-freebsd-x64": "1.11.1", - "@unrs/resolver-binding-linux-arm-gnueabihf": "1.11.1", - "@unrs/resolver-binding-linux-arm-musleabihf": "1.11.1", - "@unrs/resolver-binding-linux-arm64-gnu": "1.11.1", - "@unrs/resolver-binding-linux-arm64-musl": "1.11.1", - "@unrs/resolver-binding-linux-ppc64-gnu": "1.11.1", - "@unrs/resolver-binding-linux-riscv64-gnu": "1.11.1", - "@unrs/resolver-binding-linux-riscv64-musl": "1.11.1", - "@unrs/resolver-binding-linux-s390x-gnu": "1.11.1", - "@unrs/resolver-binding-linux-x64-gnu": "1.11.1", - "@unrs/resolver-binding-linux-x64-musl": "1.11.1", - "@unrs/resolver-binding-wasm32-wasi": "1.11.1", - "@unrs/resolver-binding-win32-arm64-msvc": "1.11.1", - "@unrs/resolver-binding-win32-ia32-msvc": "1.11.1", - "@unrs/resolver-binding-win32-x64-msvc": "1.11.1" - } - }, - "node_modules/update-browserslist-db": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.3.tgz", - "integrity": "sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "escalade": "^3.2.0", - "picocolors": "^1.1.1" - }, - "bin": { - "update-browserslist-db": "cli.js" - }, - "peerDependencies": { - "browserslist": ">= 4.21.0" - } - }, - "node_modules/uri-js": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", - "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", - "dev": true, - "license": "BSD-2-Clause", - "dependencies": { - "punycode": "^2.1.0" - } - }, - "node_modules/use-callback-ref": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.3.tgz", - "integrity": "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==", - "license": "MIT", - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 
|| ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-sidecar": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.3.tgz", - "integrity": "sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==", - "license": "MIT", - "dependencies": { - "detect-node-es": "^1.1.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/uuid": { - "version": "8.3.2", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", - "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", - "license": "MIT", - "bin": { - "uuid": "dist/bin/uuid" - } - }, - "node_modules/v8-compile-cache-lib": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", - "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", - "dev": true, - "license": "MIT" - }, - "node_modules/v8-to-istanbul": { - "version": "9.3.0", - "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.3.0.tgz", - "integrity": "sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==", - "dev": true, - "license": "ISC", - "dependencies": { - "@jridgewell/trace-mapping": "^0.3.12", - "@types/istanbul-lib-coverage": "^2.0.1", - "convert-source-map": "^2.0.0" - }, - "engines": { - "node": ">=10.12.0" - } - }, - "node_modules/vfile": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", - "integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/vfile-message": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.2.tgz", - "integrity": "sha512-jRDZ1IMLttGj41KcZvlrYAaI3CfqpLpfpf+Mfig13viT6NKvRzWZ+lXz0Y5D60w6uJIBAOGq9mSHf0gktF0duw==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0", - "unist-util-stringify-position": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/w3c-xmlserializer": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", - "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", - "dev": true, - "license": "MIT", - "dependencies": { - "xml-name-validator": "^5.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/walker": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.8.tgz", - "integrity": "sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "makeerror": "1.0.12" - } - }, - "node_modules/web-streams-polyfill": { - "version": "4.0.0-beta.3", - "resolved": 
"https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", - "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/webidl-conversions": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", - "license": "BSD-2-Clause" - }, - "node_modules/whatwg-encoding": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", - "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "iconv-lite": "0.6.3" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/whatwg-mimetype": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", - "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/whatwg-url": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", - "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "license": "MIT", - "dependencies": { - "tr46": "~0.0.3", - "webidl-conversions": "^3.0.0" - } - }, - "node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/which-boxed-primitive": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.1.1.tgz", - "integrity": "sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-bigint": "^1.1.0", - "is-boolean-object": "^1.2.1", - "is-number-object": "^1.1.1", - "is-string": "^1.1.1", - "is-symbol": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/which-builtin-type": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/which-builtin-type/-/which-builtin-type-1.2.1.tgz", - "integrity": "sha512-6iBczoX+kDQ7a3+YJBnh3T+KZRxM/iYNPXicqk66/Qfm1b93iu+yOImkg0zHbj5LNOcNv1TEADiZ0xa34B4q6Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "call-bound": "^1.0.2", - "function.prototype.name": "^1.1.6", - "has-tostringtag": "^1.0.2", - "is-async-function": "^2.0.0", - "is-date-object": "^1.1.0", - "is-finalizationregistry": "^1.1.0", - "is-generator-function": "^1.0.10", - "is-regex": "^1.2.1", - "is-weakref": "^1.0.2", - "isarray": "^2.0.5", - "which-boxed-primitive": "^1.1.0", - "which-collection": "^1.0.2", - "which-typed-array": "^1.1.16" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/which-collection": { - "version": "1.0.2", - "resolved": 
"https://registry.npmjs.org/which-collection/-/which-collection-1.0.2.tgz", - "integrity": "sha512-K4jVyjnBdgvc86Y6BkaLZEN933SwYOuBFkdmBu9ZfkcAbdVbpITnDmjvZ/aQjRXQrv5EPkTnD1s39GiiqbngCw==", - "dev": true, - "license": "MIT", - "dependencies": { - "is-map": "^2.0.3", - "is-set": "^2.0.3", - "is-weakmap": "^2.0.2", - "is-weakset": "^2.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/which-typed-array": { - "version": "1.1.19", - "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.19.tgz", - "integrity": "sha512-rEvr90Bck4WZt9HHFC4DJMsjvu7x+r6bImz0/BrbWb7A2djJ8hnZMrWnHo9F8ssv0OMErasDhftrfROTyqSDrw==", - "dev": true, - "license": "MIT", - "dependencies": { - "available-typed-arrays": "^1.0.7", - "call-bind": "^1.0.8", - "call-bound": "^1.0.4", - "for-each": "^0.3.5", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "has-tostringtag": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/word-wrap": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", - "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/wrap-ansi": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", - "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.1.0", - "string-width": "^5.0.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs": { - "name": "wrap-ansi", - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/wrap-ansi-cjs/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": 
"^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-styles": { - "version": "6.2.3", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", - "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/write-file-atomic": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-5.0.1.tgz", - "integrity": "sha512-+QU2zd6OTD8XWIJCbffaiQeH9U73qIqafo1x6V1snCWYGJf6cVE0cDR4D8xRzcEnfI21IFrUPzPGtcPf8AC+Rw==", - "dev": true, - "license": "ISC", - "dependencies": { - "imurmurhash": "^0.1.4", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - } - }, - "node_modules/ws": { - "version": "8.18.3", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", - "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, - "node_modules/xml-name-validator": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", - "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=18" - } - }, - "node_modules/xmlchars": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", - "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", - "dev": true, - "license": "MIT" - }, - "node_modules/y18n": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=10" - } - }, - "node_modules/yallist": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", - "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==", - "dev": true, - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=18" - } - }, - "node_modules/yargs": { - "version": "17.7.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", - "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "dev": true, - "license": "MIT", - "dependencies": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/yargs-parser": { - "version": "21.1.1", - "resolved": 
"https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/yargs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/yargs/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/yargs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/yn": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", - "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/yocto-queue": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", - "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/zwitch": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", - "integrity": "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/wooorm" - } - } - } -} diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json deleted file mode 100644 index 9350be16a..000000000 --- a/llama_stack/ui/package.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "name": "ui", - "version": "0.1.0", - "private": true, - "scripts": { - "dev": "next dev --turbopack --port ${LLAMA_STACK_UI_PORT:-8322}", - "build": "next build", - "start": "next start", - "lint": "next lint", - "format": "prettier --write \"./**/*.{ts,tsx}\"", - "format:check": "prettier --check \"./**/*.{ts,tsx}\"", - "test": "jest", - "test:e2e": "playwright test" - }, - "dependencies": { - "@radix-ui/react-collapsible": "^1.1.12", - "@radix-ui/react-dialog": "^1.1.15", - "@radix-ui/react-dropdown-menu": "^2.1.16", - "@radix-ui/react-select": "^2.2.6", - "@radix-ui/react-separator": "^1.1.7", - "@radix-ui/react-slot": "^1.2.3", - "@radix-ui/react-tooltip": "^1.2.8", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "framer-motion": "^12.23.24", - "llama-stack-client": "^0.3.0", - "lucide-react": "^0.545.0", - "next": "15.5.4", - "next-auth": "^4.24.11", - "next-themes": 
"^0.4.6", - "react": "^19.0.0", - "react-dom": "^19.2.0", - "react-markdown": "^10.1.0", - "remark-gfm": "^4.0.1", - "remeda": "^2.32.0", - "shiki": "^3.13.0", - "sonner": "^2.0.7", - "tailwind-merge": "^3.3.1" - }, - "devDependencies": { - "@eslint/eslintrc": "^3", - "@tailwindcss/postcss": "^4", - "@testing-library/dom": "^10.4.1", - "@testing-library/jest-dom": "^6.8.0", - "@testing-library/react": "^16.3.0", - "@types/jest": "^30.0.0", - "@types/node": "^24", - "@types/react": "^19", - "@types/react-dom": "^19", - "eslint": "^9", - "eslint-config-next": "15.5.6", - "eslint-config-prettier": "^10.1.8", - "eslint-plugin-prettier": "^5.5.4", - "jest": "^30.2.0", - "jest-environment-jsdom": "^30.2.0", - "prettier": "3.6.2", - "tailwindcss": "^4", - "ts-node": "^10.9.2", - "tw-animate-css": "^1.4.0", - "typescript": "^5" - } -} diff --git a/pyproject.toml b/pyproject.toml index 741dd17e5..3e16dc08f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ required-version = ">=0.7.0" [project] name = "llama_stack" -version = "0.3.0" +version = "0.4.0.dev0" authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }] description = "Llama Stack" readme = "README.md" @@ -24,20 +24,20 @@ classifiers = [ "Topic :: Scientific/Engineering :: Information Analysis", ] dependencies = [ + "PyYAML>=6.0", "aiohttp", "fastapi>=0.115.0,<1.0", # server "fire", # for MCP in LLS client "httpx", "jinja2>=3.1.6", "jsonschema", - "llama-stack-client>=0.3.0", - "openai>=1.107", # for expires_after support + "llama-stack-api", # API and provider specifications (local dev via tool.uv.sources) + "openai>=2.5.0", "prompt-toolkit", "python-dotenv", "pyjwt[crypto]>=2.10.0", # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support. "pydantic>=2.11.9", "rich", - "starlette", "termcolor", "tiktoken", "pillow", @@ -49,14 +49,12 @@ dependencies = [ "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store "sqlalchemy[asyncio]>=2.0.41", # server - for conversations + "starlette>=0.49.1", ] [project.optional-dependencies] -ui = [ - "streamlit", - "pandas", - "llama-stack-client>=0.3.0", - "streamlit-option-menu", +client = [ + "llama-stack-client>=0.3.0", # Optional for library-only usage ] [dependency-groups] @@ -67,23 +65,56 @@ dev = [ "pytest-cov", "pytest-html", "pytest-json-report", - "pytest-socket", # For blocking network access in unit tests - "nbval", # For notebook testing + "pytest-socket", # For blocking network access in unit tests + "nbval", # For notebook testing "black", "ruff", + "mypy", + "pre-commit>=4.4.0", + "ruamel.yaml", # needed for openapi generator + "openapi-spec-validator>=0.7.2", +] +# Type checking dependencies - includes type stubs and optional runtime dependencies +# needed for complete mypy coverage across all optional features +type_checking = [ "types-requests", "types-setuptools", - "pre-commit", - "ruamel.yaml", # needed for openapi generator + "types-jsonschema", + "pandas-stubs", + "types-psutil", + "types-tqdm", + "boto3-stubs[s3]", + "streamlit", + "streamlit-option-menu", + "pandas", + "anthropic", + "databricks-sdk", + "fairscale", + "torchtune", + "trl", + "peft", + "datasets", + "together", + "nest-asyncio", + "pymongo", + "torchvision", + "sqlite-vec", + "faiss-cpu", + "lm-format-enforcer", + "mcp", + "ollama", + "llama-stack-client>=0.3.0", ] # These are the dependencies required for running unit tests. 
unit = [ + "anthropic", + "databricks-sdk", "sqlite-vec", "ollama", "aiosqlite", "aiohttp", "psycopg2-binary>=2.9.0", - "pypdf", + "pypdf>=6.1.3", "mcp", "chardet", "sqlalchemy", @@ -106,7 +137,7 @@ test = [ "torchvision>=0.21.0", "chardet", "psycopg2-binary>=2.9.0", - "pypdf", + "pypdf>=6.1.3", "mcp", "datasets>=4.0.0", "autoevals", @@ -150,8 +181,13 @@ llama = "llama_stack.cli.llama:main" install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned" [tool.setuptools.packages.find] -where = ["."] -include = ["llama_stack", "llama_stack.*"] +where = ["src"] +include = [ + "llama_stack", + "llama_stack.*", + "llama_stack_api", + "llama_stack_api.*", +] [[tool.uv.index]] name = "pytorch-cpu" @@ -161,6 +197,7 @@ explicit = true [tool.uv.sources] torch = [{ index = "pytorch-cpu" }] torchvision = [{ index = "pytorch-cpu" }] +llama-stack-api = [{ path = "src/llama_stack_api", editable = true }] [tool.ruff] line-length = 120 @@ -217,18 +254,20 @@ unfixable = [ # Ignore the following errors for the following files [tool.ruff.lint.per-file-ignores] "tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests -"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"] -"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [ +"src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = [ + "RUF001", +] +"src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [ "RUF001", "PLE2515", ] -"llama_stack/apis/**/__init__.py" = [ +"src/llama_stack/apis/**/__init__.py" = [ "F403", ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] -mypy_path = ["llama_stack"] -packages = ["llama_stack"] +mypy_path = ["src"] +packages = ["llama_stack", "llama_stack_api"] plugins = ['pydantic.mypy'] disable_error_code = [] warn_return_any = true @@ -239,82 +278,96 @@ follow_imports = "silent" # to exclude the entire directory. 
exclude = [ # As we fix more and more of these, we should remove them from the list - "^llama_stack.core/build\\.py$", - "^llama_stack.core/client\\.py$", - "^llama_stack.core/request_headers\\.py$", - "^llama_stack.core/routers/", - "^llama_stack.core/routing_tables/", - "^llama_stack.core/server/endpoints\\.py$", - "^llama_stack.core/server/server\\.py$", - "^llama_stack.core/stack\\.py$", - "^llama_stack.core/store/registry\\.py$", - "^llama_stack.core/utils/exec\\.py$", - "^llama_stack.core/utils/prompt_for_config\\.py$", - "^llama_stack/models/llama/llama3/interface\\.py$", - "^llama_stack/models/llama/llama3/tokenizer\\.py$", - "^llama_stack/models/llama/llama3/tool_utils\\.py$", - "^llama_stack/providers/inline/agents/meta_reference/", - "^llama_stack/providers/inline/datasetio/localfs/", - "^llama_stack/providers/inline/eval/meta_reference/eval\\.py$", - "^llama_stack/providers/inline/inference/meta_reference/inference\\.py$", - "^llama_stack/models/llama/llama3/generation\\.py$", - "^llama_stack/models/llama/llama3/multimodal/model\\.py$", - "^llama_stack/models/llama/llama4/", - "^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$", - "^llama_stack/providers/inline/post_training/common/validator\\.py$", - "^llama_stack/providers/inline/safety/code_scanner/", - "^llama_stack/providers/inline/safety/llama_guard/", - "^llama_stack/providers/inline/scoring/basic/", - "^llama_stack/providers/inline/scoring/braintrust/", - "^llama_stack/providers/inline/scoring/llm_as_judge/", - "^llama_stack/providers/remote/agents/sample/", - "^llama_stack/providers/remote/datasetio/huggingface/", - "^llama_stack/providers/remote/datasetio/nvidia/", - "^llama_stack/providers/remote/inference/bedrock/", - "^llama_stack/providers/remote/inference/nvidia/", - "^llama_stack/providers/remote/inference/passthrough/", - "^llama_stack/providers/remote/inference/runpod/", - "^llama_stack/providers/remote/inference/tgi/", - "^llama_stack/providers/remote/inference/watsonx/", - "^llama_stack/providers/remote/safety/bedrock/", - "^llama_stack/providers/remote/safety/nvidia/", - "^llama_stack/providers/remote/safety/sambanova/", - "^llama_stack/providers/remote/safety/sample/", - "^llama_stack/providers/remote/tool_runtime/bing_search/", - "^llama_stack/providers/remote/tool_runtime/brave_search/", - "^llama_stack/providers/remote/tool_runtime/model_context_protocol/", - "^llama_stack/providers/remote/tool_runtime/tavily_search/", - "^llama_stack/providers/remote/tool_runtime/wolfram_alpha/", - "^llama_stack/providers/remote/post_training/nvidia/", - "^llama_stack/providers/remote/vector_io/chroma/", - "^llama_stack/providers/remote/vector_io/milvus/", - "^llama_stack/providers/remote/vector_io/pgvector/", - "^llama_stack/providers/remote/vector_io/qdrant/", - "^llama_stack/providers/remote/vector_io/sample/", - "^llama_stack/providers/remote/vector_io/weaviate/", - "^llama_stack/providers/utils/bedrock/client\\.py$", - "^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$", - "^llama_stack/providers/utils/inference/embedding_mixin\\.py$", - "^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$", - "^llama_stack/providers/utils/inference/model_registry\\.py$", - "^llama_stack/providers/utils/inference/openai_compat\\.py$", - "^llama_stack/providers/utils/inference/prompt_adapter\\.py$", - "^llama_stack/providers/utils/kvstore/kvstore\\.py$", - "^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$", - 
"^llama_stack/providers/utils/kvstore/redis/redis\\.py$", - "^llama_stack/providers/utils/memory/vector_store\\.py$", - "^llama_stack/providers/utils/scoring/aggregation_utils\\.py$", - "^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$", - "^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", - "^llama_stack/providers/utils/telemetry/trace_protocol\\.py$", - "^llama_stack/providers/utils/telemetry/tracing\\.py$", - "^llama_stack/strong_typing/auxiliary\\.py$", - "^llama_stack/distributions/template\\.py$", + "^src/llama_stack/core/build\\.py$", + "^src/llama_stack/core/client\\.py$", + "^src/llama_stack/core/request_headers\\.py$", + "^src/llama_stack/core/routers/", + "^src/llama_stack/core/routing_tables/", + "^src/llama_stack/core/server/endpoints\\.py$", + "^src/llama_stack/core/server/server\\.py$", + "^src/llama_stack/core/stack\\.py$", + "^src/llama_stack/core/store/registry\\.py$", + "^src/llama_stack/core/utils/exec\\.py$", + "^src/llama_stack/core/utils/prompt_for_config\\.py$", + "^src/llama_stack/models/llama/llama3/interface\\.py$", + "^src/llama_stack/models/llama/llama3/tokenizer\\.py$", + "^src/llama_stack/models/llama/llama3/tool_utils\\.py$", + "^src/llama_stack/models/llama/llama3/generation\\.py$", + "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$", + "^src/llama_stack/models/llama/llama4/", + "^src/llama_stack/providers/inline/agents/meta_reference/", + "^src/llama_stack/providers/inline/datasetio/localfs/", + "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", + "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", + "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$", + "^src/llama_stack/providers/inline/post_training/common/validator\\.py$", + "^src/llama_stack/providers/inline/safety/code_scanner/", + "^src/llama_stack/providers/inline/safety/llama_guard/", + "^src/llama_stack/providers/inline/scoring/basic/", + "^src/llama_stack/providers/inline/scoring/braintrust/", + "^src/llama_stack/providers/inline/scoring/llm_as_judge/", + "^src/llama_stack/providers/remote/agents/sample/", + "^src/llama_stack/providers/remote/datasetio/huggingface/", + "^src/llama_stack/providers/remote/datasetio/nvidia/", + "^src/llama_stack/providers/remote/inference/oci/", + "^src/llama_stack/providers/remote/inference/bedrock/", + "^src/llama_stack/providers/remote/inference/nvidia/", + "^src/llama_stack/providers/remote/inference/passthrough/", + "^src/llama_stack/providers/remote/inference/runpod/", + "^src/llama_stack/providers/remote/inference/tgi/", + "^src/llama_stack/providers/remote/inference/watsonx/", + "^src/llama_stack/providers/remote/safety/bedrock/", + "^src/llama_stack/providers/remote/safety/nvidia/", + "^src/llama_stack/providers/remote/safety/sambanova/", + "^src/llama_stack/providers/remote/safety/sample/", + "^src/llama_stack/providers/remote/tool_runtime/bing_search/", + "^src/llama_stack/providers/remote/tool_runtime/brave_search/", + "^src/llama_stack/providers/remote/tool_runtime/model_context_protocol/", + "^src/llama_stack/providers/remote/tool_runtime/tavily_search/", + "^src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/", + "^src/llama_stack/providers/remote/post_training/nvidia/", + "^src/llama_stack/providers/remote/vector_io/chroma/", + "^src/llama_stack/providers/remote/vector_io/milvus/", + "^src/llama_stack/providers/remote/vector_io/pgvector/", + "^src/llama_stack/providers/remote/vector_io/qdrant/", + 
"^src/llama_stack/providers/remote/vector_io/sample/", + "^src/llama_stack/providers/remote/vector_io/weaviate/", + "^src/llama_stack/providers/utils/bedrock/client\\.py$", + "^src/llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$", + "^src/llama_stack/providers/utils/inference/embedding_mixin\\.py$", + "^src/llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$", + "^src/llama_stack/providers/utils/inference/model_registry\\.py$", + "^src/llama_stack/providers/utils/inference/openai_compat\\.py$", + "^src/llama_stack/providers/utils/inference/prompt_adapter\\.py$", + "^src/llama_stack/providers/utils/kvstore/kvstore\\.py$", + "^src/llama_stack/providers/utils/kvstore/postgres/postgres\\.py$", + "^src/llama_stack/providers/utils/kvstore/redis/redis\\.py$", + "^src/llama_stack/providers/utils/memory/vector_store\\.py$", + "^src/llama_stack/providers/utils/scoring/aggregation_utils\\.py$", + "^src/llama_stack/providers/utils/scoring/base_scoring_fn\\.py$", + "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", + "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$", + "^src/llama_stack/providers/utils/telemetry/tracing\\.py$", + "^src/llama_stack/distributions/template\\.py$", ] [[tool.mypy.overrides]] # packages that lack typing annotations, do not have stubs, or are unavailable. -module = ["yaml", "fire"] +module = [ + "yaml", + "fire", + "redis.asyncio", + "psycopg2", + "psycopg2.extras", + "psycopg2.extensions", + "torchtune.*", + "fairscale.*", + "torchvision.*", + "datasets", + "nest_asyncio", + "streamlit_option_menu", + "lmformatenforcer.*", +] ignore_missing_imports = true [tool.pydantic-mypy] diff --git a/scripts/check-init-py.sh b/scripts/check-init-py.sh index c6e8fd417..76b3ed8e2 100755 --- a/scripts/check-init-py.sh +++ b/scripts/check-init-py.sh @@ -16,7 +16,7 @@ if (( BASH_VERSINFO[0] < 4 )); then exit 1 fi -PACKAGE_DIR="${1:-llama_stack}" +PACKAGE_DIR="${1:-src/llama_stack}" if [ ! -d "$PACKAGE_DIR" ]; then echo "ERROR: Package directory '$PACKAGE_DIR' does not exist" diff --git a/scripts/cleanup_recordings.py b/scripts/cleanup_recordings.py new file mode 100755 index 000000000..14f8cce84 --- /dev/null +++ b/scripts/cleanup_recordings.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Clean up unused test recordings based on CI test collection. + +This script: +1. Reads CI matrix definitions from tests/integration/ci_matrix.json (default + scheduled overrides) +2. Uses pytest --collect-only with --json-report to gather all test IDs that run in CI +3. Compares against existing recordings to identify unused ones +4. 
Optionally deletes unused recordings + +Usage: + # Dry run - see what would be deleted + ./scripts/cleanup_recordings.py + + # Save manifest of CI test IDs for inspection + ./scripts/cleanup_recordings.py --manifest ci_tests.txt + + # Actually delete unused recordings + ./scripts/cleanup_recordings.py --delete +""" + +import argparse +import json +import os +import subprocess +import tempfile +from collections import defaultdict +from pathlib import Path + +REPO_ROOT = Path(__file__).parent.parent + +# Load CI matrix from JSON file +CI_MATRIX_FILE = REPO_ROOT / "tests/integration/ci_matrix.json" +with open(CI_MATRIX_FILE) as f: + _matrix_config = json.load(f) + +DEFAULT_CI_MATRIX: list[dict[str, str]] = _matrix_config["default"] +SCHEDULED_MATRICES: dict[str, list[dict[str, str]]] = _matrix_config.get("schedules", {}) + + +def _unique_configs(entries): + seen: set[tuple[str, str]] = set() + for entry in entries: + suite = entry["suite"] + setup = entry["setup"] + key = (suite, setup) + if key in seen: + continue + seen.add(key) + yield {"suite": suite, "setup": setup} + + +def iter_all_ci_configs() -> list[dict[str, str]]: + """Return unique CI configs across default and scheduled matrices.""" + combined = list(DEFAULT_CI_MATRIX) + for configs in SCHEDULED_MATRICES.values(): + combined.extend(configs) + return list(_unique_configs(combined)) + + +def collect_ci_tests(): + """Collect all test IDs that would run in CI using --collect-only with JSON output.""" + + all_test_ids = set() + configs = iter_all_ci_configs() + + for config in configs: + print(f"Collecting tests for suite={config['suite']}, setup={config['setup']}...") + + # Create a temporary file for JSON report + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json_report_file = f.name + + try: + # Configure environment for collection run + env = os.environ.copy() + env["PYTEST_ADDOPTS"] = f"--json-report --json-report-file={json_report_file}" + repo_path = str(REPO_ROOT) + existing_path = env.get("PYTHONPATH", "") + env["PYTHONPATH"] = f"{repo_path}{os.pathsep}{existing_path}" if existing_path else repo_path + + result = subprocess.run( + [ + "./scripts/integration-tests.sh", + "--collect-only", + "--suite", + config["suite"], + "--setup", + config["setup"], + ], + capture_output=True, + text=True, + cwd=REPO_ROOT, + env=env, + ) + + if result.returncode != 0: + raise RuntimeError( + "Test collection failed.\n" + f"Command: {' '.join(result.args)}\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + + # Parse JSON report to extract test IDs + try: + with open(json_report_file) as f: + report = json.load(f) + + # The "collectors" field contains collected test items + # Each collector has a "result" array with test node IDs + for collector in report.get("collectors", []): + for item in collector.get("result", []): + # The "nodeid" field is the test ID + if "nodeid" in item: + all_test_ids.add(item["nodeid"]) + + print(f" Collected {len(all_test_ids)} test IDs so far") + + except (json.JSONDecodeError, FileNotFoundError) as e: + print(f" Warning: Failed to parse JSON report: {e}") + continue + + finally: + # Clean up temp file + if os.path.exists(json_report_file): + os.unlink(json_report_file) + + print(f"\nTotal unique test IDs collected: {len(all_test_ids)}") + return all_test_ids, configs + + +def get_base_test_id(test_id: str) -> str: + """Extract base test ID without parameterization. 
+ + Example: + 'tests/integration/inference/test_foo.py::test_bar[param1-param2]' + -> 'tests/integration/inference/test_foo.py::test_bar' + """ + return test_id.split("[")[0] if "[" in test_id else test_id + + +def find_all_recordings(): + """Find all recording JSON files.""" + return list((REPO_ROOT / "tests/integration").rglob("recordings/*.json")) + + +def analyze_recordings(ci_test_ids, dry_run=True): + """Analyze recordings and identify unused ones.""" + + # Use full test IDs with parameterization for exact matching + all_recordings = find_all_recordings() + print(f"\nTotal recording files: {len(all_recordings)}") + + # Categorize recordings + used_recordings = [] + unused_recordings = [] + shared_recordings = [] # model-list endpoints without test_id + parse_errors = [] + + for json_file in all_recordings: + try: + with open(json_file) as f: + data = json.load(f) + + test_id = data.get("test_id", "") + + if not test_id: + # Shared/infrastructure recordings (model lists, etc) + shared_recordings.append(json_file) + continue + + # Match exact test_id (with full parameterization) + if test_id in ci_test_ids: + used_recordings.append(json_file) + else: + unused_recordings.append((json_file, test_id)) + + except Exception as e: + parse_errors.append((json_file, str(e))) + + # Print summary + print("\nRecording Analysis:") + print(f" Used in CI: {len(used_recordings)}") + print(f" Shared (no ID): {len(shared_recordings)}") + print(f" UNUSED: {len(unused_recordings)}") + print(f" Parse errors: {len(parse_errors)}") + + if unused_recordings: + print("\nUnused recordings by test:") + + # Group by base test ID + by_test = defaultdict(list) + for file, test_id in unused_recordings: + base = get_base_test_id(test_id) + by_test[base].append(file) + + for base_test, files in sorted(by_test.items()): + print(f"\n {base_test}") + print(f" ({len(files)} recording(s))") + for f in files[:3]: + print(f" - {f.relative_to(REPO_ROOT / 'tests/integration')}") + if len(files) > 3: + print(f" ... and {len(files) - 3} more") + + if parse_errors: + print("\nParse errors:") + for file, error in parse_errors[:5]: + print(f" {file.relative_to(REPO_ROOT)}: {error}") + if len(parse_errors) > 5: + print(f" ... 
and {len(parse_errors) - 5} more") + + # Perform cleanup + if not dry_run: + print(f"\nDeleting {len(unused_recordings)} unused recordings...") + for file, _ in unused_recordings: + file.unlink() + print(f" Deleted: {file.relative_to(REPO_ROOT / 'tests/integration')}") + print("✅ Cleanup complete") + else: + print("\n(Dry run - no files deleted)") + print("\nTo delete these files, run with --delete") + + return len(unused_recordings) + + +def main(): + parser = argparse.ArgumentParser( + description="Clean up unused test recordings based on CI test collection", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("--delete", action="store_true", help="Actually delete unused recordings (default is dry-run)") + parser.add_argument("--manifest", help="Save collected test IDs to file (optional)") + + args = parser.parse_args() + + print("=" * 60) + print("Recording Cleanup Utility") + print("=" * 60) + + ci_configs = iter_all_ci_configs() + + print(f"\nDetected CI configurations: {len(ci_configs)}") + for config in ci_configs: + print(f" - suite={config['suite']}, setup={config['setup']}") + + # Collect test IDs from CI configurations + ci_test_ids, _ = collect_ci_tests() + + if args.manifest: + with open(args.manifest, "w") as f: + for test_id in sorted(ci_test_ids): + f.write(f"{test_id}\n") + print(f"\nSaved test IDs to: {args.manifest}") + + # Analyze and cleanup + unused_count = analyze_recordings(ci_test_ids, dry_run=not args.delete) + + print("\n" + "=" * 60) + if unused_count > 0 and not args.delete: + print("Run with --delete to remove unused recordings") + + +if __name__ == "__main__": + main() diff --git a/scripts/distro_codegen.py b/scripts/distro_codegen.py index ff5025b78..4dbdda5c4 100755 --- a/scripts/distro_codegen.py +++ b/scripts/distro_codegen.py @@ -55,7 +55,7 @@ def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracke if template_func := getattr(module, "get_distribution_template", None): distro = template_func() - yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name + yaml_output_dir = REPO_ROOT / "src" / "llama_stack" / "distributions" / distro.name doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro" change_tracker.add_paths(yaml_output_dir, doc_output_dir) distro.save_distribution( @@ -93,7 +93,7 @@ def pre_import_distros(distro_dirs: list[Path]) -> None: def main(): - distros_dir = REPO_ROOT / "llama_stack" / "distributions" + distros_dir = REPO_ROOT / "src" / "llama_stack" / "distributions" change_tracker = ChangedPathTracker() with Progress( diff --git a/scripts/docker.sh b/scripts/docker.sh index a0690c8a9..3b2db5ca7 100755 --- a/scripts/docker.sh +++ b/scripts/docker.sh @@ -215,6 +215,16 @@ build_image() { --build-arg "LLAMA_STACK_DIR=/workspace" ) + # Pass UV index configuration for release branches + if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then + echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL" + build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL") + fi + if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then + echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY" + build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY") + fi + if ! 
"${build_cmd[@]}"; then echo "❌ Failed to build Docker image" exit 1 @@ -277,9 +287,9 @@ start_container() { # On macOS/Windows, use host.docker.internal to reach host from container # On Linux with --network host, use localhost if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then - OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}" + OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434/v1}" else - OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}" + OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434/v1}" fi DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL" diff --git a/scripts/gen-changelog.py b/scripts/gen-changelog.py deleted file mode 100755 index 3df2af06b..000000000 --- a/scripts/gen-changelog.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os - -import requests - - -def get_all_releases(token): - url = "https://api.github.com/repos/meta-llama/llama-stack/releases" - headers = {"Accept": "application/vnd.github.v3+json"} - - if token: - headers["Authorization"] = f"token {token}" - - response = requests.get(url, headers=headers) - - if response.status_code == 200: - return response.json() - else: - raise Exception(f"Error fetching releases: {response.status_code}, {response.text}") - - -def clean_release_body(body): - """Remove '## All changes' sections from release notes.""" - lines = body.split("\n") - cleaned_lines = [] - skip_mode = False - - for line in lines: - if line.strip() in [ - "## All changes", - "### What's Changed", - "## What's Changed", - "## New Contributors", - ]: - skip_mode = True - elif skip_mode and line.startswith("##"): - # Found a new section, stop skipping - skip_mode = False - cleaned_lines.append(line) - elif not skip_mode: - cleaned_lines.append(line) - - return "\n".join(cleaned_lines) - - -def merge_release_notes(output_file, token=None): - releases = get_all_releases(token) - - with open(output_file, "w", encoding="utf-8") as md_file: - md_file.write("# Changelog\n\n") - - for release in releases: - md_file.write(f"# {release['tag_name']}\n") - md_file.write(f"Published on: {release['published_at']}\n\n") - - # Clean the release body to remove "## All changes" sections - cleaned_body = clean_release_body(release["body"]) - md_file.write(f"{cleaned_body}\n\n") - - md_file.write("---\n\n") - - print(f"Merged release notes saved to {output_file}") - - -if __name__ == "__main__": - OUTPUT_FILE = "CHANGELOG.md" - TOKEN = os.getenv("GITHUB_TOKEN") - merge_release_notes(OUTPUT_FILE, TOKEN) diff --git a/scripts/generate_ci_matrix.py b/scripts/generate_ci_matrix.py new file mode 100755 index 000000000..0d4e924b3 --- /dev/null +++ b/scripts/generate_ci_matrix.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Generate CI test matrix from ci_matrix.json with schedule/input overrides. + +This script is used by .github/workflows/integration-tests.yml to generate +the test matrix dynamically based on the CI_MATRIX definition. 
+""" + +import json +from pathlib import Path + +CI_MATRIX_FILE = Path(__file__).parent.parent / "tests/integration/ci_matrix.json" + +with open(CI_MATRIX_FILE) as f: + matrix_config = json.load(f) + +DEFAULT_MATRIX = matrix_config["default"] +SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {}) + + +def generate_matrix(schedule="", test_setup=""): + """ + Generate test matrix based on schedule or manual input. + + Args: + schedule: GitHub cron schedule string (e.g., "1 0 * * 0" for weekly) + test_setup: Manual test setup input (e.g., "ollama-vision") + + Returns: + Matrix configuration as JSON string + """ + # Weekly scheduled test matrices + if schedule and schedule in SCHEDULE_MATRICES: + matrix = SCHEDULE_MATRICES[schedule] + # Manual input for specific setup + elif test_setup == "ollama-vision": + matrix = [{"suite": "vision", "setup": "ollama-vision"}] + # Default: use JSON-defined matrix + else: + matrix = DEFAULT_MATRIX + + # GitHub Actions expects {"include": [...]} format + return json.dumps({"include": matrix}) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Generate CI test matrix") + parser.add_argument("--schedule", default="", help="GitHub schedule cron string") + parser.add_argument("--test-setup", default="", help="Manual test setup input") + + args = parser.parse_args() + + print(generate_matrix(args.schedule, args.test_setup)) diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py index 855033f95..381bbc6a7 100755 --- a/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -15,10 +15,10 @@ from pathlib import Path import fire -from llama_stack.apis.common.errors import ModelNotFoundError from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama4.generation import Llama4 from llama_stack.models.llama.sku_list import resolve_model +from llama_stack_api import ModelNotFoundError THIS_DIR = Path(__file__).parent.resolve() diff --git a/scripts/get_setup_env.py b/scripts/get_setup_env.py index fad601e76..755cfefea 100755 --- a/scripts/get_setup_env.py +++ b/scripts/get_setup_env.py @@ -16,16 +16,16 @@ import sys from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS -def get_setup_env_vars(setup_name, suite_name=None): +def get_setup_config(setup_name, suite_name=None): """ - Get environment variables for a setup, with optional suite default fallback. + Get full configuration (env vars + defaults) for a setup. 
Args: setup_name: Name of the setup (e.g., 'ollama', 'gpt') suite_name: Optional suite name to get default setup if setup_name is None Returns: - Dictionary of environment variables + Dictionary with 'env' and 'defaults' keys """ # If no setup specified, try to get default from suite if not setup_name and suite_name: @@ -34,7 +34,7 @@ def get_setup_env_vars(setup_name, suite_name=None): setup_name = suite.default_setup if not setup_name: - return {} + return {"env": {}, "defaults": {}} setup = SETUP_DEFINITIONS.get(setup_name) if not setup: @@ -44,27 +44,31 @@ def get_setup_env_vars(setup_name, suite_name=None): ) sys.exit(1) - return setup.env + return {"env": setup.env, "defaults": setup.defaults} def main(): - parser = argparse.ArgumentParser(description="Extract environment variables from a test setup") + parser = argparse.ArgumentParser(description="Extract environment variables and defaults from a test setup") parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)") parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided") parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)") args = parser.parse_args() - env_vars = get_setup_env_vars(args.setup, args.suite) + config = get_setup_config(args.setup, args.suite) if args.format == "bash": - # Output as bash export statements - for key, value in env_vars.items(): + # Output env vars as bash export statements + for key, value in config["env"].items(): print(f"export {key}='{value}'") + # Output defaults as bash export statements with LLAMA_STACK_TEST_ prefix + for key, value in config["defaults"].items(): + env_key = f"LLAMA_STACK_TEST_{key.upper()}" + print(f"export {env_key}='{value}'") elif args.format == "json": import json - print(json.dumps(env_vars)) + print(json.dumps(config)) if __name__ == "__main__": diff --git a/scripts/install.sh b/scripts/install.sh index 2417097f4..7fe1d3243 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -30,8 +30,10 @@ materialize_telemetry_configs() { local otel_cfg="${dest}/otel-collector-config.yaml" local prom_cfg="${dest}/prometheus.yml" local graf_cfg="${dest}/grafana-datasources.yaml" + local graf_dash_cfg="${dest}/grafana-dashboards.yaml" + local dash_json="${dest}/llama-stack-dashboard.json" - for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do + for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg" "$graf_dash_cfg" "$dash_json"; do if [ -e "$asset" ]; then die "Telemetry asset ${asset} already exists; refusing to overwrite" fi @@ -103,6 +105,7 @@ datasources: type: prometheus access: proxy url: http://prometheus:9090 + uid: prometheus isDefault: true editable: true @@ -112,6 +115,224 @@ datasources: url: http://jaeger:16686 editable: true EOF + + cat <<'EOF' > "$graf_dash_cfg" +apiVersion: 1 + +providers: + - name: 'Llama Stack' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards +EOF + + # Copy the dashboard JSON inline to avoid line-length issues + cat > "$dash_json" <<'DASHBOARD_JSON' +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "lineInterpolation": "linear", + "showPoints": "auto", + 
"fillOpacity": 10 + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{"color": "green", "value": null}] + } + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}, + "id": 1, + "options": { + "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "expr": "llama_stack_completion_tokens_total", + "legendFormat": "{{model_id}} ({{provider_id}})", + "refId": "A" + } + ], + "title": "Completion Tokens", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]} + } + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "id": 2, + "options": { + "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"} + ], + "title": "Prompt & Total Tokens", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "ms" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, + "id": 3, + "options": { + "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"} + ], + "title": "HTTP Request Duration (p95, p99)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]} + } + }, + "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8}, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "auto" + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"} + ], + "title": "Total Requests", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": 
null}]} + } + }, + "gridPos": {"h": 8, "w": 6, "x": 18, "y": 8}, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "auto" + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"} + ], + "title": "Active Requests", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "reqps" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}, + "id": 6, + "options": { + "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"} + ], + "title": "Request Rate", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "Bps" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}, + "id": 7, + "options": { + "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "none"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"} + ], + "title": "Request/Response Sizes", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "tags": ["llama-stack"], + "templating": {"list": []}, + "time": {"from": "now-15m", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Llama Stack Metrics", + "uid": "llama-stack-metrics", + "version": 0, + "weekStart": "" +} +DASHBOARD_JSON } # Cleanup function to remove temporary files @@ -372,6 +593,8 @@ if [ "$WITH_TELEMETRY" = true ]; then -e GF_SECURITY_ADMIN_PASSWORD=admin \ -e GF_USERS_ALLOW_SIGN_UP=false \ -v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \ + -v "${TELEMETRY_ASSETS_DIR}/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \ + -v "${TELEMETRY_ASSETS_DIR}/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \ docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then die "Grafana startup failed" fi @@ -417,7 +640,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \ --network llama-net \ -p "${PORT}:${PORT}" \ "${server_env_opts[@]}" \ - -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \ + -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}/v1" \ 
"${SERVER_IMAGE}" --port "${PORT}") log "🦙 Starting Llama Stack..." diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 93739052b..9907cd0bb 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -20,10 +20,11 @@ TEST_PATTERN="" INFERENCE_MODE="replay" EXTRA_PARAMS="" COLLECT_ONLY=false +TYPESCRIPT_ONLY=false # Function to display usage usage() { - cat << EOF + cat < /dev/null; then +if [[ "$COLLECT_ONLY" == false ]] && ! command -v llama &>/dev/null; then echo "llama could not be found, ensure llama-stack is installed" exit 1 fi -if ! command -v pytest &> /dev/null; then +if ! command -v pytest &>/dev/null; then echo "pytest could not be found, ensure pytest is installed" exit 1 fi +# Helper function to find next available port +find_available_port() { + local start_port=$1 + local port=$start_port + for ((i=0; i<100; i++)); do + if ! lsof -Pi :$port -sTCP:LISTEN -t >/dev/null 2>&1; then + echo $port + return 0 + fi + ((port++)) + done + echo "Failed to find available port starting from $start_port" >&2 + return 1 +} + +run_client_ts_tests() { + if ! command -v npm &>/dev/null; then + echo "npm could not be found; ensure Node.js is installed" + return 1 + fi + + pushd tests/integration/client-typescript >/dev/null + + # Determine if TS_CLIENT_PATH is a directory path or an npm version + if [[ -d "$TS_CLIENT_PATH" ]]; then + # It's a directory path - use local checkout + if [[ ! -f "$TS_CLIENT_PATH/package.json" ]]; then + echo "Error: $TS_CLIENT_PATH exists but doesn't look like llama-stack-client-typescript (no package.json)" + popd >/dev/null + return 1 + fi + echo "Using local llama-stack-client-typescript from: $TS_CLIENT_PATH" + + # Build the TypeScript client first + echo "Building TypeScript client..." + pushd "$TS_CLIENT_PATH" >/dev/null + npm install --silent + npm run build --silent + popd >/dev/null + + # Install other dependencies first + if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then + npm ci --silent + else + npm install --silent + fi + + # Then install the client from local directory + echo "Installing llama-stack-client from: $TS_CLIENT_PATH" + npm install "$TS_CLIENT_PATH" --silent + else + # It's an npm version specifier - install from npm + echo "Installing llama-stack-client@${TS_CLIENT_PATH} from npm" + if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then + npm ci --silent + npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent + else + npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent + fi + fi + + # Verify installation + echo "Verifying llama-stack-client installation..." + if npm list llama-stack-client 2>/dev/null | grep -q llama-stack-client; then + echo "✅ llama-stack-client successfully installed" + npm list llama-stack-client + else + echo "❌ llama-stack-client not found in node_modules" + echo "Installed packages:" + npm list --depth=0 + popd >/dev/null + return 1 + fi + + echo "Running TypeScript tests for suite $TEST_SUITE (setup $TEST_SETUP)" + npm test + + popd >/dev/null +} + # Start Llama Stack Server if needed if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then + # Find an available port for the server + LLAMA_STACK_PORT=$(find_available_port 8321) + if [[ $? -ne 0 ]]; then + echo "Error: $LLAMA_STACK_PORT" + exit 1 + fi + export LLAMA_STACK_PORT + export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT" + echo "Will use port: $LLAMA_STACK_PORT" + stop_server() { echo "Stopping Llama Stack Server..." 
- pids=$(lsof -i :8321 | awk 'NR>1 {print $2}') + pids=$(lsof -i :$LLAMA_STACK_PORT | awk 'NR>1 {print $2}') if [[ -n "$pids" ]]; then echo "Killing Llama Stack Server processes: $pids" kill -9 $pids @@ -201,33 +319,39 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then echo "Llama Stack Server stopped" } - # check if server is already running - if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then - echo "Llama Stack Server is already running, skipping start" - else - echo "=== Starting Llama Stack Server ===" - export LLAMA_STACK_LOG_WIDTH=120 + echo "=== Starting Llama Stack Server ===" + export LLAMA_STACK_LOG_WIDTH=120 - # remove "server:" from STACK_CONFIG - stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://') - nohup llama stack run $stack_config > server.log 2>&1 & + # Configure telemetry collector for server mode + # Use a fixed port for the OTEL collector so the server can connect to it + COLLECTOR_PORT=4317 + export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}" + # Disabled: https://github.com/llamastack/llama-stack/issues/4089 + #export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}" + export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf" + export OTEL_BSP_SCHEDULE_DELAY="200" + export OTEL_BSP_EXPORT_TIMEOUT="2000" + export OTEL_METRIC_EXPORT_INTERVAL="200" - echo "Waiting for Llama Stack Server to start..." - for i in {1..30}; do - if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then - echo "✅ Llama Stack Server started successfully" - break - fi - if [[ $i -eq 30 ]]; then - echo "❌ Llama Stack Server failed to start" - echo "Server logs:" - cat server.log - exit 1 - fi - sleep 1 - done - echo "" - fi + # remove "server:" from STACK_CONFIG + stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://') + nohup llama stack run $stack_config >server.log 2>&1 & + + echo "Waiting for Llama Stack Server to start on port $LLAMA_STACK_PORT..." + for i in {1..30}; do + if curl -s http://localhost:$LLAMA_STACK_PORT/v1/health 2>/dev/null | grep -q "OK"; then + echo "✅ Llama Stack Server started successfully" + break + fi + if [[ $i -eq 30 ]]; then + echo "❌ Llama Stack Server failed to start" + echo "Server logs:" + cat server.log + exit 1 + fi + sleep 1 + done + echo "" trap stop_server EXIT ERR INT TERM fi @@ -239,7 +363,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then container_name="llama-stack-test-$DISTRO" if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then echo "Dumping container logs before stopping..." - docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true + docker logs "$container_name" >"docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true echo "Stopping and removing container: $container_name" docker stop "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true @@ -251,7 +375,15 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then # Extract distribution name from docker:distro format DISTRO=$(echo "$STACK_CONFIG" | sed 's/^docker://') - export LLAMA_STACK_PORT=8321 + # Find an available port for the docker container + LLAMA_STACK_PORT=$(find_available_port 8321) + if [[ $? 
-ne 0 ]]; then + echo "Error: $LLAMA_STACK_PORT" + exit 1 + fi + export LLAMA_STACK_PORT + export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT" + echo "Will use port: $LLAMA_STACK_PORT" echo "=== Building Docker Image for distribution: $DISTRO ===" containerfile="$ROOT_DIR/containers/Containerfile" @@ -271,6 +403,16 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then --build-arg "LLAMA_STACK_DIR=/workspace" ) + # Pass UV index configuration for release branches + if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then + echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL" + build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL") + fi + if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then + echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY" + build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY") + fi + if ! "${build_cmd[@]}"; then echo "❌ Failed to build Docker image" exit 1 @@ -284,10 +426,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then docker stop "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true + # Configure telemetry collector port shared between host and container + COLLECTOR_PORT=4317 + export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}" + # Build environment variables for docker run DOCKER_ENV_VARS="" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_MCP_HOST=${LLAMA_STACK_TEST_MCP_HOST:-host.docker.internal}" + # Disabled: https://github.com/llamastack/llama-stack/issues/4089 + #DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_SCHEDULE_DELAY=200" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_EXPORT_TIMEOUT=2000" # Pass through API keys if they exist [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY" @@ -300,6 +452,10 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then [ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL" [ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL" + if [[ "$TEST_SETUP" == "vllm" ]]; then + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e VLLM_URL=http://localhost:8000/v1" + fi + # Determine the actual image name (may have localhost/ prefix) IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1) if [[ -z "$IMAGE_NAME" ]]; then @@ -308,8 +464,24 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then fi echo "Using image: $IMAGE_NAME" - docker run -d --network host --name "$container_name" \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + # On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM + # Use regular port mapping instead + NETWORK_MODE="" + PORT_MAPPINGS="" + ADD_HOST_FLAG="" + if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then + NETWORK_MODE="--network host" + # On Linux with host network, also add host.docker.internal mapping for consistency + ADD_HOST_FLAG="--add-host=host.docker.internal:host-gateway" + else + # On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry + 
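+        # Illustrative resulting invocations (image name and ports assume the defaults above):
+        #   Linux:  docker run -d --network host --add-host=host.docker.internal:host-gateway ...
+        #   macOS:  docker run -d -p 8321:8321 -p 4317:4317 ...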
+        PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
+        echo "Using bridge networking with port mapping (non-Linux)"
+    fi
+
+    docker run -d $NETWORK_MODE --name "$container_name" \
+        $PORT_MAPPINGS \
+        $ADD_HOST_FLAG \
        $DOCKER_ENV_VARS \
        "$IMAGE_NAME" \
        --port $LLAMA_STACK_PORT
@@ -340,11 +512,6 @@ fi
 
 echo "=== Running Integration Tests ==="
 EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
-# Additional exclusions for vllm setup
-if [[ "$TEST_SETUP" == "vllm" ]]; then
-    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
-fi
-
 PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
 if [[ -n "$TEST_PATTERN" ]]; then
     PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
@@ -391,16 +558,23 @@ if [[ -n "$STACK_CONFIG" ]]; then
     STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG"
 fi
-pytest -s -v $PYTEST_TARGET \
-    $STACK_CONFIG_ARG \
-    --inference-mode="$INFERENCE_MODE" \
-    -k "$PYTEST_PATTERN" \
-    $EXTRA_PARAMS \
-    --color=yes \
-    --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
-    --color=yes $EXTRA_PARAMS \
-    --capture=tee-sys
-exit_code=$?
+# Run Python tests unless typescript-only mode
+if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then
+    pytest -s -v $PYTEST_TARGET \
+        $STACK_CONFIG_ARG \
+        --inference-mode="$INFERENCE_MODE" \
+        -k "$PYTEST_PATTERN" \
+        $EXTRA_PARAMS \
+        --color=yes \
+        --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
+        --capture=tee-sys
+    exit_code=$?
+else
+    echo "Skipping Python tests (--typescript-only mode)"
+    exit_code=0
+fi
+
 set +x
 set -e
@@ -411,22 +585,23 @@ elif [ $exit_code -eq 5 ]; then
 else
    echo "❌ Tests failed"
    echo ""
-    echo "=== Dumping last 100 lines of logs for debugging ==="
-
    # Output server or container logs based on stack config
    if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then
-        echo "--- Last 100 lines of server.log ---"
-        tail -100 server.log
+        echo "--- Server side failures can be located inside server.log (available from artifacts on CI) ---"
    elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then
        docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log"
        if [[ -f "$docker_log_file" ]]; then
-            echo "--- Last 100 lines of $docker_log_file ---"
-            tail -100 "$docker_log_file"
+            echo "--- Server side failures can be located inside $docker_log_file (available from artifacts on CI) ---"
        fi
    fi
    exit 1
 fi
+# Run TypeScript client tests if TS_CLIENT_PATH is set
+if [[ $exit_code -eq 0 && -n "${TS_CLIENT_PATH:-}" && "${LLAMA_STACK_TEST_STACK_CONFIG_TYPE:-}" == "server" ]]; then
+    run_client_ts_tests
+fi
+
 echo ""
 echo "=== Integration Tests Complete ==="
diff --git a/scripts/openapi_generator/__init__.py b/scripts/openapi_generator/__init__.py
new file mode 100644
index 000000000..834836f76
--- /dev/null
+++ b/scripts/openapi_generator/__init__.py
@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+OpenAPI generator module for Llama Stack.
+
+This module provides functionality to generate OpenAPI specifications
+from FastAPI applications.
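+
+The entry points are resolved lazily through the module-level __getattr__ below,
+so a sketch of intended use (assuming the package is importable from the scripts
+directory) is:
+
+    import openapi_generator
+    openapi_generator.main()  # triggers the lazy import of .main on first access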
+""" + +__all__ = ["generate_openapi_spec", "main"] + + +def __getattr__(name: str): + if name in {"generate_openapi_spec", "main"}: + from .main import generate_openapi_spec as _gos + from .main import main as _main + + return {"generate_openapi_spec": _gos, "main": _main}[name] + raise AttributeError(name) diff --git a/scripts/openapi_generator/__main__.py b/scripts/openapi_generator/__main__.py new file mode 100644 index 000000000..d857e5e7e --- /dev/null +++ b/scripts/openapi_generator/__main__.py @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Entry point for running the openapi_generator module as a package. +""" + +from .main import main + +if __name__ == "__main__": + main() diff --git a/scripts/openapi_generator/_legacy_order.py b/scripts/openapi_generator/_legacy_order.py new file mode 100644 index 000000000..72863c8fc --- /dev/null +++ b/scripts/openapi_generator/_legacy_order.py @@ -0,0 +1,502 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Temporary ordering helpers extracted from origin/main client-sdks/stainless/openapi.yml. + +These lists help the new generator match the previous ordering so that diffs +remain readable while we debug schema content regressions. Remove once stable. +""" + +LEGACY_PATH_ORDER = [ + "/v1/batches", + "/v1/batches/{batch_id}", + "/v1/batches/{batch_id}/cancel", + "/v1/chat/completions", + "/v1/chat/completions/{completion_id}", + "/v1/completions", + "/v1/conversations", + "/v1/conversations/{conversation_id}", + "/v1/conversations/{conversation_id}/items", + "/v1/conversations/{conversation_id}/items/{item_id}", + "/v1/embeddings", + "/v1/files", + "/v1/files/{file_id}", + "/v1/files/{file_id}/content", + "/v1/health", + "/v1/inspect/routes", + "/v1/models", + "/v1/models/{model_id}", + "/v1/moderations", + "/v1/prompts", + "/v1/prompts/{prompt_id}", + "/v1/prompts/{prompt_id}/set-default-version", + "/v1/prompts/{prompt_id}/versions", + "/v1/providers", + "/v1/providers/{provider_id}", + "/v1/responses", + "/v1/responses/{response_id}", + "/v1/responses/{response_id}/input_items", + "/v1/safety/run-shield", + "/v1/scoring-functions", + "/v1/scoring-functions/{scoring_fn_id}", + "/v1/scoring/score", + "/v1/scoring/score-batch", + "/v1/shields", + "/v1/shields/{identifier}", + "/v1/tool-runtime/invoke", + "/v1/tool-runtime/list-tools", + "/v1/toolgroups", + "/v1/toolgroups/{toolgroup_id}", + "/v1/tools", + "/v1/tools/{tool_name}", + "/v1/vector-io/insert", + "/v1/vector-io/query", + "/v1/vector_stores", + "/v1/vector_stores/{vector_store_id}", + "/v1/vector_stores/{vector_store_id}/file_batches", + "/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}", + "/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + "/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + "/v1/vector_stores/{vector_store_id}/files", + "/v1/vector_stores/{vector_store_id}/files/{file_id}", + "/v1/vector_stores/{vector_store_id}/files/{file_id}/content", + "/v1/vector_stores/{vector_store_id}/search", + "/v1/version", + "/v1beta/datasetio/append-rows/{dataset_id}", + "/v1beta/datasetio/iterrows/{dataset_id}", + "/v1beta/datasets", + "/v1beta/datasets/{dataset_id}", + 
"/v1alpha/eval/benchmarks", + "/v1alpha/eval/benchmarks/{benchmark_id}", + "/v1alpha/eval/benchmarks/{benchmark_id}/evaluations", + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs", + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", + "/v1alpha/inference/rerank", + "/v1alpha/post-training/job/artifacts", + "/v1alpha/post-training/job/cancel", + "/v1alpha/post-training/job/status", + "/v1alpha/post-training/jobs", + "/v1alpha/post-training/preference-optimize", + "/v1alpha/post-training/supervised-fine-tune", +] + +LEGACY_SCHEMA_ORDER = [ + "Error", + "ListBatchesResponse", + "CreateBatchRequest", + "Batch", + "Order", + "ListOpenAIChatCompletionResponse", + "OpenAIAssistantMessageParam", + "OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionContentPartParam", + "OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionToolCall", + "OpenAIChatCompletionToolCallFunction", + "OpenAIChatCompletionUsage", + "OpenAIChoice", + "OpenAIChoiceLogprobs", + "OpenAIDeveloperMessageParam", + "OpenAIFile", + "OpenAIFileFile", + "OpenAIImageURL", + "OpenAIMessageParam", + "OpenAISystemMessageParam", + "OpenAITokenLogProb", + "OpenAIToolMessageParam", + "OpenAITopLogProb", + "OpenAIUserMessageParam", + "OpenAIJSONSchema", + "OpenAIResponseFormatJSONObject", + "OpenAIResponseFormatJSONSchema", + "OpenAIResponseFormatParam", + "OpenAIResponseFormatText", + "OpenAIChatCompletionRequestWithExtraBody", + "OpenAIChatCompletion", + "OpenAIChatCompletionChunk", + "OpenAIChoiceDelta", + "OpenAIChunkChoice", + "OpenAICompletionWithInputMessages", + "OpenAICompletionRequestWithExtraBody", + "OpenAICompletion", + "OpenAICompletionChoice", + "ConversationItem", + "OpenAIResponseAnnotationCitation", + "OpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseAnnotationFileCitation", + "OpenAIResponseAnnotationFilePath", + "OpenAIResponseAnnotations", + "OpenAIResponseContentPartRefusal", + "OpenAIResponseInputFunctionToolCallOutput", + "OpenAIResponseInputMessageContent", + "OpenAIResponseInputMessageContentFile", + "OpenAIResponseInputMessageContentImage", + "OpenAIResponseInputMessageContentText", + "OpenAIResponseMCPApprovalRequest", + "OpenAIResponseMCPApprovalResponse", + "OpenAIResponseMessage", + "OpenAIResponseOutputMessageContent", + "OpenAIResponseOutputMessageContentOutputText", + "OpenAIResponseOutputMessageFileSearchToolCall", + "OpenAIResponseOutputMessageFunctionToolCall", + "OpenAIResponseOutputMessageMCPCall", + "OpenAIResponseOutputMessageMCPListTools", + "OpenAIResponseOutputMessageWebSearchToolCall", + "CreateConversationRequest", + "Conversation", + "UpdateConversationRequest", + "ConversationDeletedResource", + "ConversationItemList", + "AddItemsRequest", + "ConversationItemDeletedResource", + "OpenAIEmbeddingsRequestWithExtraBody", + "OpenAIEmbeddingData", + "OpenAIEmbeddingUsage", + "OpenAIEmbeddingsResponse", + "OpenAIFilePurpose", + "ListOpenAIFileResponse", + "OpenAIFileObject", + "ExpiresAfter", + "OpenAIFileDeleteResponse", + "Response", + "HealthInfo", + "RouteInfo", + "ListRoutesResponse", + "OpenAIModel", + "OpenAIListModelsResponse", + "Model", + "ModelType", + "RunModerationRequest", + "ModerationObject", + "ModerationObjectResults", + "Prompt", + "ListPromptsResponse", + "CreatePromptRequest", + "UpdatePromptRequest", + "SetDefaultVersionRequest", + "ProviderInfo", + "ListProvidersResponse", + "ListOpenAIResponseObject", + "OpenAIResponseError", + "OpenAIResponseInput", + 
"OpenAIResponseInputToolFileSearch", + "OpenAIResponseInputToolFunction", + "OpenAIResponseInputToolWebSearch", + "OpenAIResponseObjectWithInput", + "OpenAIResponseOutput", + "OpenAIResponsePrompt", + "OpenAIResponseText", + "OpenAIResponseTool", + "OpenAIResponseToolMCP", + "OpenAIResponseUsage", + "ResponseGuardrailSpec", + "OpenAIResponseInputTool", + "OpenAIResponseInputToolMCP", + "CreateOpenaiResponseRequest", + "OpenAIResponseObject", + "OpenAIResponseContentPartOutputText", + "OpenAIResponseContentPartReasoningSummary", + "OpenAIResponseContentPartReasoningText", + "OpenAIResponseObjectStream", + "OpenAIResponseObjectStreamResponseCompleted", + "OpenAIResponseObjectStreamResponseContentPartAdded", + "OpenAIResponseObjectStreamResponseContentPartDone", + "OpenAIResponseObjectStreamResponseCreated", + "OpenAIResponseObjectStreamResponseFailed", + "OpenAIResponseObjectStreamResponseFileSearchCallCompleted", + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress", + "OpenAIResponseObjectStreamResponseFileSearchCallSearching", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", + "OpenAIResponseObjectStreamResponseInProgress", + "OpenAIResponseObjectStreamResponseIncomplete", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", + "OpenAIResponseObjectStreamResponseMcpCallCompleted", + "OpenAIResponseObjectStreamResponseMcpCallFailed", + "OpenAIResponseObjectStreamResponseMcpCallInProgress", + "OpenAIResponseObjectStreamResponseMcpListToolsCompleted", + "OpenAIResponseObjectStreamResponseMcpListToolsFailed", + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress", + "OpenAIResponseObjectStreamResponseOutputItemAdded", + "OpenAIResponseObjectStreamResponseOutputItemDone", + "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded", + "OpenAIResponseObjectStreamResponseOutputTextDelta", + "OpenAIResponseObjectStreamResponseOutputTextDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone", + "OpenAIResponseObjectStreamResponseReasoningTextDelta", + "OpenAIResponseObjectStreamResponseReasoningTextDone", + "OpenAIResponseObjectStreamResponseRefusalDelta", + "OpenAIResponseObjectStreamResponseRefusalDone", + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted", + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress", + "OpenAIResponseObjectStreamResponseWebSearchCallSearching", + "OpenAIDeleteResponseObject", + "ListOpenAIResponseInputItem", + "RunShieldRequest", + "RunShieldResponse", + "SafetyViolation", + "ViolationLevel", + "AggregationFunctionType", + "ArrayType", + "BasicScoringFnParams", + "BooleanType", + "ChatCompletionInputType", + "CompletionInputType", + "JsonType", + "LLMAsJudgeScoringFnParams", + "NumberType", + "ObjectType", + "RegexParserScoringFnParams", + "ScoringFn", + "ScoringFnParams", + "ScoringFnParamsType", + "StringType", + "UnionType", + "ListScoringFunctionsResponse", + "ScoreRequest", + "ScoreResponse", + "ScoringResult", + "ScoreBatchRequest", + "ScoreBatchResponse", + "Shield", + "ListShieldsResponse", + "InvokeToolRequest", + "ImageContentItem", + "InterleavedContent", + "InterleavedContentItem", + "TextContentItem", + "ToolInvocationResult", + "URL", + "ToolDef", + 
"ListToolDefsResponse", + "ToolGroup", + "ListToolGroupsResponse", + "Chunk", + "ChunkMetadata", + "InsertChunksRequest", + "QueryChunksRequest", + "QueryChunksResponse", + "VectorStoreFileCounts", + "VectorStoreListResponse", + "VectorStoreObject", + "VectorStoreChunkingStrategy", + "VectorStoreChunkingStrategyAuto", + "VectorStoreChunkingStrategyStatic", + "VectorStoreChunkingStrategyStaticConfig", + "OpenAICreateVectorStoreRequestWithExtraBody", + "OpenaiUpdateVectorStoreRequest", + "VectorStoreDeleteResponse", + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "VectorStoreFileBatchObject", + "VectorStoreFileStatus", + "VectorStoreFileLastError", + "VectorStoreFileObject", + "VectorStoreFilesListInBatchResponse", + "VectorStoreListFilesResponse", + "OpenaiAttachFileToVectorStoreRequest", + "OpenaiUpdateVectorStoreFileRequest", + "VectorStoreFileDeleteResponse", + "bool", + "VectorStoreContent", + "VectorStoreFileContentResponse", + "OpenaiSearchVectorStoreRequest", + "VectorStoreSearchResponse", + "VectorStoreSearchResponsePage", + "VersionInfo", + "AppendRowsRequest", + "PaginatedResponse", + "Dataset", + "RowsDataSource", + "URIDataSource", + "ListDatasetsResponse", + "Benchmark", + "ListBenchmarksResponse", + "BenchmarkConfig", + "GreedySamplingStrategy", + "ModelCandidate", + "SamplingParams", + "SystemMessage", + "TopKSamplingStrategy", + "TopPSamplingStrategy", + "EvaluateRowsRequest", + "EvaluateResponse", + "RunEvalRequest", + "Job", + "RerankRequest", + "RerankData", + "RerankResponse", + "Checkpoint", + "PostTrainingJobArtifactsResponse", + "PostTrainingMetric", + "CancelTrainingJobRequest", + "PostTrainingJobStatusResponse", + "ListPostTrainingJobsResponse", + "DPOAlignmentConfig", + "DPOLossType", + "DataConfig", + "DatasetFormat", + "EfficiencyConfig", + "OptimizerConfig", + "OptimizerType", + "TrainingConfig", + "PreferenceOptimizeRequest", + "PostTrainingJob", + "AlgorithmConfig", + "LoraFinetuningConfig", + "QATFinetuningConfig", + "SupervisedFineTuneRequest", + "RegisterModelRequest", + "ParamType", + "RegisterScoringFunctionRequest", + "RegisterShieldRequest", + "RegisterToolGroupRequest", + "DataSource", + "RegisterDatasetRequest", + "RegisterBenchmarkRequest", +] + +LEGACY_RESPONSE_ORDER = ["BadRequest400", "TooManyRequests429", "InternalServerError500", "DefaultError"] + +LEGACY_TAGS = [ + { + "description": "APIs for creating and interacting with agentic systems.", + "name": "Agents", + "x-displayName": "Agents", + }, + { + "description": "The API is designed to allow use of openai client libraries for seamless integration.\n" + "\n" + "This API provides the following extensions:\n" + " - idempotent batch creation\n" + "\n" + "Note: This API is currently under active development and may undergo changes.", + "name": "Batches", + "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, " + "particularly useful for processing large datasets, batch evaluation workflows, and cost-effective " + "inference at scale.", + }, + {"description": "", "name": "Benchmarks"}, + { + "description": "Protocol for conversation management operations.", + "name": "Conversations", + "x-displayName": "Conversations", + }, + {"description": "", "name": "DatasetIO"}, + {"description": "", "name": "Datasets"}, + { + "description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.", + "name": "Eval", + "x-displayName": "Evaluations", + }, + { + "description": "This API is used to upload documents that 
can be used with other Llama Stack APIs.", + "name": "Files", + "x-displayName": "Files", + }, + { + "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n" + "\n" + "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n" + '- LLM models: these models generate "raw" and "chat" (conversational) completions.\n' + "- Embedding models: these models generate embeddings to be used for semantic search.\n" + "- Rerank models: these models reorder the documents based on their relevance to a query.", + "name": "Inference", + "x-displayName": "Inference", + }, + { + "description": "APIs for inspecting the Llama Stack service, including health status, available API routes with " + "methods and implementing providers.", + "name": "Inspect", + "x-displayName": "Inspect", + }, + {"description": "", "name": "Models"}, + {"description": "", "name": "PostTraining (Coming Soon)"}, + {"description": "Protocol for prompt management operations.", "name": "Prompts", "x-displayName": "Prompts"}, + { + "description": "Providers API for inspecting, listing, and modifying providers and their configurations.", + "name": "Providers", + "x-displayName": "Providers", + }, + {"description": "OpenAI-compatible Moderations API.", "name": "Safety", "x-displayName": "Safety"}, + {"description": "", "name": "Scoring"}, + {"description": "", "name": "ScoringFunctions"}, + {"description": "", "name": "Shields"}, + {"description": "", "name": "ToolGroups"}, + {"description": "", "name": "ToolRuntime"}, + {"description": "", "name": "VectorIO"}, +] + +LEGACY_TAG_ORDER = [ + "Agents", + "Batches", + "Benchmarks", + "Conversations", + "DatasetIO", + "Datasets", + "Eval", + "Files", + "Inference", + "Inspect", + "Models", + "PostTraining (Coming Soon)", + "Prompts", + "Providers", + "Safety", + "Scoring", + "ScoringFunctions", + "Shields", + "ToolGroups", + "ToolRuntime", + "VectorIO", +] + +LEGACY_TAG_GROUPS = [ + { + "name": "Operations", + "tags": [ + "Agents", + "Batches", + "Benchmarks", + "Conversations", + "DatasetIO", + "Datasets", + "Eval", + "Files", + "Inference", + "Inspect", + "Models", + "PostTraining (Coming Soon)", + "Prompts", + "Providers", + "Safety", + "Scoring", + "ScoringFunctions", + "Shields", + "ToolGroups", + "ToolRuntime", + "VectorIO", + ], + } +] + +LEGACY_SECURITY = [{"Default": []}] + +LEGACY_OPERATION_KEYS = [ + "responses", + "tags", + "summary", + "description", + "operationId", + "parameters", + "requestBody", + "deprecated", +] diff --git a/scripts/openapi_generator/app.py b/scripts/openapi_generator/app.py new file mode 100644 index 000000000..d972889cd --- /dev/null +++ b/scripts/openapi_generator/app.py @@ -0,0 +1,91 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +FastAPI app creation for OpenAPI generation. +""" + +import inspect +from typing import Any + +from fastapi import FastAPI + +from llama_stack.core.resolver import api_protocol_map +from llama_stack_api import Api + +from .state import _protocol_methods_cache + + +def _get_protocol_method(api: Api, method_name: str) -> Any | None: + """ + Get a protocol method function by API and method name. + Uses caching to avoid repeated lookups. 
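+    The cache maps each Api to a {method_name: function} dict, built lazily from
+    api_protocol_map() on the first lookup.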
+ + Args: + api: The API enum + method_name: The method name (function name) + + Returns: + The function object, or None if not found + """ + global _protocol_methods_cache + + if _protocol_methods_cache is None: + _protocol_methods_cache = {} + protocols = api_protocol_map() + from llama_stack_api.tools import SpecialToolGroup, ToolRuntime + + toolgroup_protocols = { + SpecialToolGroup.rag_tool: ToolRuntime, + } + + for api_key, protocol in protocols.items(): + method_map: dict[str, Any] = {} + protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) + for name, method in protocol_methods: + method_map[name] = method + + # Handle tool_runtime special case + if api_key == Api.tool_runtime: + for tool_group, sub_protocol in toolgroup_protocols.items(): + sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction) + for name, method in sub_protocol_methods: + if hasattr(method, "__webmethod__"): + method_map[f"{tool_group.value}.{name}"] = method + + _protocol_methods_cache[api_key] = method_map + + return _protocol_methods_cache.get(api, {}).get(method_name) + + +def create_llama_stack_app() -> FastAPI: + """ + Create a FastAPI app that represents the Llama Stack API. + This uses the existing route discovery system to automatically find all routes. + """ + app = FastAPI( + title="Llama Stack API", + description="A comprehensive API for building and deploying AI applications", + version="1.0.0", + servers=[ + {"url": "http://any-hosted-llama-stack.com"}, + ], + ) + + # Get all API routes + from llama_stack.core.server.routes import get_all_api_routes + + api_routes = get_all_api_routes() + + # Create FastAPI routes from the discovered routes + from . import endpoints + + for api, routes in api_routes.items(): + for route, webmethod in routes: + # Convert the route to a FastAPI endpoint + endpoints._create_fastapi_endpoint(app, route, webmethod, api) + + return app diff --git a/scripts/openapi_generator/endpoints.py b/scripts/openapi_generator/endpoints.py new file mode 100644 index 000000000..85203cb71 --- /dev/null +++ b/scripts/openapi_generator/endpoints.py @@ -0,0 +1,658 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Endpoint generation logic for FastAPI OpenAPI generation. +""" + +import inspect +import re +import types +import typing +from typing import Annotated, Any, get_args, get_origin + +from fastapi import FastAPI +from fastapi.params import Body as FastAPIBody +from pydantic import Field, create_model + +from llama_stack.log import get_logger +from llama_stack_api import Api +from llama_stack_api.schema_utils import get_registered_schema_info + +from . 
import app as app_module +from .state import _extra_body_fields, register_dynamic_model + +logger = get_logger(name=__name__, category="core") + +type QueryParameter = tuple[str, type, Any, bool] + + +def _to_pascal_case(segment: str) -> str: + tokens = re.findall(r"[A-Za-z]+|\d+", segment) + return "".join(token.capitalize() for token in tokens if token) + + +def _compose_request_model_name(api: Api, method_name: str, variant: str | None = None) -> str: + """Generate a deterministic model name from the protocol method.""" + + def _to_pascal_from_snake(value: str) -> str: + return "".join(segment.capitalize() for segment in value.split("_") if segment) + + base_name = _to_pascal_from_snake(method_name) + if not base_name: + base_name = _to_pascal_case(api.value) + base_name = f"{base_name}Request" + if variant: + base_name = f"{base_name}{variant}" + return base_name + + +def _extract_path_parameters(path: str) -> list[dict[str, Any]]: + """Extract path parameters from a URL path and return them as OpenAPI parameter definitions.""" + matches = re.findall(r"\{([^}:]+)(?::[^}]+)?\}", path) + return [ + { + "name": param_name, + "in": "path", + "required": True, + "schema": {"type": "string"}, + "description": f"Path parameter: {param_name}", + } + for param_name in matches + ] + + +def _create_endpoint_with_request_model( + request_model: type, response_model: type | None, operation_description: str | None +): + """Create an endpoint function with a request body model.""" + + async def endpoint(request: request_model) -> response_model: + return response_model() if response_model else {} + + if operation_description: + endpoint.__doc__ = operation_description + return endpoint + + +def _build_field_definitions(query_parameters: list[QueryParameter], use_any: bool = False) -> dict[str, tuple]: + """Build field definitions for a Pydantic model from query parameters.""" + from typing import Any + + field_definitions = {} + for param_name, param_type, default_value, _ in query_parameters: + if use_any: + field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value) + continue + + base_type = param_type + extracted_field = None + if get_origin(param_type) is Annotated: + args = get_args(param_type) + if args: + base_type = args[0] + for arg in args[1:]: + if isinstance(arg, Field): + extracted_field = arg + break + + try: + if extracted_field: + field_definitions[param_name] = (base_type, extracted_field) + else: + field_definitions[param_name] = ( + base_type, + ... if default_value is inspect.Parameter.empty else default_value, + ) + except (TypeError, ValueError): + field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value) + + # Ensure all parameters are included + expected_params = {name for name, _, _, _ in query_parameters} + missing = expected_params - set(field_definitions.keys()) + if missing: + for param_name, _, default_value, _ in query_parameters: + if param_name in missing: + field_definitions[param_name] = ( + Any, + ... 
if default_value is inspect.Parameter.empty else default_value, + ) + + return field_definitions + + +def _create_dynamic_request_model( + api: Api, + webmethod, + method_name: str, + http_method: str, + query_parameters: list[QueryParameter], + use_any: bool = False, + variant_suffix: str | None = None, +) -> type | None: + """Create a dynamic Pydantic model for request body.""" + try: + field_definitions = _build_field_definitions(query_parameters, use_any) + if not field_definitions: + return None + model_name = _compose_request_model_name(api, method_name, variant_suffix or None) + request_model = create_model(model_name, **field_definitions) + return register_dynamic_model(model_name, request_model) + except Exception: + return None + + +def _build_signature_params( + query_parameters: list[QueryParameter], +) -> tuple[list[inspect.Parameter], dict[str, type]]: + """Build signature parameters and annotations from query parameters.""" + signature_params = [] + param_annotations = {} + for param_name, param_type, default_value, _ in query_parameters: + param_annotations[param_name] = param_type + signature_params.append( + inspect.Parameter( + param_name, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + default=default_value if default_value is not inspect.Parameter.empty else inspect.Parameter.empty, + annotation=param_type, + ) + ) + return signature_params, param_annotations + + +def _extract_operation_description_from_docstring(api: Api, method_name: str) -> str | None: + """Extract operation description from the actual function docstring.""" + func = app_module._get_protocol_method(api, method_name) + if not func or not func.__doc__: + return None + + doc_lines = func.__doc__.split("\n") + description_lines = [] + metadata_markers = (":param", ":type", ":return", ":returns", ":raises", ":exception", ":yield", ":yields", ":cvar") + + for line in doc_lines: + if line.strip().startswith(metadata_markers): + break + description_lines.append(line) + + description = "\n".join(description_lines).strip() + return description if description else None + + +def _extract_response_description_from_docstring(webmethod, response_model, api: Api, method_name: str) -> str: + """Extract response description from the actual function docstring.""" + func = app_module._get_protocol_method(api, method_name) + if not func or not func.__doc__: + return "Successful Response" + for line in func.__doc__.split("\n"): + if line.strip().startswith(":returns:"): + if desc := line.strip()[9:].strip(): + return desc + return "Successful Response" + + +def _get_tag_from_api(api: Api) -> str: + """Extract a tag name from the API enum for API grouping.""" + return api.value.replace("_", " ").title() + + +def _is_file_or_form_param(param_type: Any) -> bool: + """Check if a parameter type is annotated with File() or Form().""" + if get_origin(param_type) is Annotated: + args = get_args(param_type) + if len(args) > 1: + # Check metadata for File or Form + for metadata in args[1:]: + # Check if it's a File or Form instance + if hasattr(metadata, "__class__"): + class_name = metadata.__class__.__name__ + if class_name in ("File", "Form"): + return True + return False + + +def _is_extra_body_field(metadata_item: Any) -> bool: + """Check if a metadata item is an ExtraBodyField instance.""" + from llama_stack_api.schema_utils import ExtraBodyField + + return isinstance(metadata_item, ExtraBodyField) + + +def _should_embed_parameter(param_type: Any) -> bool: + """Determine whether a parameter should be embedded (wrapped) in the 
request body.""" + if get_origin(param_type) is Annotated: + args = get_args(param_type) + metadata = args[1:] if len(args) > 1 else [] + for metadata_item in metadata: + if isinstance(metadata_item, FastAPIBody): + # FastAPI treats embed=None as False, so default to False when unset. + return bool(metadata_item.embed) + # Unannotated parameters default to embed=True through create_dynamic_typed_route. + return True + + +def _is_async_iterator_type(type_obj: Any) -> bool: + """Check if a type is AsyncIterator or AsyncIterable.""" + from collections.abc import AsyncIterable, AsyncIterator + + origin = get_origin(type_obj) + if origin is None: + # Check if it's the class itself + return type_obj in (AsyncIterator, AsyncIterable) or ( + hasattr(type_obj, "__origin__") and type_obj.__origin__ in (AsyncIterator, AsyncIterable) + ) + return origin in (AsyncIterator, AsyncIterable) + + +def _extract_response_models_from_union(union_type: Any) -> tuple[type | None, type | None]: + """ + Extract non-streaming and streaming response models from a union type. + + Returns: + tuple: (non_streaming_model, streaming_model) + """ + non_streaming_model = None + streaming_model = None + + args = get_args(union_type) + for arg in args: + # Check if it's an AsyncIterator + if _is_async_iterator_type(arg): + # Extract the type argument from AsyncIterator[T] + iterator_args = get_args(arg) + if iterator_args: + inner_type = iterator_args[0] + # Check if the inner type is a registered schema (union type) + # or a Pydantic model + if hasattr(inner_type, "model_json_schema"): + streaming_model = inner_type + else: + # Might be a registered schema - check if it's registered + if get_registered_schema_info(inner_type): + # We'll need to look this up later, but for now store the type + streaming_model = inner_type + elif hasattr(arg, "model_json_schema"): + # Non-streaming Pydantic model + if non_streaming_model is None: + non_streaming_model = arg + + return non_streaming_model, streaming_model + + +def _find_models_for_endpoint( + webmethod, api: Api, method_name: str, is_post_put: bool = False +) -> tuple[type | None, type | None, list[tuple[str, type, Any]], list[inspect.Parameter], type | None, str | None]: + """ + Find appropriate request and response models for an endpoint by analyzing the actual function signature. + This uses the protocol function to determine the correct models dynamically. 
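+    For example (illustrative), a POST handler like
+    "async def create(self, body: Annotated[CreateRequest, Body(embed=False)]) -> Item"
+    resolves to (CreateRequest, Item, [], [], None, None).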
+ + Args: + webmethod: The webmethod metadata + api: The API enum for looking up the function + method_name: The method name (function name) + is_post_put: Whether this is a POST, PUT, or PATCH request (GET requests should never have request bodies) + + Returns: + tuple: (request_model, response_model, query_parameters, file_form_params, streaming_response_model, response_schema_name) + where query_parameters is a list of (name, type, default_value, should_embed) tuples + and file_form_params is a list of inspect.Parameter objects for File()/Form() params + and streaming_response_model is the model for streaming responses (AsyncIterator content) + """ + route_descriptor = f"{webmethod.method or 'UNKNOWN'} {webmethod.route}" + try: + # Get the function from the protocol + func = app_module._get_protocol_method(api, method_name) + if not func: + logger.warning("No protocol method for %s.%s (%s)", api, method_name, route_descriptor) + return None, None, [], [], None, None + + # Analyze the function signature + sig = inspect.signature(func) + + # Find request model and collect all body parameters + request_model = None + query_parameters: list[QueryParameter] = [] + file_form_params = [] + path_params = set() + extra_body_params = [] + response_schema_name = None + + # Extract path parameters from the route + if webmethod and hasattr(webmethod, "route"): + path_matches = re.findall(r"\{([^}:]+)(?::[^}]+)?\}", webmethod.route) + path_params = set(path_matches) + + for param_name, param in sig.parameters.items(): + if param_name == "self": + continue + + # Skip *args and **kwargs parameters - these are not real API parameters + if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD): + continue + + # Check if this is a path parameter + if param_name in path_params: + # Path parameters are handled separately, skip them + continue + + # Check if it's a File() or Form() parameter - these need special handling + param_type = param.annotation + param_should_embed = _should_embed_parameter(param_type) + if _is_file_or_form_param(param_type): + # File() and Form() parameters must be in the function signature directly + # They cannot be part of a Pydantic model + file_form_params.append(param) + continue + + # Check for ExtraBodyField in Annotated types + is_extra_body = False + extra_body_description = None + if get_origin(param_type) is Annotated: + args = get_args(param_type) + base_type = args[0] if args else param_type + metadata = args[1:] if len(args) > 1 else [] + + # Check if any metadata item is an ExtraBodyField + for metadata_item in metadata: + if _is_extra_body_field(metadata_item): + is_extra_body = True + extra_body_description = metadata_item.description + break + + if is_extra_body: + # Store as extra body parameter - exclude from request model + extra_body_params.append((param_name, base_type, extra_body_description)) + continue + param_type = base_type + + # Check if it's a Pydantic model (for POST/PUT requests) + if hasattr(param_type, "model_json_schema"): + query_parameters.append((param_name, param_type, param.default, param_should_embed)) + else: + # Regular annotated parameter (but not File/Form, already handled above) + query_parameters.append((param_name, param_type, param.default, param_should_embed)) + + # Store extra body fields for later use in post-processing + # We'll store them when the endpoint is created, as we need the full path + # For now, attach to the function for later retrieval + if extra_body_params: + func._extra_body_params = 
extra_body_params # type: ignore + + # If there's exactly one body parameter and it's a Pydantic model, use it directly + # Otherwise, we'll create a combined request model from all parameters + # BUT: For GET requests, never create a request body - all parameters should be query parameters + if is_post_put and len(query_parameters) == 1: + param_name, param_type, default_value, should_embed = query_parameters[0] + if hasattr(param_type, "model_json_schema") and not should_embed: + request_model = param_type + query_parameters = [] # Clear query_parameters so we use the single model + + # Find response model from return annotation + # Also detect streaming response models (AsyncIterator) + response_model = None + streaming_response_model = None + return_annotation = sig.return_annotation + if return_annotation != inspect.Signature.empty: + origin = get_origin(return_annotation) + if hasattr(return_annotation, "model_json_schema"): + response_model = return_annotation + elif origin is Annotated: + # Handle Annotated return types + args = get_args(return_annotation) + if args: + # Check if the first argument is a Pydantic model + if hasattr(args[0], "model_json_schema"): + response_model = args[0] + else: + # Check if the first argument is a union type + inner_origin = get_origin(args[0]) + if inner_origin is not None and ( + inner_origin is types.UnionType or inner_origin is typing.Union + ): + response_model, streaming_response_model = _extract_response_models_from_union(args[0]) + elif origin is not None and (origin is types.UnionType or origin is typing.Union): + # Handle union types - extract both non-streaming and streaming models + response_model, streaming_response_model = _extract_response_models_from_union(return_annotation) + else: + try: + from fastapi import Response as FastAPIResponse + except ImportError: + fastapi_response_cls = None + else: + fastapi_response_cls = FastAPIResponse + try: + from starlette.responses import Response as StarletteResponse + except ImportError: + starlette_response_cls = None + else: + starlette_response_cls = StarletteResponse + + response_types = tuple(t for t in (fastapi_response_cls, starlette_response_cls) if t is not None) + if response_types and any(return_annotation is t for t in response_types): + response_schema_name = "Response" + + return ( + request_model, + response_model, + query_parameters, + file_form_params, + streaming_response_model, + response_schema_name, + ) + + except Exception as exc: + logger.warning( + "Failed to analyze endpoint %s.%s (%s): %s", api, method_name, route_descriptor, exc, exc_info=True + ) + return None, None, [], [], None, None + + +def _create_fastapi_endpoint(app: FastAPI, route, webmethod, api: Api): + """Create a FastAPI endpoint from a discovered route and webmethod.""" + path = route.path + raw_methods = route.methods or set() + method_list = sorted({method.upper() for method in raw_methods if method and method.upper() != "HEAD"}) + if not method_list: + method_list = ["GET"] + primary_method = method_list[0] + name = route.name + fastapi_path = path.replace("{", "{").replace("}", "}") + is_post_put = any(method in ["POST", "PUT", "PATCH"] for method in method_list) + + ( + request_model, + response_model, + query_parameters, + file_form_params, + streaming_response_model, + response_schema_name, + ) = _find_models_for_endpoint(webmethod, api, name, is_post_put) + operation_description = _extract_operation_description_from_docstring(api, name) + response_description = 
_extract_response_description_from_docstring(webmethod, response_model, api, name) + + # Retrieve and store extra body fields for this endpoint + func = app_module._get_protocol_method(api, name) + extra_body_params = getattr(func, "_extra_body_params", []) if func else [] + if extra_body_params: + for method in method_list: + key = (fastapi_path, method.upper()) + _extra_body_fields[key] = extra_body_params + + if is_post_put and not request_model and not file_form_params and query_parameters: + request_model = _create_dynamic_request_model( + api, webmethod, name, primary_method, query_parameters, use_any=False + ) + if not request_model: + request_model = _create_dynamic_request_model( + api, webmethod, name, primary_method, query_parameters, use_any=True, variant_suffix="Loose" + ) + if request_model: + query_parameters = [] + + if file_form_params and is_post_put: + signature_params = list(file_form_params) + param_annotations = {param.name: param.annotation for param in file_form_params} + for param_name, param_type, default_value, _ in query_parameters: + signature_params.append( + inspect.Parameter( + param_name, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + default=default_value if default_value is not inspect.Parameter.empty else inspect.Parameter.empty, + annotation=param_type, + ) + ) + param_annotations[param_name] = param_type + + async def file_form_endpoint(): + return response_model() if response_model else {} + + if operation_description: + file_form_endpoint.__doc__ = operation_description + file_form_endpoint.__signature__ = inspect.Signature(signature_params) + file_form_endpoint.__annotations__ = param_annotations + endpoint_func = file_form_endpoint + elif request_model and response_model: + endpoint_func = _create_endpoint_with_request_model(request_model, response_model, operation_description) + elif request_model: + endpoint_func = _create_endpoint_with_request_model(request_model, None, operation_description) + elif response_model and query_parameters: + if is_post_put: + request_model = _create_dynamic_request_model( + api, webmethod, name, primary_method, query_parameters, use_any=False + ) + if not request_model: + request_model = _create_dynamic_request_model( + api, webmethod, name, primary_method, query_parameters, use_any=True, variant_suffix="Loose" + ) + + if request_model: + endpoint_func = _create_endpoint_with_request_model( + request_model, response_model, operation_description + ) + else: + + async def empty_endpoint() -> response_model: + return response_model() if response_model else {} + + if operation_description: + empty_endpoint.__doc__ = operation_description + endpoint_func = empty_endpoint + else: + sorted_params = sorted(query_parameters, key=lambda x: (x[2] is not inspect.Parameter.empty, x[0])) + signature_params, param_annotations = _build_signature_params(sorted_params) + + async def query_endpoint(): + return response_model() + + if operation_description: + query_endpoint.__doc__ = operation_description + query_endpoint.__signature__ = inspect.Signature(signature_params) + query_endpoint.__annotations__ = param_annotations + endpoint_func = query_endpoint + elif response_model: + + async def response_only_endpoint() -> response_model: + return response_model() + + if operation_description: + response_only_endpoint.__doc__ = operation_description + endpoint_func = response_only_endpoint + elif query_parameters: + signature_params, param_annotations = _build_signature_params(query_parameters) + + async def params_only_endpoint(): + 
return {} + + if operation_description: + params_only_endpoint.__doc__ = operation_description + params_only_endpoint.__signature__ = inspect.Signature(signature_params) + params_only_endpoint.__annotations__ = param_annotations + endpoint_func = params_only_endpoint + else: + # Endpoint with no parameters and no response model + # If we have a response_model from the function signature, use it even if _find_models_for_endpoint didn't find it + # This can happen if there was an exception during model finding + if response_model is None: + # Try to get response model directly from the function signature as a fallback + func = app_module._get_protocol_method(api, name) + if func: + try: + sig = inspect.signature(func) + return_annotation = sig.return_annotation + if return_annotation != inspect.Signature.empty: + if hasattr(return_annotation, "model_json_schema"): + response_model = return_annotation + elif get_origin(return_annotation) is Annotated: + args = get_args(return_annotation) + if args and hasattr(args[0], "model_json_schema"): + response_model = args[0] + except Exception: + pass + + if response_model: + + async def no_params_endpoint() -> response_model: + return response_model() if response_model else {} + else: + + async def no_params_endpoint(): + return {} + + if operation_description: + no_params_endpoint.__doc__ = operation_description + endpoint_func = no_params_endpoint + + # Build response content with both application/json and text/event-stream if streaming + response_content: dict[str, Any] = {} + if response_model: + response_content["application/json"] = {"schema": {"$ref": f"#/components/schemas/{response_model.__name__}"}} + elif response_schema_name: + response_content["application/json"] = {"schema": {"$ref": f"#/components/schemas/{response_schema_name}"}} + if streaming_response_model: + # Get the schema name for the streaming model + # It might be a registered schema or a Pydantic model + streaming_schema_name = None + # Check if it's a registered schema first (before checking __name__) + # because registered schemas might be Annotated types + if schema_info := get_registered_schema_info(streaming_response_model): + streaming_schema_name = schema_info.name + elif hasattr(streaming_response_model, "__name__"): + streaming_schema_name = streaming_response_model.__name__ + + if streaming_schema_name: + response_content["text/event-stream"] = { + "schema": {"$ref": f"#/components/schemas/{streaming_schema_name}"} + } + + # If no content types, use empty schema + # Add the endpoint to the FastAPI app + is_deprecated = webmethod.deprecated or False + route_kwargs = { + "name": name, + "tags": [_get_tag_from_api(api)], + "deprecated": is_deprecated, + "responses": { + 400: {"$ref": "#/components/responses/BadRequest400"}, + 429: {"$ref": "#/components/responses/TooManyRequests429"}, + 500: {"$ref": "#/components/responses/InternalServerError500"}, + "default": {"$ref": "#/components/responses/DefaultError"}, + }, + } + success_response: dict[str, Any] = {"description": response_description} + if response_content: + success_response["content"] = response_content + route_kwargs["responses"][200] = success_response + + # FastAPI needs response_model parameter to properly generate OpenAPI spec + # Use the non-streaming response model if available + if response_model: + route_kwargs["response_model"] = response_model + + method_map = {"GET": app.get, "POST": app.post, "PUT": app.put, "DELETE": app.delete, "PATCH": app.patch} + for method in method_list: + if handler := 
method_map.get(method): + handler(fastapi_path, **route_kwargs)(endpoint_func) diff --git a/scripts/openapi_generator/main.py b/scripts/openapi_generator/main.py new file mode 100755 index 000000000..e881ff726 --- /dev/null +++ b/scripts/openapi_generator/main.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Main entry point for the FastAPI OpenAPI generator. +""" + +import copy +from pathlib import Path +from typing import Any + +import yaml +from fastapi.openapi.utils import get_openapi + +from . import app, schema_collection, schema_filtering, schema_transforms, state + + +def generate_openapi_spec(output_dir: str) -> dict[str, Any]: + """ + Generate OpenAPI specification using FastAPI's built-in method. + + Args: + output_dir: Directory to save the generated files + + Returns: + The generated OpenAPI specification as a dictionary + """ + state.reset_generator_state() + # Create the FastAPI app + fastapi_app = app.create_llama_stack_app() + + # Generate the OpenAPI schema + openapi_schema = get_openapi( + title=fastapi_app.title, + version=fastapi_app.version, + description=fastapi_app.description, + routes=fastapi_app.routes, + servers=fastapi_app.servers, + ) + + # Set OpenAPI version to 3.1.0 + openapi_schema["openapi"] = "3.1.0" + + # Add standard error responses + openapi_schema = schema_transforms._add_error_responses(openapi_schema) + + # Ensure all @json_schema_type decorated models are included + openapi_schema = schema_collection._ensure_json_schema_types_included(openapi_schema) + + # Fix $ref references to point to components/schemas instead of $defs + openapi_schema = schema_transforms._fix_ref_references(openapi_schema) + + # Fix path parameter resolution issues + openapi_schema = schema_transforms._fix_path_parameters(openapi_schema) + + # Eliminate $defs section entirely for oasdiff compatibility + openapi_schema = schema_transforms._eliminate_defs_section(openapi_schema) + + # Clean descriptions in schema definitions by removing docstring metadata + openapi_schema = schema_transforms._clean_schema_descriptions(openapi_schema) + openapi_schema = schema_transforms._normalize_empty_responses(openapi_schema) + + # Remove query parameters from POST/PUT/PATCH endpoints that have a request body + # FastAPI sometimes infers parameters as query params even when they should be in the request body + openapi_schema = schema_transforms._remove_query_params_from_body_endpoints(openapi_schema) + + # Add x-llama-stack-extra-body-params extension for ExtraBodyField parameters + openapi_schema = schema_transforms._add_extra_body_params_extension(openapi_schema) + + # Remove request bodies from GET endpoints (GET requests should never have request bodies) + # This must run AFTER _add_extra_body_params_extension to ensure any request bodies + # that FastAPI incorrectly added to GET endpoints are removed + openapi_schema = schema_transforms._remove_request_bodies_from_get_endpoints(openapi_schema) + + # Extract duplicate union types to shared schema references + openapi_schema = schema_transforms._extract_duplicate_union_types(openapi_schema) + + # Split into stable (v1 only), experimental (v1alpha + v1beta), deprecated, and combined (stainless) specs + # Each spec needs its own deep copy of the full schema to avoid cross-contamination + stable_schema = 
schema_filtering._filter_schema_by_version( + copy.deepcopy(openapi_schema), stable_only=True, exclude_deprecated=True + ) + experimental_schema = schema_filtering._filter_schema_by_version( + copy.deepcopy(openapi_schema), stable_only=False, exclude_deprecated=True + ) + deprecated_schema = schema_filtering._filter_deprecated_schema(copy.deepcopy(openapi_schema)) + combined_schema = schema_filtering._filter_combined_schema(copy.deepcopy(openapi_schema)) + + # Apply duplicate union extraction to combined schema (used by Stainless) + combined_schema = schema_transforms._extract_duplicate_union_types(combined_schema) + + base_description = ( + "This is the specification of the Llama Stack that provides\n" + " a set of endpoints and their corresponding interfaces that are\n" + " tailored to\n" + " best leverage Llama Models." + ) + + schema_configs = [ + ( + stable_schema, + "Llama Stack Specification", + "**✅ STABLE**: Production-ready APIs with backward compatibility guarantees.", + ), + ( + experimental_schema, + "Llama Stack Specification - Experimental APIs", + "**🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before\n becoming stable.", + ), + ( + deprecated_schema, + "Llama Stack Specification - Deprecated APIs", + "**⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for\n migration reference only.", + ), + ( + combined_schema, + "Llama Stack Specification - Stable & Experimental APIs", + "**🔗 COMBINED**: This specification includes both stable production-ready APIs\n and experimental pre-release APIs. Use stable APIs for production deployments\n and experimental APIs for testing new features.", + ), + ] + + for schema, title, description_suffix in schema_configs: + if "info" not in schema: + schema["info"] = {} + schema["info"].update( + { + "title": title, + "version": "v1", + "description": f"{base_description}\n\n {description_suffix}", + } + ) + + schemas_to_validate = [ + (stable_schema, "Stable schema"), + (experimental_schema, "Experimental schema"), + (deprecated_schema, "Deprecated schema"), + (combined_schema, "Combined (stainless) schema"), + ] + + for schema, _ in schemas_to_validate: + schema_transforms._fix_schema_issues(schema) + schema_transforms._apply_legacy_sorting(schema) + + print("\nValidating generated schemas...") + failed_schemas = [ + name for schema, name in schemas_to_validate if not schema_transforms.validate_openapi_schema(schema, name) + ] + if failed_schemas: + raise ValueError(f"Invalid schemas: {', '.join(failed_schemas)}") + + # Ensure output directory exists + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + # Save the stable specification + yaml_path = output_path / "llama-stack-spec.yaml" + schema_transforms._write_yaml_file(yaml_path, stable_schema) + # Post-process the YAML file to remove $defs section and fix references + with open(yaml_path) as f: + yaml_content = f.read() + + if " $defs:" in yaml_content or "#/$defs/" in yaml_content: + # Use string replacement to fix references directly + if "#/$defs/" in yaml_content: + yaml_content = yaml_content.replace("#/$defs/", "#/components/schemas/") + + # Parse the YAML content + yaml_data = yaml.safe_load(yaml_content) + + # Move $defs to components/schemas if it exists + if "$defs" in yaml_data: + if "components" not in yaml_data: + yaml_data["components"] = {} + if "schemas" not in yaml_data["components"]: + yaml_data["components"]["schemas"] = {} + + # Move all $defs to components/schemas + for def_name, def_schema 
in yaml_data["$defs"].items(): + yaml_data["components"]["schemas"][def_name] = def_schema + + # Remove the $defs section + del yaml_data["$defs"] + + # Write the modified YAML back + schema_transforms._write_yaml_file(yaml_path, yaml_data) + + print(f"Generated YAML (stable): {yaml_path}") + + experimental_yaml_path = output_path / "experimental-llama-stack-spec.yaml" + schema_transforms._write_yaml_file(experimental_yaml_path, experimental_schema) + print(f"Generated YAML (experimental): {experimental_yaml_path}") + + deprecated_yaml_path = output_path / "deprecated-llama-stack-spec.yaml" + schema_transforms._write_yaml_file(deprecated_yaml_path, deprecated_schema) + print(f"Generated YAML (deprecated): {deprecated_yaml_path}") + + # Generate combined (stainless) spec + stainless_yaml_path = output_path / "stainless-llama-stack-spec.yaml" + schema_transforms._write_yaml_file(stainless_yaml_path, combined_schema) + print(f"Generated YAML (stainless/combined): {stainless_yaml_path}") + + return stable_schema + + +def main(): + """Main entry point for the FastAPI OpenAPI generator.""" + import argparse + + parser = argparse.ArgumentParser(description="Generate OpenAPI specification using FastAPI") + parser.add_argument("output_dir", help="Output directory for generated files") + + args = parser.parse_args() + + print("Generating OpenAPI specification using FastAPI...") + print(f"Output directory: {args.output_dir}") + + try: + openapi_schema = generate_openapi_spec(output_dir=args.output_dir) + + print("\nOpenAPI specification generated successfully!") + print(f"Schemas: {len(openapi_schema.get('components', {}).get('schemas', {}))}") + print(f"Paths: {len(openapi_schema.get('paths', {}))}") + operation_count = sum( + 1 + for path_info in openapi_schema.get("paths", {}).values() + for method in ["get", "post", "put", "delete", "patch"] + if method in path_info + ) + print(f"Operations: {operation_count}") + + except Exception as e: + print(f"Error generating OpenAPI specification: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/scripts/openapi_generator/schema_collection.py b/scripts/openapi_generator/schema_collection.py new file mode 100644 index 000000000..127f6da9c --- /dev/null +++ b/scripts/openapi_generator/schema_collection.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Schema discovery and collection for OpenAPI generation. +""" + +from typing import Any + + +def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None: + """Ensure components.schemas exists in the schema.""" + if "components" not in openapi_schema: + openapi_schema["components"] = {} + if "schemas" not in openapi_schema["components"]: + openapi_schema["components"]["schemas"] = {} + + +def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None: + """ + Extract $defs from a schema, move them to components/schemas, and fix references. + This handles both TypeAdapter-generated schemas and model_json_schema() schemas. 
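+
+    Illustrative example: {"$defs": {"Pet": {...}}, "$ref": "#/$defs/Pet"} becomes
+    {"$ref": "#/components/schemas/Pet"}, with Pet hoisted into components/schemas.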
+ """ + if "$defs" in schema: + defs = schema.pop("$defs") + for def_name, def_schema in defs.items(): + if def_name not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"][def_name] = def_schema + # Recursively handle $defs in nested schemas + _extract_and_fix_defs(def_schema, openapi_schema) + + # Fix any references in the main schema that point to $defs + def fix_refs_in_schema(obj: Any) -> None: + if isinstance(obj, dict): + if "$ref" in obj and obj["$ref"].startswith("#/$defs/"): + obj["$ref"] = obj["$ref"].replace("#/$defs/", "#/components/schemas/") + for value in obj.values(): + fix_refs_in_schema(value) + elif isinstance(obj, list): + for item in obj: + fix_refs_in_schema(item) + + fix_refs_in_schema(schema) + + +def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Ensure all registered schemas (decorated, explicit, and dynamic) are included in the OpenAPI schema. + Relies on llama_stack_api's registry instead of recursively importing every module. + """ + _ensure_components_schemas(openapi_schema) + + from pydantic import TypeAdapter + + from llama_stack_api.schema_utils import ( + iter_dynamic_schema_types, + iter_json_schema_types, + iter_registered_schema_types, + ) + + # Handle explicitly registered schemas first (union types, Annotated structs, etc.) + for registration_info in iter_registered_schema_types(): + schema_type = registration_info.type + schema_name = registration_info.name + if schema_name not in openapi_schema["components"]["schemas"]: + try: + adapter = TypeAdapter(schema_type) + schema = adapter.json_schema(ref_template="#/components/schemas/{model}") + _extract_and_fix_defs(schema, openapi_schema) + openapi_schema["components"]["schemas"][schema_name] = schema + except Exception as e: + print(f"Warning: Failed to generate schema for registered type {schema_name}: {e}") + import traceback + + traceback.print_exc() + continue + + # Add @json_schema_type decorated models + for model in iter_json_schema_types(): + schema_name = getattr(model, "_llama_stack_schema_name", None) or getattr(model, "__name__", None) + if not schema_name: + continue + if schema_name not in openapi_schema["components"]["schemas"]: + try: + if hasattr(model, "model_json_schema"): + schema = model.model_json_schema(ref_template="#/components/schemas/{model}") + else: + adapter = TypeAdapter(model) + schema = adapter.json_schema(ref_template="#/components/schemas/{model}") + _extract_and_fix_defs(schema, openapi_schema) + openapi_schema["components"]["schemas"][schema_name] = schema + except Exception as e: + print(f"Warning: Failed to generate schema for {schema_name}: {e}") + continue + + # Include any dynamic models generated while building endpoints + for model in iter_dynamic_schema_types(): + try: + schema_name = model.__name__ + if schema_name not in openapi_schema["components"]["schemas"]: + schema = model.model_json_schema(ref_template="#/components/schemas/{model}") + _extract_and_fix_defs(schema, openapi_schema) + openapi_schema["components"]["schemas"][schema_name] = schema + except Exception: + continue + + return openapi_schema diff --git a/scripts/openapi_generator/schema_filtering.py b/scripts/openapi_generator/schema_filtering.py new file mode 100644 index 000000000..4667d27a5 --- /dev/null +++ b/scripts/openapi_generator/schema_filtering.py @@ -0,0 +1,297 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Schema filtering and version filtering for OpenAPI generation. +""" + +from typing import Any + +from llama_stack_api.schema_utils import iter_json_schema_types, iter_registered_schema_types +from llama_stack_api.version import ( + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, +) + + +def _get_all_json_schema_type_names() -> set[str]: + """Collect schema names from @json_schema_type-decorated models.""" + schema_names = set() + for model in iter_json_schema_types(): + schema_name = getattr(model, "_llama_stack_schema_name", None) or getattr(model, "__name__", None) + if schema_name: + schema_names.add(schema_name) + return schema_names + + +def _get_explicit_schema_names(openapi_schema: dict[str, Any]) -> set[str]: + """Schema names to keep even if not referenced by a path.""" + registered_schema_names = {info.name for info in iter_registered_schema_types()} + json_schema_type_names = _get_all_json_schema_type_names() + return registered_schema_names | json_schema_type_names + + +def _find_schema_refs_in_object(obj: Any) -> set[str]: + """ + Recursively find all schema references ($ref) in an object. + """ + refs = set() + + if isinstance(obj, dict): + for key, value in obj.items(): + if key == "$ref" and isinstance(value, str) and value.startswith("#/components/schemas/"): + schema_name = value.split("/")[-1] + refs.add(schema_name) + else: + refs.update(_find_schema_refs_in_object(value)) + elif isinstance(obj, list): + for item in obj: + refs.update(_find_schema_refs_in_object(item)) + + return refs + + +def _add_transitive_references( + referenced_schemas: set[str], all_schemas: dict[str, Any], initial_schemas: set[str] | None = None +) -> set[str]: + """Add transitive references for given schemas.""" + if initial_schemas: + referenced_schemas.update(initial_schemas) + additional_schemas = set() + for schema_name in initial_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + else: + additional_schemas = set() + for schema_name in referenced_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + + while additional_schemas: + new_schemas = additional_schemas - referenced_schemas + if not new_schemas: + break + referenced_schemas.update(new_schemas) + additional_schemas = set() + for schema_name in new_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + + return referenced_schemas + + +def _find_schemas_referenced_by_paths(filtered_paths: dict[str, Any], openapi_schema: dict[str, Any]) -> set[str]: + """ + Find all schemas that are referenced by the filtered paths. + This recursively traverses the path definitions to find all $ref references. 
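+
+    For example (illustrative), an operation response containing
+    {"$ref": "#/components/schemas/Model"} contributes "Model", plus anything
+    "Model" itself references, transitively.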
+ """ + referenced_schemas = set() + + # Traverse all filtered paths + for _, path_item in filtered_paths.items(): + if not isinstance(path_item, dict): + continue + + # Check each HTTP method in the path + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method in path_item: + operation = path_item[method] + if isinstance(operation, dict): + # Find all schema references in this operation + referenced_schemas.update(_find_schema_refs_in_object(operation)) + + # Also check the responses section for schema references + if "components" in openapi_schema and "responses" in openapi_schema["components"]: + referenced_schemas.update(_find_schema_refs_in_object(openapi_schema["components"]["responses"])) + + # Also include schemas that are referenced by other schemas (transitive references) + # This ensures we include all dependencies + all_schemas = openapi_schema.get("components", {}).get("schemas", {}) + additional_schemas = set() + + for schema_name in referenced_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + + # Keep adding transitive references until no new ones are found + while additional_schemas: + new_schemas = additional_schemas - referenced_schemas + if not new_schemas: + break + referenced_schemas.update(new_schemas) + additional_schemas = set() + for schema_name in new_schemas: + if schema_name in all_schemas: + additional_schemas.update(_find_schema_refs_in_object(all_schemas[schema_name])) + + return referenced_schemas + + +def _filter_schemas_by_references( + filtered_schema: dict[str, Any], filtered_paths: dict[str, Any], openapi_schema: dict[str, Any] +) -> dict[str, Any]: + """Filter schemas to only include ones referenced by filtered paths and explicit schemas.""" + if "components" not in filtered_schema or "schemas" not in filtered_schema["components"]: + return filtered_schema + + referenced_schemas = _find_schemas_referenced_by_paths(filtered_paths, openapi_schema) + all_schemas = openapi_schema.get("components", {}).get("schemas", {}) + explicit_names = _get_explicit_schema_names(openapi_schema) + referenced_schemas = _add_transitive_references(referenced_schemas, all_schemas, explicit_names) + + filtered_schemas = { + name: schema for name, schema in filtered_schema["components"]["schemas"].items() if name in referenced_schemas + } + filtered_schema["components"]["schemas"] = filtered_schemas + + if "components" in openapi_schema and "$defs" in openapi_schema["components"]: + if "components" not in filtered_schema: + filtered_schema["components"] = {} + filtered_schema["components"]["$defs"] = openapi_schema["components"]["$defs"] + + return filtered_schema + + +def _path_starts_with_version(path: str, version: str) -> bool: + """Check if a path starts with a specific API version prefix.""" + return path.startswith(f"/{version}/") + + +def _is_stable_path(path: str) -> bool: + """Check if a path is a stable v1 path (not v1alpha or v1beta).""" + return ( + _path_starts_with_version(path, LLAMA_STACK_API_V1) + and not _path_starts_with_version(path, LLAMA_STACK_API_V1ALPHA) + and not _path_starts_with_version(path, LLAMA_STACK_API_V1BETA) + ) + + +def _is_experimental_path(path: str) -> bool: + """Check if a path is an experimental path (v1alpha or v1beta).""" + return _path_starts_with_version(path, LLAMA_STACK_API_V1ALPHA) or _path_starts_with_version( + path, LLAMA_STACK_API_V1BETA + ) + + +def _is_path_deprecated(path_item: dict[str, Any]) -> bool: + """Check 
if a path item has any deprecated operations.""" + if not isinstance(path_item, dict): + return False + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if isinstance(path_item.get(method), dict) and path_item[method].get("deprecated", False): + return True + return False + + +def _filter_schema_by_version( + openapi_schema: dict[str, Any], stable_only: bool = True, exclude_deprecated: bool = True +) -> dict[str, Any]: + """ + Filter OpenAPI schema by API version. + + Args: + openapi_schema: The full OpenAPI schema + stable_only: If True, return only /v1/ paths (stable). If False, return only /v1alpha/ and /v1beta/ paths (experimental). + exclude_deprecated: If True, exclude deprecated endpoints from the result. + + Returns: + Filtered OpenAPI schema + """ + filtered_schema = openapi_schema.copy() + + if "paths" not in filtered_schema: + return filtered_schema + + filtered_paths = {} + for path, path_item in filtered_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + + # Filter at operation level, not path level + # This allows paths with both deprecated and non-deprecated operations + filtered_path_item = {} + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method not in path_item: + continue + operation = path_item[method] + if not isinstance(operation, dict): + continue + + # Skip deprecated operations if exclude_deprecated is True + if exclude_deprecated and operation.get("deprecated", False): + continue + + filtered_path_item[method] = operation + + # Only include path if it has at least one operation after filtering + if filtered_path_item: + # Check if path matches version filter + if (stable_only and _is_stable_path(path)) or (not stable_only and _is_experimental_path(path)): + filtered_paths[path] = filtered_path_item + + filtered_schema["paths"] = filtered_paths + return _filter_schemas_by_references(filtered_schema, filtered_paths, openapi_schema) + + +def _filter_deprecated_schema(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Filter OpenAPI schema to include only deprecated endpoints. + Includes all deprecated endpoints regardless of version (v1, v1alpha, v1beta). + """ + filtered_schema = openapi_schema.copy() + + if "paths" not in filtered_schema: + return filtered_schema + + # Filter paths to only include deprecated ones + filtered_paths = {} + for path, path_item in filtered_schema["paths"].items(): + if _is_path_deprecated(path_item): + filtered_paths[path] = path_item + + filtered_schema["paths"] = filtered_paths + + return filtered_schema + + +def _filter_combined_schema(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Filter OpenAPI schema to include both stable (v1) and experimental (v1alpha, v1beta) APIs. + Includes deprecated endpoints. This is used for the combined "stainless" spec. 
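+    Only /v1/, /v1alpha/, and /v1beta/ paths are retained; unversioned paths are dropped.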
+    """
+    filtered_schema = openapi_schema.copy()
+
+    if "paths" not in filtered_schema:
+        return filtered_schema
+
+    # Filter paths to include stable (v1) and experimental (v1alpha, v1beta); deprecated endpoints are kept
+    filtered_paths = {}
+    for path, path_item in filtered_schema["paths"].items():
+        if not isinstance(path_item, dict):
+            continue
+
+        # Copy all well-formed operations; unlike _filter_schema_by_version,
+        # deprecated operations are intentionally not skipped here
+        filtered_path_item = {}
+        for method in ["get", "post", "put", "delete", "patch", "head", "options"]:
+            if method not in path_item:
+                continue
+            operation = path_item[method]
+            if not isinstance(operation, dict):
+                continue
+
+            filtered_path_item[method] = operation
+
+        # Only include path if it has at least one operation after filtering
+        if filtered_path_item:
+            # Check if path matches version filter (stable or experimental)
+            if _is_stable_path(path) or _is_experimental_path(path):
+                filtered_paths[path] = filtered_path_item
+
+    filtered_schema["paths"] = filtered_paths
+
+    return _filter_schemas_by_references(filtered_schema, filtered_paths, openapi_schema)
diff --git a/scripts/openapi_generator/schema_transforms.py b/scripts/openapi_generator/schema_transforms.py
new file mode 100644
index 000000000..5821c99d5
--- /dev/null
+++ b/scripts/openapi_generator/schema_transforms.py
@@ -0,0 +1,963 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Schema transformations and fixes for OpenAPI generation.
+"""
+
+import copy
+from collections import OrderedDict
+from pathlib import Path
+from typing import Any
+
+import yaml
+from openapi_spec_validator import validate_spec
+from openapi_spec_validator.exceptions import OpenAPISpecValidatorError
+
+from . import endpoints, schema_collection
+from ._legacy_order import (
+    LEGACY_OPERATION_KEYS,
+    LEGACY_PATH_ORDER,
+    LEGACY_RESPONSE_ORDER,
+    LEGACY_SCHEMA_ORDER,
+    LEGACY_SECURITY,
+    LEGACY_TAG_GROUPS,
+    LEGACY_TAGS,
+)
+from .state import _extra_body_fields
+
+
+def _fix_ref_references(openapi_schema: dict[str, Any]) -> dict[str, Any]:
+    """
+    Fix $ref references to point to components/schemas instead of $defs.
+    This prevents the YAML dumper from creating a root-level $defs section.
+ """ + + def fix_refs(obj: Any) -> None: + if isinstance(obj, dict): + if "$ref" in obj and obj["$ref"].startswith("#/$defs/"): + # Replace #/$defs/ with #/components/schemas/ + obj["$ref"] = obj["$ref"].replace("#/$defs/", "#/components/schemas/") + for value in obj.values(): + fix_refs(value) + elif isinstance(obj, list): + for item in obj: + fix_refs(item) + + fix_refs(openapi_schema) + return openapi_schema + + +def _normalize_empty_responses(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """Convert empty 200 responses into 204 No Content.""" + + for path_item in openapi_schema.get("paths", {}).values(): + if not isinstance(path_item, dict): + continue + for method in list(path_item.keys()): + operation = path_item.get(method) + if not isinstance(operation, dict): + continue + responses = operation.get("responses") + if not isinstance(responses, dict): + continue + response_200 = responses.get("200") or responses.get(200) + if response_200 is None: + continue + content = response_200.get("content") + if content and any( + isinstance(media, dict) and media.get("schema") not in ({}, None) for media in content.values() + ): + continue + responses.pop("200", None) + responses.pop(200, None) + responses["204"] = {"description": response_200.get("description", "No Content")} + return openapi_schema + + +def _eliminate_defs_section(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Eliminate $defs section entirely by moving all definitions to components/schemas. + This matches the structure of the old pyopenapi generator for oasdiff compatibility. + """ + schema_collection._ensure_components_schemas(openapi_schema) + + # First pass: collect all $defs from anywhere in the schema + defs_to_move = {} + + def collect_defs(obj: Any) -> None: + if isinstance(obj, dict): + if "$defs" in obj: + # Collect $defs for later processing + for def_name, def_schema in obj["$defs"].items(): + if def_name not in defs_to_move: + defs_to_move[def_name] = def_schema + + # Recursively process all values + for value in obj.values(): + collect_defs(value) + elif isinstance(obj, list): + for item in obj: + collect_defs(item) + + # Collect all $defs + collect_defs(openapi_schema) + + # Move all $defs to components/schemas + for def_name, def_schema in defs_to_move.items(): + if def_name not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"][def_name] = def_schema + + # Also move any existing root-level $defs to components/schemas + if "$defs" in openapi_schema: + print(f"Found root-level $defs with {len(openapi_schema['$defs'])} items, moving to components/schemas") + for def_name, def_schema in openapi_schema["$defs"].items(): + if def_name not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"][def_name] = def_schema + # Remove the root-level $defs + del openapi_schema["$defs"] + + # Second pass: remove all $defs sections from anywhere in the schema + def remove_defs(obj: Any) -> None: + if isinstance(obj, dict): + if "$defs" in obj: + del obj["$defs"] + + # Recursively process all values + for value in obj.values(): + remove_defs(value) + elif isinstance(obj, list): + for item in obj: + remove_defs(item) + + # Remove all $defs sections + remove_defs(openapi_schema) + + return openapi_schema + + +def _add_error_responses(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Add standard error response definitions to the OpenAPI schema. + Uses the actual Error model from the codebase for consistency. 
+ """ + if "components" not in openapi_schema: + openapi_schema["components"] = {} + if "responses" not in openapi_schema["components"]: + openapi_schema["components"]["responses"] = {} + + try: + from llama_stack_api.datatypes import Error + + schema_collection._ensure_components_schemas(openapi_schema) + if "Error" not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"]["Error"] = Error.model_json_schema() + except ImportError: + pass + + schema_collection._ensure_components_schemas(openapi_schema) + if "Response" not in openapi_schema["components"]["schemas"]: + openapi_schema["components"]["schemas"]["Response"] = {"title": "Response", "type": "object"} + + # Define standard HTTP error responses + error_responses = { + 400: { + "name": "BadRequest400", + "description": "The request was invalid or malformed", + "example": {"status": 400, "title": "Bad Request", "detail": "The request was invalid or malformed"}, + }, + 429: { + "name": "TooManyRequests429", + "description": "The client has sent too many requests in a given amount of time", + "example": { + "status": 429, + "title": "Too Many Requests", + "detail": "You have exceeded the rate limit. Please try again later.", + }, + }, + 500: { + "name": "InternalServerError500", + "description": "The server encountered an unexpected error", + "example": {"status": 500, "title": "Internal Server Error", "detail": "An unexpected error occurred"}, + }, + } + + # Add each error response to the schema + for _, error_info in error_responses.items(): + response_name = error_info["name"] + openapi_schema["components"]["responses"][response_name] = { + "description": error_info["description"], + "content": { + "application/json": {"schema": {"$ref": "#/components/schemas/Error"}, "example": error_info["example"]} + }, + } + + # Add a default error response + openapi_schema["components"]["responses"]["DefaultError"] = { + "description": "An error occurred", + "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Error"}}}, + } + + return openapi_schema + + +def _fix_path_parameters(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Fix path parameter resolution issues by adding explicit parameter definitions. 
+ """ + if "paths" not in openapi_schema: + return openapi_schema + + for path, path_item in openapi_schema["paths"].items(): + # Extract path parameters from the URL + path_params = endpoints._extract_path_parameters(path) + + if not path_params: + continue + + # Add parameters to each operation in this path + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method in path_item and isinstance(path_item[method], dict): + operation = path_item[method] + if "parameters" not in operation: + operation["parameters"] = [] + + # Add path parameters that aren't already defined + existing_param_names = {p.get("name") for p in operation["parameters"] if p.get("in") == "path"} + for param in path_params: + if param["name"] not in existing_param_names: + operation["parameters"].append(param) + + return openapi_schema + + +def _get_schema_title(item: dict[str, Any]) -> str | None: + """Extract a title for a schema item to use in union variant names.""" + if "$ref" in item: + return item["$ref"].split("/")[-1] + elif "type" in item: + type_val = item["type"] + if type_val == "null": + return None + if type_val == "array" and "items" in item: + items = item["items"] + if isinstance(items, dict): + if "anyOf" in items or "oneOf" in items: + nested_union = items.get("anyOf") or items.get("oneOf") + if isinstance(nested_union, list) and len(nested_union) > 0: + nested_types = [] + for nested_item in nested_union: + if isinstance(nested_item, dict): + if "$ref" in nested_item: + nested_types.append(nested_item["$ref"].split("/")[-1]) + elif "oneOf" in nested_item: + one_of_items = nested_item.get("oneOf", []) + if one_of_items and isinstance(one_of_items[0], dict) and "$ref" in one_of_items[0]: + base_name = one_of_items[0]["$ref"].split("/")[-1].split("-")[0] + nested_types.append(f"{base_name}Union") + else: + nested_types.append("Union") + elif "type" in nested_item and nested_item["type"] != "null": + nested_types.append(nested_item["type"]) + if nested_types: + unique_nested = list(dict.fromkeys(nested_types)) + # Use more descriptive names for better code generation + if len(unique_nested) <= 3: + return f"list[{' | '.join(unique_nested)}]" + else: + # Include first few types for better naming + return f"list[{unique_nested[0]} | {unique_nested[1]} | ...]" + return "list[Union]" + elif "$ref" in items: + return f"list[{items['$ref'].split('/')[-1]}]" + elif "type" in items: + return f"list[{items['type']}]" + return "array" + return type_val + elif "title" in item: + return item["title"] + return None + + +def _add_titles_to_unions(obj: Any, parent_key: str | None = None) -> None: + """Recursively add titles to union schemas (anyOf/oneOf) to help code generators infer names.""" + if isinstance(obj, dict): + # Check if this is a union schema (anyOf or oneOf) + if "anyOf" in obj or "oneOf" in obj: + union_type = "anyOf" if "anyOf" in obj else "oneOf" + union_items = obj[union_type] + + if isinstance(union_items, list) and len(union_items) > 0: + # Skip simple nullable unions (type | null) - these don't need titles + is_simple_nullable = ( + len(union_items) == 2 + and any(isinstance(item, dict) and item.get("type") == "null" for item in union_items) + and any( + isinstance(item, dict) and "type" in item and item.get("type") != "null" for item in union_items + ) + and not any( + isinstance(item, dict) and ("$ref" in item or "anyOf" in item or "oneOf" in item) + for item in union_items + ) + ) + + if is_simple_nullable: + # Remove title from simple nullable unions if it 
exists + if "title" in obj: + del obj["title"] + else: + # Add titles to individual union variants that need them + for item in union_items: + if isinstance(item, dict): + # Skip null types + if item.get("type") == "null": + continue + # Add title to complex variants (arrays with unions, nested unions, etc.) + # Also add to simple types if they're part of a complex union + needs_title = ( + "items" in item + or "anyOf" in item + or "oneOf" in item + or ("$ref" in item and "title" not in item) + ) + if needs_title and "title" not in item: + variant_title = _get_schema_title(item) + if variant_title: + item["title"] = variant_title + + # Try to infer a meaningful title from the union items for the parent + titles = [] + for item in union_items: + if isinstance(item, dict): + title = _get_schema_title(item) + if title: + titles.append(title) + + if titles: + # Create a title from the union items + unique_titles = list(dict.fromkeys(titles)) # Preserve order, remove duplicates + if len(unique_titles) <= 3: + title = " | ".join(unique_titles) + else: + title = f"{unique_titles[0]} | ... ({len(unique_titles)} variants)" + # Always set the title for unions to help code generators + # This will replace generic property titles with union-specific ones + obj["title"] = title + elif "title" not in obj and parent_key: + # Use parent key as fallback only if no title exists + obj["title"] = f"{parent_key.title()}Union" + + # Recursively process all values + for key, value in obj.items(): + _add_titles_to_unions(value, key) + elif isinstance(obj, list): + for item in obj: + _add_titles_to_unions(item, parent_key) + + +def _convert_anyof_const_to_enum(obj: Any) -> None: + """Convert anyOf with multiple const string values to a proper enum.""" + if isinstance(obj, dict): + if "anyOf" in obj: + any_of = obj["anyOf"] + if isinstance(any_of, list): + # Check if all items are const string values + const_values = [] + has_null = False + can_convert = True + for item in any_of: + if isinstance(item, dict): + if item.get("type") == "null": + has_null = True + elif item.get("type") == "string" and "const" in item: + const_values.append(item["const"]) + else: + # Not a simple const pattern, skip conversion for this anyOf + can_convert = False + break + + # If we have const values and they're all strings, convert to enum + if can_convert and const_values and len(const_values) == len(any_of) - (1 if has_null else 0): + # Convert to enum + obj["type"] = "string" + obj["enum"] = const_values + # Preserve default if present, otherwise try to get from first const item + if "default" not in obj: + for item in any_of: + if isinstance(item, dict) and "const" in item: + obj["default"] = item["const"] + break + # Remove anyOf + del obj["anyOf"] + # Handle nullable + if has_null: + obj["nullable"] = True + # Remove title if it's just "string" + if obj.get("title") == "string": + del obj["title"] + + # Recursively process all values + for value in obj.values(): + _convert_anyof_const_to_enum(value) + elif isinstance(obj, list): + for item in obj: + _convert_anyof_const_to_enum(item) + + +def _fix_schema_recursive(obj: Any) -> None: + """Recursively fix schema issues: exclusiveMinimum and null defaults.""" + if isinstance(obj, dict): + if "exclusiveMinimum" in obj and isinstance(obj["exclusiveMinimum"], int | float): + obj["minimum"] = obj.pop("exclusiveMinimum") + if "default" in obj and obj["default"] is None: + del obj["default"] + obj["nullable"] = True + for value in obj.values(): + _fix_schema_recursive(value) + elif 
isinstance(obj, list): + for item in obj: + _fix_schema_recursive(item) + + +def _clean_description(description: str) -> str: + """Remove :param, :type, :returns, and other docstring metadata from description.""" + if not description: + return description + + lines = description.split("\n") + cleaned_lines = [] + skip_until_empty = False + + for line in lines: + stripped = line.strip() + # Skip lines that start with docstring metadata markers + if stripped.startswith( + (":param", ":type", ":return", ":returns", ":raises", ":exception", ":yield", ":yields", ":cvar") + ): + skip_until_empty = True + continue + # If we're skipping and hit an empty line, resume normal processing + if skip_until_empty: + if not stripped: + skip_until_empty = False + continue + # Include the line if we're not skipping + cleaned_lines.append(line) + + # Join and strip trailing whitespace + result = "\n".join(cleaned_lines).strip() + return result + + +def _clean_schema_descriptions(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """Clean descriptions in schema definitions by removing docstring metadata.""" + if "components" not in openapi_schema or "schemas" not in openapi_schema["components"]: + return openapi_schema + + schemas = openapi_schema["components"]["schemas"] + for schema_def in schemas.values(): + if isinstance(schema_def, dict) and "description" in schema_def and isinstance(schema_def["description"], str): + schema_def["description"] = _clean_description(schema_def["description"]) + + return openapi_schema + + +def _add_extra_body_params_extension(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Add x-llama-stack-extra-body-params extension to requestBody for endpoints with ExtraBodyField parameters. + """ + if "paths" not in openapi_schema: + return openapi_schema + + from pydantic import TypeAdapter + + for path, path_item in openapi_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + + for method in ["get", "post", "put", "delete", "patch", "head", "options"]: + if method not in path_item: + continue + + operation = path_item[method] + if not isinstance(operation, dict): + continue + + # Check if we have extra body fields for this path/method + key = (path, method.upper()) + if key not in _extra_body_fields: + continue + + extra_body_params = _extra_body_fields[key] + + # Ensure requestBody exists + if "requestBody" not in operation: + continue + + request_body = operation["requestBody"] + if not isinstance(request_body, dict): + continue + + # Get the schema from requestBody + content = request_body.get("content", {}) + json_content = content.get("application/json", {}) + schema_ref = json_content.get("schema", {}) + + # Remove extra body fields from the schema if they exist as properties + # Handle both $ref schemas and inline schemas + if isinstance(schema_ref, dict): + if "$ref" in schema_ref: + # Schema is a reference - remove from the referenced schema + ref_path = schema_ref["$ref"] + if ref_path.startswith("#/components/schemas/"): + schema_name = ref_path.split("/")[-1] + if "components" in openapi_schema and "schemas" in openapi_schema["components"]: + schema_def = openapi_schema["components"]["schemas"].get(schema_name) + if isinstance(schema_def, dict) and "properties" in schema_def: + for param_name, _, _ in extra_body_params: + if param_name in schema_def["properties"]: + del schema_def["properties"][param_name] + # Also remove from required if present + if "required" in schema_def and param_name in schema_def["required"]: + 
schema_def["required"].remove(param_name) + elif "properties" in schema_ref: + # Schema is inline - remove directly from it + for param_name, _, _ in extra_body_params: + if param_name in schema_ref["properties"]: + del schema_ref["properties"][param_name] + # Also remove from required if present + if "required" in schema_ref and param_name in schema_ref["required"]: + schema_ref["required"].remove(param_name) + + # Build the extra body params schema + extra_params_schema = {} + for param_name, param_type, description in extra_body_params: + try: + # Generate JSON schema for the parameter type + adapter = TypeAdapter(param_type) + param_schema = adapter.json_schema(ref_template="#/components/schemas/{model}") + + # Add description if provided + if description: + param_schema["description"] = description + + extra_params_schema[param_name] = param_schema + except Exception: + # If we can't generate schema, skip this parameter + continue + + if extra_params_schema: + # Add the extension to requestBody + if "x-llama-stack-extra-body-params" not in request_body: + request_body["x-llama-stack-extra-body-params"] = extra_params_schema + + return openapi_schema + + +def _remove_query_params_from_body_endpoints(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Remove query parameters from POST/PUT/PATCH endpoints that have a request body. + FastAPI sometimes infers parameters as query params even when they should be in the request body. + """ + if "paths" not in openapi_schema: + return openapi_schema + + body_methods = {"post", "put", "patch"} + + for _path, path_item in openapi_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + + for method in body_methods: + if method not in path_item: + continue + + operation = path_item[method] + if not isinstance(operation, dict): + continue + + # Check if this operation has a request body + has_request_body = "requestBody" in operation and operation["requestBody"] + + if has_request_body: + # Remove all query parameters (parameters with "in": "query") + if "parameters" in operation: + # Filter out query parameters, keep path and header parameters + operation["parameters"] = [ + param + for param in operation["parameters"] + if isinstance(param, dict) and param.get("in") != "query" + ] + # Remove the parameters key if it's now empty + if not operation["parameters"]: + del operation["parameters"] + + return openapi_schema + + +def _remove_request_bodies_from_get_endpoints(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Remove request bodies from GET endpoints and convert their parameters to query parameters. + + GET requests should never have request bodies - all parameters should be query parameters. + This function removes any requestBody that FastAPI may have incorrectly added to GET endpoints + and converts any parameters in the requestBody to query parameters. 
+ """ + if "paths" not in openapi_schema: + return openapi_schema + + for _path, path_item in openapi_schema["paths"].items(): + if not isinstance(path_item, dict): + continue + + # Check GET method specifically + if "get" in path_item: + operation = path_item["get"] + if not isinstance(operation, dict): + continue + + if "requestBody" in operation: + request_body = operation["requestBody"] + # Extract parameters from requestBody and convert to query parameters + if isinstance(request_body, dict) and "content" in request_body: + content = request_body.get("content", {}) + json_content = content.get("application/json", {}) + schema = json_content.get("schema", {}) + + if "parameters" not in operation: + operation["parameters"] = [] + elif not isinstance(operation["parameters"], list): + operation["parameters"] = [] + + # If the schema has properties, convert each to a query parameter + if isinstance(schema, dict) and "properties" in schema: + for param_name, param_schema in schema["properties"].items(): + # Check if this parameter is already in the parameters list + existing_param = None + for existing in operation["parameters"]: + if isinstance(existing, dict) and existing.get("name") == param_name: + existing_param = existing + break + + if not existing_param: + # Create a new query parameter from the requestBody property + required = param_name in schema.get("required", []) + query_param = { + "name": param_name, + "in": "query", + "required": required, + "schema": param_schema, + } + # Add description if present + if "description" in param_schema: + query_param["description"] = param_schema["description"] + operation["parameters"].append(query_param) + elif isinstance(schema, dict): + # Handle direct schema (not a model with properties) + # Try to infer parameter name from schema title + param_name = schema.get("title", "").lower().replace(" ", "_") + if param_name: + # Check if this parameter is already in the parameters list + existing_param = None + for existing in operation["parameters"]: + if isinstance(existing, dict) and existing.get("name") == param_name: + existing_param = existing + break + + if not existing_param: + # Create a new query parameter from the requestBody schema + query_param = { + "name": param_name, + "in": "query", + "required": False, # Default to optional for GET requests + "schema": schema, + } + # Add description if present + if "description" in schema: + query_param["description"] = schema["description"] + operation["parameters"].append(query_param) + + # Remove request body from GET endpoint + del operation["requestBody"] + + return openapi_schema + + +def _extract_duplicate_union_types(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Extract duplicate union types to shared schema references. + + Stainless generates type names from union types based on their context, which can cause + duplicate names when the same union appears in different places. This function extracts + these duplicate unions to shared schema definitions and replaces inline definitions with + references to them. + + According to Stainless docs, when duplicate types are detected, they should be extracted + to the same ref and declared as a model. This ensures Stainless generates consistent + type names regardless of where the union is referenced. 
+ + Fixes: https://www.stainless.com/docs/reference/diagnostics#Python/DuplicateDeclaration + """ + if "components" not in openapi_schema or "schemas" not in openapi_schema["components"]: + return openapi_schema + + schemas = openapi_schema["components"]["schemas"] + + # Extract the Output union type (used in OpenAIResponseObjectWithInput-Output and ListOpenAIResponseInputItem) + output_union_schema_name = "OpenAIResponseMessageOutputUnion" + output_union_title = None + + # Get the union type from OpenAIResponseObjectWithInput-Output.input.items.anyOf + if "OpenAIResponseObjectWithInput-Output" in schemas: + schema = schemas["OpenAIResponseObjectWithInput-Output"] + if isinstance(schema, dict) and "properties" in schema: + input_prop = schema["properties"].get("input") + if isinstance(input_prop, dict) and "items" in input_prop: + items = input_prop["items"] + if isinstance(items, dict) and "anyOf" in items: + # Extract the union schema with deep copy + output_union_schema = copy.deepcopy(items["anyOf"]) + output_union_title = items.get("title", "OpenAIResponseMessageOutputUnion") + + # Collect all refs from the oneOf to detect duplicates + refs_in_oneof = set() + for item in output_union_schema: + if isinstance(item, dict) and "oneOf" in item: + oneof = item["oneOf"] + if isinstance(oneof, list): + for variant in oneof: + if isinstance(variant, dict) and "$ref" in variant: + refs_in_oneof.add(variant["$ref"]) + item["x-stainless-naming"] = "OpenAIResponseMessageOutputOneOf" + + # Remove duplicate refs from anyOf that are already in oneOf + deduplicated_schema = [] + for item in output_union_schema: + if isinstance(item, dict) and "$ref" in item: + if item["$ref"] not in refs_in_oneof: + deduplicated_schema.append(item) + else: + deduplicated_schema.append(item) + output_union_schema = deduplicated_schema + + # Create the shared schema with x-stainless-naming to ensure consistent naming + if output_union_schema_name not in schemas: + schemas[output_union_schema_name] = { + "anyOf": output_union_schema, + "title": output_union_title, + "x-stainless-naming": output_union_schema_name, + } + # Replace with reference + input_prop["items"] = {"$ref": f"#/components/schemas/{output_union_schema_name}"} + + # Replace the same union in ListOpenAIResponseInputItem.data.items.anyOf + if "ListOpenAIResponseInputItem" in schemas and output_union_schema_name in schemas: + schema = schemas["ListOpenAIResponseInputItem"] + if isinstance(schema, dict) and "properties" in schema: + data_prop = schema["properties"].get("data") + if isinstance(data_prop, dict) and "items" in data_prop: + items = data_prop["items"] + if isinstance(items, dict) and "anyOf" in items: + # Replace with reference + data_prop["items"] = {"$ref": f"#/components/schemas/{output_union_schema_name}"} + + # Extract the Input union type (used in _responses_Request.input.anyOf[1].items.anyOf) + input_union_schema_name = "OpenAIResponseMessageInputUnion" + + if "_responses_Request" in schemas: + schema = schemas["_responses_Request"] + if isinstance(schema, dict) and "properties" in schema: + input_prop = schema["properties"].get("input") + if isinstance(input_prop, dict) and "anyOf" in input_prop: + any_of = input_prop["anyOf"] + if isinstance(any_of, list) and len(any_of) > 1: + # Check the second item (index 1) which should be the array type + second_item = any_of[1] + if isinstance(second_item, dict) and "items" in second_item: + items = second_item["items"] + if isinstance(items, dict) and "anyOf" in items: + # Extract the union schema 
with deep copy + input_union_schema = copy.deepcopy(items["anyOf"]) + input_union_title = items.get("title", "OpenAIResponseMessageInputUnion") + + # Collect all refs from the oneOf to detect duplicates + refs_in_oneof = set() + for item in input_union_schema: + if isinstance(item, dict) and "oneOf" in item: + oneof = item["oneOf"] + if isinstance(oneof, list): + for variant in oneof: + if isinstance(variant, dict) and "$ref" in variant: + refs_in_oneof.add(variant["$ref"]) + item["x-stainless-naming"] = "OpenAIResponseMessageInputOneOf" + + # Remove duplicate refs from anyOf that are already in oneOf + deduplicated_schema = [] + for item in input_union_schema: + if isinstance(item, dict) and "$ref" in item: + if item["$ref"] not in refs_in_oneof: + deduplicated_schema.append(item) + else: + deduplicated_schema.append(item) + input_union_schema = deduplicated_schema + + # Create the shared schema with x-stainless-naming to ensure consistent naming + if input_union_schema_name not in schemas: + schemas[input_union_schema_name] = { + "anyOf": input_union_schema, + "title": input_union_title, + "x-stainless-naming": input_union_schema_name, + } + # Replace with reference + second_item["items"] = {"$ref": f"#/components/schemas/{input_union_schema_name}"} + + return openapi_schema + + +def _convert_multiline_strings_to_literal(obj: Any) -> Any: + """Recursively convert multi-line strings to LiteralScalarString for YAML block scalar formatting.""" + try: + from ruamel.yaml.scalarstring import LiteralScalarString + + if isinstance(obj, str) and "\n" in obj: + return LiteralScalarString(obj) + elif isinstance(obj, dict): + return {key: _convert_multiline_strings_to_literal(value) for key, value in obj.items()} + elif isinstance(obj, list): + return [_convert_multiline_strings_to_literal(item) for item in obj] + else: + return obj + except ImportError: + return obj + + +def _write_yaml_file(file_path: Path, schema: dict[str, Any]) -> None: + """Write schema to YAML file using ruamel.yaml if available, otherwise standard yaml.""" + try: + from ruamel.yaml import YAML + + yaml_writer = YAML() + yaml_writer.default_flow_style = False + yaml_writer.sort_keys = False + yaml_writer.width = 4096 + yaml_writer.allow_unicode = True + schema = _convert_multiline_strings_to_literal(schema) + with open(file_path, "w") as f: + yaml_writer.dump(schema, f) + except ImportError: + with open(file_path, "w") as f: + yaml.dump(schema, f, default_flow_style=False, sort_keys=False) + + # Post-process to remove trailing whitespace from all lines + with open(file_path) as f: + lines = f.readlines() + + # Strip trailing whitespace from each line, preserving newlines + cleaned_lines = [line.rstrip() + "\n" if line.endswith("\n") else line.rstrip() for line in lines] + + with open(file_path, "w") as f: + f.writelines(cleaned_lines) + + +def _apply_legacy_sorting(openapi_schema: dict[str, Any]) -> dict[str, Any]: + """ + Temporarily match the legacy ordering from origin/main so diffs are easier to read. + Remove this once the generator output stabilizes and we no longer need legacy diffs. 
+    """
+
+    def order_mapping(data: dict[str, Any], priority: list[str]) -> OrderedDict[str, Any]:
+        ordered: OrderedDict[str, Any] = OrderedDict()
+        for key in priority:
+            if key in data:
+                ordered[key] = data[key]
+        for key, value in data.items():
+            if key not in ordered:
+                ordered[key] = value
+        return ordered
+
+    paths = openapi_schema.get("paths")
+    if isinstance(paths, dict):
+        openapi_schema["paths"] = order_mapping(paths, LEGACY_PATH_ORDER)
+        for path, path_item in openapi_schema["paths"].items():
+            if not isinstance(path_item, dict):
+                continue
+            ordered_path_item = OrderedDict()
+            for method in ["get", "post", "put", "delete", "patch", "head", "options"]:
+                if method in path_item:
+                    ordered_path_item[method] = order_mapping(path_item[method], LEGACY_OPERATION_KEYS)
+            for key, value in path_item.items():
+                if key not in ordered_path_item:
+                    if isinstance(value, dict) and key.lower() in {
+                        "get",
+                        "post",
+                        "put",
+                        "delete",
+                        "patch",
+                        "head",
+                        "options",
+                    }:
+                        ordered_path_item[key] = order_mapping(value, LEGACY_OPERATION_KEYS)
+                    else:
+                        ordered_path_item[key] = value
+            openapi_schema["paths"][path] = ordered_path_item
+
+    components = openapi_schema.setdefault("components", {})
+    schemas = components.get("schemas")
+    if isinstance(schemas, dict):
+        components["schemas"] = order_mapping(schemas, LEGACY_SCHEMA_ORDER)
+    responses = components.get("responses")
+    if isinstance(responses, dict):
+        components["responses"] = order_mapping(responses, LEGACY_RESPONSE_ORDER)
+
+    if LEGACY_TAGS:
+        openapi_schema["tags"] = LEGACY_TAGS
+
+    if LEGACY_TAG_GROUPS:
+        openapi_schema["x-tagGroups"] = LEGACY_TAG_GROUPS
+
+    if LEGACY_SECURITY:
+        openapi_schema["security"] = LEGACY_SECURITY
+
+    return openapi_schema
+
+
+def _fix_schema_issues(openapi_schema: dict[str, Any]) -> dict[str, Any]:
+    """Fix common schema issues: exclusiveMinimum, null defaults, and add titles to unions."""
+    # Convert anyOf with const values to enums across the entire schema
+    _convert_anyof_const_to_enum(openapi_schema)
+
+    # Fix other schema issues and add titles to unions
+    if "components" in openapi_schema and "schemas" in openapi_schema["components"]:
+        for schema_name, schema_def in openapi_schema["components"]["schemas"].items():
+            _fix_schema_recursive(schema_def)
+            _add_titles_to_unions(schema_def, schema_name)
+    return openapi_schema
+
+
+def validate_openapi_schema(schema: dict[str, Any], schema_name: str = "OpenAPI schema") -> bool:
+    """
+    Validate an OpenAPI schema using openapi-spec-validator.
+
+    Args:
+        schema: The OpenAPI schema dictionary to validate
+        schema_name: Name of the schema for error reporting
+
+    Returns:
+        True if valid, False otherwise. Validation failures are printed rather
+        than raised, so callers decide how to react.
+    """
+    try:
+        validate_spec(schema)
+        print(f"{schema_name} is valid")
+        return True
+    except OpenAPISpecValidatorError as e:
+        print(f"{schema_name} validation failed: {e}")
+        return False
+    except Exception as e:
+        print(f"{schema_name} validation error: {e}")
+        return False
diff --git a/scripts/openapi_generator/stainless_config/__init__.py b/scripts/openapi_generator/stainless_config/__init__.py
new file mode 100644
index 000000000..bf44f82ba
--- /dev/null
+++ b/scripts/openapi_generator/stainless_config/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+# Package marker for Stainless config generation.
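The transforms in `schema_transforms.py` are pure dict-to-dict functions, which makes them easy to sanity-check in isolation. Below is a minimal, hypothetical usage sketch (not part of this patch) exercising the anyOf-of-const to enum conversion on a toy schema; the import path assumes the `scripts` directory is importable from the repo root.

```python
# Hypothetical sketch: exercising _convert_anyof_const_to_enum from the new
# schema_transforms module on a toy schema. The import path is an assumption.
from scripts.openapi_generator.schema_transforms import _convert_anyof_const_to_enum

schema = {
    "properties": {
        "role": {
            "anyOf": [
                {"type": "string", "const": "user"},
                {"type": "string", "const": "assistant"},
                {"type": "null"},
            ],
            "default": "user",
        }
    }
}

_convert_anyof_const_to_enum(schema)

# The anyOf of const strings collapses to a single enum; the null variant
# becomes nullable: True, and the existing default is preserved:
# {"default": "user", "type": "string", "enum": ["user", "assistant"], "nullable": True}
print(schema["properties"]["role"])
```

The same pattern (build a toy dict, run one transform, inspect the result) applies to `_normalize_empty_responses`, `_fix_schema_recursive`, and the other dict-level transforms, so each can be tested without generating a full spec.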
diff --git a/scripts/openapi_generator/stainless_config/generate_config.py b/scripts/openapi_generator/stainless_config/generate_config.py new file mode 100644 index 000000000..dabc2119f --- /dev/null +++ b/scripts/openapi_generator/stainless_config/generate_config.py @@ -0,0 +1,821 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from __future__ import annotations + +from collections.abc import Iterator +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import yaml + +HEADER = "# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json\n\n" + +SECTION_ORDER = [ + "organization", + "security", + "security_schemes", + "targets", + "client_settings", + "environments", + "pagination", + "settings", + "openapi", + "readme", + "resources", +] + +ORGANIZATION = { + "name": "llama-stack-client", + "docs": "https://llama-stack.readthedocs.io/en/latest/", + "contact": "llamastack@meta.com", +} + +SECURITY = [{}, {"BearerAuth": []}] + +SECURITY_SCHEMES = {"BearerAuth": {"type": "http", "scheme": "bearer"}} + +TARGETS = { + "node": { + "package_name": "llama-stack-client", + "production_repo": "llamastack/llama-stack-client-typescript", + "publish": {"npm": False}, + }, + "python": { + "package_name": "llama_stack_client", + "production_repo": "llamastack/llama-stack-client-python", + "options": {"use_uv": True}, + "publish": {"pypi": True}, + "project_name": "llama_stack_client", + }, + "kotlin": { + "reverse_domain": "com.llama_stack_client.api", + "production_repo": None, + "publish": {"maven": False}, + }, + "go": { + "package_name": "llama-stack-client", + "production_repo": "llamastack/llama-stack-client-go", + "options": {"enable_v2": True, "back_compat_use_shared_package": False}, + }, +} + +CLIENT_SETTINGS = { + "default_env_prefix": "LLAMA_STACK_CLIENT", + "opts": { + "api_key": { + "type": "string", + "read_env": "LLAMA_STACK_CLIENT_API_KEY", + "auth": {"security_scheme": "BearerAuth"}, + "nullable": True, + } + }, +} + +ENVIRONMENTS = {"production": "http://any-hosted-llama-stack.com"} + +PAGINATION = [ + { + "name": "datasets_iterrows", + "type": "offset", + "request": { + "dataset_id": {"type": "string"}, + "start_index": { + "type": "integer", + "x-stainless-pagination-property": {"purpose": "offset_count_param"}, + }, + "limit": {"type": "integer"}, + }, + "response": { + "data": {"type": "array", "items": {"type": "object"}}, + "next_index": { + "type": "integer", + "x-stainless-pagination-property": {"purpose": "offset_count_start_field"}, + }, + }, + }, + { + "name": "openai_cursor_page", + "type": "cursor", + "request": { + "limit": {"type": "integer"}, + "after": { + "type": "string", + "x-stainless-pagination-property": {"purpose": "next_cursor_param"}, + }, + }, + "response": { + "data": {"type": "array", "items": {}}, + "has_more": {"type": "boolean"}, + "last_id": { + "type": "string", + "x-stainless-pagination-property": {"purpose": "next_cursor_field"}, + }, + }, + }, +] + +SETTINGS = { + "license": "MIT", + "unwrap_response_fields": ["data"], + "file_header": "Copyright (c) Meta Platforms, Inc. 
and affiliates.\n" + "All rights reserved.\n" + "\n" + "This source code is licensed under the terms described in the " + "LICENSE file in\n" + "the root directory of this source tree.\n", +} + +OPENAPI = { + "transformations": [ + { + "command": "mergeObject", + "reason": "Better return_type using enum", + "args": { + "target": ["$.components.schemas"], + "object": { + "ReturnType": { + "additionalProperties": False, + "properties": { + "type": { + "enum": [ + "string", + "number", + "boolean", + "array", + "object", + "json", + "union", + "chat_completion_input", + "completion_input", + "agent_turn_input", + ] + } + }, + "required": ["type"], + "type": "object", + } + }, + }, + }, + { + "command": "replaceProperties", + "reason": "Replace return type properties with better model (see above)", + "args": { + "filter": { + "only": [ + "$.components.schemas.ScoringFn.properties.return_type", + "$.components.schemas.RegisterScoringFunctionRequest.properties.return_type", + ] + }, + "value": {"$ref": "#/components/schemas/ReturnType"}, + }, + }, + { + "command": "oneOfToAnyOf", + "reason": "Prism (mock server) doesn't like one of our " + "requests as it technically matches multiple " + "variants", + }, + ] +} + +README = { + "example_requests": { + "default": { + "type": "request", + "endpoint": "post /v1/chat/completions", + "params": {}, + }, + "headline": {"type": "request", "endpoint": "get /v1/models", "params": {}}, + "pagination": { + "type": "request", + "endpoint": "post /v1/chat/completions", + "params": {}, + }, + } +} + +ALL_RESOURCES = { + "$shared": { + "models": { + "interleaved_content_item": "InterleavedContentItem", + "interleaved_content": "InterleavedContent", + "param_type": "ParamType", + "safety_violation": "SafetyViolation", + "sampling_params": "SamplingParams", + "scoring_result": "ScoringResult", + "system_message": "SystemMessage", + } + }, + "toolgroups": { + "models": { + "tool_group": "ToolGroup", + "list_tool_groups_response": "ListToolGroupsResponse", + }, + "methods": { + "register": "post /v1/toolgroups", + "get": "get /v1/toolgroups/{toolgroup_id}", + "list": "get /v1/toolgroups", + "unregister": "delete /v1/toolgroups/{toolgroup_id}", + }, + }, + "tools": { + "methods": { + "get": "get /v1/tools/{tool_name}", + "list": {"paginated": False, "endpoint": "get /v1/tools"}, + } + }, + "tool_runtime": { + "models": { + "tool_def": "ToolDef", + "tool_invocation_result": "ToolInvocationResult", + }, + "methods": { + "list_tools": { + "paginated": False, + "endpoint": "get /v1/tool-runtime/list-tools", + }, + "invoke_tool": "post /v1/tool-runtime/invoke", + }, + }, + "responses": { + "models": { + "response_object_stream": "OpenAIResponseObjectStream", + "response_object": "OpenAIResponseObject", + }, + "methods": { + "create": { + "type": "http", + "streaming": { + "stream_event_model": "responses.response_object_stream", + "param_discriminator": "stream", + }, + "endpoint": "post /v1/responses", + }, + "retrieve": "get /v1/responses/{response_id}", + "list": {"type": "http", "endpoint": "get /v1/responses"}, + "delete": { + "type": "http", + "endpoint": "delete /v1/responses/{response_id}", + }, + }, + "subresources": { + "input_items": { + "methods": { + "list": { + "type": "http", + "paginated": False, + "endpoint": "get /v1/responses/{response_id}/input_items", + } + } + } + }, + }, + "prompts": { + "models": {"prompt": "Prompt", "list_prompts_response": "ListPromptsResponse"}, + "methods": { + "create": "post /v1/prompts", + "list": {"paginated": False, 
"endpoint": "get /v1/prompts"}, + "retrieve": "get /v1/prompts/{prompt_id}", + "update": "post /v1/prompts/{prompt_id}", + "delete": "delete /v1/prompts/{prompt_id}", + "set_default_version": "post /v1/prompts/{prompt_id}/set-default-version", + }, + "subresources": { + "versions": { + "methods": { + "list": { + "paginated": False, + "endpoint": "get /v1/prompts/{prompt_id}/versions", + } + } + } + }, + }, + "conversations": { + "models": {"conversation_object": "Conversation"}, + "methods": { + "create": {"type": "http", "endpoint": "post /v1/conversations"}, + "retrieve": "get /v1/conversations/{conversation_id}", + "update": { + "type": "http", + "endpoint": "post /v1/conversations/{conversation_id}", + }, + "delete": { + "type": "http", + "endpoint": "delete /v1/conversations/{conversation_id}", + }, + }, + "subresources": { + "items": { + "methods": { + "get": { + "type": "http", + "endpoint": "get /v1/conversations/{conversation_id}/items/{item_id}", + }, + "list": { + "type": "http", + "endpoint": "get /v1/conversations/{conversation_id}/items", + }, + "create": { + "type": "http", + "endpoint": "post /v1/conversations/{conversation_id}/items", + }, + "delete": { + "type": "http", + "endpoint": "delete /v1/conversations/{conversation_id}/items/{item_id}", + }, + } + } + }, + }, + "inspect": { + "models": { + "healthInfo": "HealthInfo", + "providerInfo": "ProviderInfo", + "routeInfo": "RouteInfo", + "versionInfo": "VersionInfo", + }, + "methods": {"health": "get /v1/health", "version": "get /v1/version"}, + }, + "embeddings": { + "models": {"create_embeddings_response": "OpenAIEmbeddingsResponse"}, + "methods": {"create": "post /v1/embeddings"}, + }, + "chat": { + "models": {"chat_completion_chunk": "OpenAIChatCompletionChunk"}, + "subresources": { + "completions": { + "methods": { + "create": { + "type": "http", + "streaming": { + "stream_event_model": "chat.chat_completion_chunk", + "param_discriminator": "stream", + }, + "endpoint": "post /v1/chat/completions", + }, + "list": { + "type": "http", + "paginated": False, + "endpoint": "get /v1/chat/completions", + }, + "retrieve": { + "type": "http", + "endpoint": "get /v1/chat/completions/{completion_id}", + }, + } + } + }, + }, + "completions": { + "methods": { + "create": { + "type": "http", + "streaming": {"param_discriminator": "stream"}, + "endpoint": "post /v1/completions", + } + } + }, + "vector_io": { + "models": {"queryChunksResponse": "QueryChunksResponse"}, + "methods": { + "insert": "post /v1/vector-io/insert", + "query": "post /v1/vector-io/query", + }, + }, + "vector_stores": { + "models": { + "vector_store": "VectorStoreObject", + "list_vector_stores_response": "VectorStoreListResponse", + "vector_store_delete_response": "VectorStoreDeleteResponse", + "vector_store_search_response": "VectorStoreSearchResponsePage", + }, + "methods": { + "create": "post /v1/vector_stores", + "list": "get /v1/vector_stores", + "retrieve": "get /v1/vector_stores/{vector_store_id}", + "update": "post /v1/vector_stores/{vector_store_id}", + "delete": "delete /v1/vector_stores/{vector_store_id}", + "search": "post /v1/vector_stores/{vector_store_id}/search", + }, + "subresources": { + "files": { + "models": {"vector_store_file": "VectorStoreFileObject"}, + "methods": { + "list": "get /v1/vector_stores/{vector_store_id}/files", + "retrieve": "get /v1/vector_stores/{vector_store_id}/files/{file_id}", + "update": "post /v1/vector_stores/{vector_store_id}/files/{file_id}", + "delete": "delete /v1/vector_stores/{vector_store_id}/files/{file_id}", 
+ "create": "post /v1/vector_stores/{vector_store_id}/files", + "content": "get /v1/vector_stores/{vector_store_id}/files/{file_id}/content", + }, + }, + "file_batches": { + "models": { + "vector_store_file_batches": "VectorStoreFileBatchObject", + "list_vector_store_files_in_batch_response": "VectorStoreFilesListInBatchResponse", + }, + "methods": { + "create": "post /v1/vector_stores/{vector_store_id}/file_batches", + "retrieve": "get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}", + "list_files": "get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files", + "cancel": "post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel", + }, + }, + }, + }, + "models": { + "models": { + "model": "OpenAIModel", + "list_models_response": "OpenAIListModelsResponse", + }, + "methods": { + "list": {"paginated": False, "endpoint": "get /v1/models"}, + "retrieve": "get /v1/models/{model_id}", + "register": "post /v1/models", + "unregister": "delete /v1/models/{model_id}", + }, + "subresources": {"openai": {"methods": {"list": {"paginated": False, "endpoint": "get /v1/models"}}}}, + }, + "providers": { + "models": {"list_providers_response": "ListProvidersResponse"}, + "methods": { + "list": {"paginated": False, "endpoint": "get /v1/providers"}, + "retrieve": "get /v1/providers/{provider_id}", + }, + }, + "routes": { + "models": {"list_routes_response": "ListRoutesResponse"}, + "methods": {"list": {"paginated": False, "endpoint": "get /v1/inspect/routes"}}, + }, + "moderations": { + "models": {"create_response": "ModerationObject"}, + "methods": {"create": "post /v1/moderations"}, + }, + "safety": { + "models": {"run_shield_response": "RunShieldResponse"}, + "methods": {"run_shield": "post /v1/safety/run-shield"}, + }, + "shields": { + "models": {"shield": "Shield", "list_shields_response": "ListShieldsResponse"}, + "methods": { + "retrieve": "get /v1/shields/{identifier}", + "list": {"paginated": False, "endpoint": "get /v1/shields"}, + "register": "post /v1/shields", + "delete": "delete /v1/shields/{identifier}", + }, + }, + "scoring": { + "methods": { + "score": "post /v1/scoring/score", + "score_batch": "post /v1/scoring/score-batch", + } + }, + "scoring_functions": { + "models": { + "scoring_fn": "ScoringFn", + "scoring_fn_params": "ScoringFnParams", + "list_scoring_functions_response": "ListScoringFunctionsResponse", + }, + "methods": { + "retrieve": "get /v1/scoring-functions/{scoring_fn_id}", + "list": {"paginated": False, "endpoint": "get /v1/scoring-functions"}, + "register": "post /v1/scoring-functions", + "unregister": "delete /v1/scoring-functions/{scoring_fn_id}", + }, + }, + "files": { + "models": { + "file": "OpenAIFileObject", + "list_files_response": "ListOpenAIFileResponse", + "delete_file_response": "OpenAIFileDeleteResponse", + }, + "methods": { + "create": "post /v1/files", + "list": "get /v1/files", + "retrieve": "get /v1/files/{file_id}", + "delete": "delete /v1/files/{file_id}", + "content": "get /v1/files/{file_id}/content", + }, + }, + "batches": { + "methods": { + "create": "post /v1/batches", + "list": "get /v1/batches", + "retrieve": "get /v1/batches/{batch_id}", + "cancel": "post /v1/batches/{batch_id}/cancel", + } + }, + "alpha": { + "subresources": { + "inference": {"methods": {"rerank": "post /v1alpha/inference/rerank"}}, + "post_training": { + "models": { + "algorithm_config": "AlgorithmConfig", + "post_training_job": "PostTrainingJob", + "list_post_training_jobs_response": "ListPostTrainingJobsResponse", + }, + "methods": { + 
"preference_optimize": "post /v1alpha/post-training/preference-optimize", + "supervised_fine_tune": "post /v1alpha/post-training/supervised-fine-tune", + }, + "subresources": { + "job": { + "methods": { + "artifacts": "get /v1alpha/post-training/job/artifacts", + "cancel": "post /v1alpha/post-training/job/cancel", + "status": "get /v1alpha/post-training/job/status", + "list": { + "paginated": False, + "endpoint": "get /v1alpha/post-training/jobs", + }, + } + } + }, + }, + "benchmarks": { + "models": { + "benchmark": "Benchmark", + "list_benchmarks_response": "ListBenchmarksResponse", + }, + "methods": { + "retrieve": "get /v1alpha/eval/benchmarks/{benchmark_id}", + "list": { + "paginated": False, + "endpoint": "get /v1alpha/eval/benchmarks", + }, + "register": "post /v1alpha/eval/benchmarks", + "unregister": "delete /v1alpha/eval/benchmarks/{benchmark_id}", + }, + }, + "eval": { + "models": { + "evaluate_response": "EvaluateResponse", + "benchmark_config": "BenchmarkConfig", + "job": "Job", + }, + "methods": { + "evaluate_rows": "post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations", + "run_eval": "post /v1alpha/eval/benchmarks/{benchmark_id}/jobs", + "evaluate_rows_alpha": "post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations", + "run_eval_alpha": "post /v1alpha/eval/benchmarks/{benchmark_id}/jobs", + }, + "subresources": { + "jobs": { + "methods": { + "cancel": "delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + "status": "get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + "retrieve": "get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", + } + } + }, + }, + } + }, + "beta": { + "subresources": { + "datasets": { + "models": {"list_datasets_response": "ListDatasetsResponse"}, + "methods": { + "register": "post /v1beta/datasets", + "retrieve": "get /v1beta/datasets/{dataset_id}", + "list": {"paginated": False, "endpoint": "get /v1beta/datasets"}, + "unregister": "delete /v1beta/datasets/{dataset_id}", + "iterrows": "get /v1beta/datasetio/iterrows/{dataset_id}", + "appendrows": "post /v1beta/datasetio/append-rows/{dataset_id}", + }, + } + } + }, +} + + +HTTP_METHODS = {"get", "post", "put", "patch", "delete", "options", "head"} + + +@dataclass +class Endpoint: + method: str + path: str + extra: dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_config(cls, value: Any) -> Endpoint: + if isinstance(value, str): + method, _, path = value.partition(" ") + return cls._from_parts(method, path) + if isinstance(value, dict) and "endpoint" in value: + method, _, path = value["endpoint"].partition(" ") + extra = {k: v for k, v in value.items() if k != "endpoint"} + endpoint = cls._from_parts(method, path) + endpoint.extra.update(extra) + return endpoint + raise ValueError(f"Unsupported endpoint value: {value!r}") + + @classmethod + def _from_parts(cls, method: str, path: str) -> Endpoint: + method = method.strip().lower() + path = path.strip() + if method not in HTTP_METHODS: + raise ValueError(f"Unsupported HTTP method for Stainless config: {method!r}") + if not path.startswith("/"): + raise ValueError(f"Endpoint path must start with '/': {path!r}") + return cls(method=method, path=path) + + def to_config(self) -> Any: + if not self.extra: + return f"{self.method} {self.path}" + data = dict(self.extra) + data["endpoint"] = f"{self.method} {self.path}" + return data + + def route_key(self) -> str: + return f"{self.method} {self.path}" + + +@dataclass +class Resource: + models: dict[str, str] | None = None + methods: dict[str, 
Endpoint] = field(default_factory=dict) + subresources: dict[str, Resource] = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> Resource: + models = data.get("models") + methods = {name: Endpoint.from_config(value) for name, value in data.get("methods", {}).items()} + subresources = {name: cls.from_dict(value) for name, value in data.get("subresources", {}).items()} + return cls(models=models, methods=methods, subresources=subresources) + + def to_config(self) -> dict[str, Any]: + result: dict[str, Any] = {} + if self.models: + result["models"] = self.models + if self.methods: + result["methods"] = {name: endpoint.to_config() for name, endpoint in self.methods.items()} + if self.subresources: + result["subresources"] = {name: resource.to_config() for name, resource in self.subresources.items()} + return result + + def collect_endpoint_paths(self) -> set[str]: + paths = {endpoint.route_key() for endpoint in self.methods.values()} + for subresource in self.subresources.values(): + paths.update(subresource.collect_endpoint_paths()) + return paths + + def iter_endpoints(self, prefix: str) -> Iterator[tuple[str, str]]: + for method_name, endpoint in self.methods.items(): + label = f"{prefix}.{method_name}" if prefix else method_name + yield endpoint.route_key(), label + for sub_name, subresource in self.subresources.items(): + sub_prefix = f"{prefix}.{sub_name}" if prefix else sub_name + yield from subresource.iter_endpoints(sub_prefix) + + +_RESOURCES = {name: Resource.from_dict(data) for name, data in ALL_RESOURCES.items()} + + +def _load_openapi_paths(openapi_path: Path) -> set[str]: + spec = yaml.safe_load(openapi_path.read_text()) or {} + paths: set[str] = set() + for path, path_item in (spec.get("paths") or {}).items(): + if not isinstance(path_item, dict): + continue + for method, operation in path_item.items(): + if not isinstance(operation, dict): + continue + paths.add(f"{str(method).lower()} {path}") + return paths + + +@dataclass(frozen=True) +class StainlessConfig: + organization: dict[str, Any] + security: list[Any] + security_schemes: dict[str, Any] + targets: dict[str, Any] + client_settings: dict[str, Any] + environments: dict[str, Any] + pagination: list[dict[str, Any]] + settings: dict[str, Any] + openapi: dict[str, Any] + readme: dict[str, Any] + resources: dict[str, Resource] + + @classmethod + def make(cls) -> StainlessConfig: + return cls( + organization=ORGANIZATION, + security=SECURITY, + security_schemes=SECURITY_SCHEMES, + targets=TARGETS, + client_settings=CLIENT_SETTINGS, + environments=ENVIRONMENTS, + pagination=PAGINATION, + settings=SETTINGS, + openapi=OPENAPI, + readme=README, + resources=dict(_RESOURCES), + ) + + def referenced_paths(self) -> set[str]: + paths: set[str] = set() + for resource in self.resources.values(): + paths.update(resource.collect_endpoint_paths()) + paths.update(self.readme_endpoint_paths()) + return paths + + def readme_endpoint_paths(self) -> set[str]: + example_requests = self.readme.get("example_requests", {}) if self.readme else {} + paths: set[str] = set() + for entry in example_requests.values(): + endpoint = entry.get("endpoint") if isinstance(entry, dict) else None + if isinstance(endpoint, str): + method, _, route = endpoint.partition(" ") + method = method.strip().lower() + route = route.strip() + if method and route: + paths.add(f"{method} {route}") + return paths + + def endpoint_map(self) -> dict[str, list[str]]: + mapping: dict[str, list[str]] = {} + for resource_name, resource in 
self.resources.items(): + for route, label in resource.iter_endpoints(resource_name): + mapping.setdefault(route, []).append(label) + return mapping + + def validate_unique_endpoints(self) -> None: + duplicates: dict[str, list[str]] = {} + for route, labels in self.endpoint_map().items(): + top_levels = {label.split(".", 1)[0] for label in labels} + if len(top_levels) > 1: + duplicates[route] = labels + if duplicates: + formatted = "\n".join( + f" - {route} defined in: {', '.join(sorted(labels))}" for route, labels in sorted(duplicates.items()) + ) + raise ValueError("Duplicate endpoints found across resources:\n" + formatted) + + def validate_readme_endpoints(self) -> None: + resource_paths: set[str] = set() + for resource in self.resources.values(): + resource_paths.update(resource.collect_endpoint_paths()) + missing = sorted(path for path in self.readme_endpoint_paths() if path not in resource_paths) + if missing: + formatted = "\n".join(f" - {path}" for path in missing) + raise ValueError("README example endpoints are not present in Stainless resources:\n" + formatted) + + def to_dict(self) -> dict[str, Any]: + cfg: dict[str, Any] = {} + for section in SECTION_ORDER: + if section == "resources": + cfg[section] = {name: resource.to_config() for name, resource in self.resources.items()} + continue + cfg[section] = getattr(self, section) + return cfg + + def validate_against_openapi(self, openapi_path: Path) -> None: + if not openapi_path.exists(): + raise FileNotFoundError(f"OpenAPI spec not found at {openapi_path}") + spec_paths = _load_openapi_paths(openapi_path) + config_paths = self.referenced_paths() + missing = sorted(path for path in config_paths if path not in spec_paths) + if missing: + formatted = "\n".join(f" - {path}" for path in missing) + raise ValueError("Stainless config references missing endpoints:\n" + formatted) + + def validate(self, openapi_path: Path | None = None) -> None: + self.validate_unique_endpoints() + self.validate_readme_endpoints() + if openapi_path is not None: + self.validate_against_openapi(openapi_path) + + +def build_config() -> dict[str, Any]: + return StainlessConfig.make().to_dict() + + +def write_config(repo_root: Path, openapi_path: Path | None = None) -> Path: + stainless_config = StainlessConfig.make() + spec_path = (openapi_path or (repo_root / "client-sdks" / "stainless" / "openapi.yml")).resolve() + stainless_config.validate(spec_path) + yaml_text = yaml.safe_dump(stainless_config.to_dict(), sort_keys=False) + output = repo_root / "client-sdks" / "stainless" / "config.yml" + output.write_text(HEADER + yaml_text) + return output + + +def main() -> None: + repo_root = Path(__file__).resolve().parents[3] + output = write_config(repo_root) + print(f"Wrote Stainless config: {output}") + + +if __name__ == "__main__": + main() diff --git a/scripts/openapi_generator/state.py b/scripts/openapi_generator/state.py new file mode 100644 index 000000000..babd1451a --- /dev/null +++ b/scripts/openapi_generator/state.py @@ -0,0 +1,41 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Shared state for the OpenAPI generator module. 
+""" + +from typing import Any + +from llama_stack_api import Api +from llama_stack_api.schema_utils import clear_dynamic_schema_types, register_dynamic_schema_type + +_dynamic_model_registry: dict[str, type] = {} + +# Cache for protocol methods to avoid repeated lookups +_protocol_methods_cache: dict[Api, dict[str, Any]] | None = None + +# Global dict to store extra body field information by endpoint +# Key: (path, method) tuple, Value: list of (param_name, param_type, description) tuples +_extra_body_fields: dict[tuple[str, str], list[tuple[str, type, str | None]]] = {} + + +def register_dynamic_model(name: str, model: type) -> type: + """Register and deduplicate dynamically generated request models.""" + existing = _dynamic_model_registry.get(name) + if existing is not None: + register_dynamic_schema_type(existing) + return existing + _dynamic_model_registry[name] = model + register_dynamic_schema_type(model) + return model + + +def reset_generator_state() -> None: + """Clear per-run caches so repeated generations stay deterministic.""" + _dynamic_model_registry.clear() + _extra_body_fields.clear() + clear_dynamic_schema_types() diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index de79b4d17..0eec46bc2 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -8,7 +8,8 @@ import subprocess import sys from pathlib import Path -from typing import Any +from types import UnionType +from typing import Annotated, Any, Union, get_args, get_origin from pydantic_core import PydanticUndefined from rich.progress import Progress, SpinnerColumn, TextColumn @@ -22,7 +23,7 @@ def get_api_docstring(api_name: str) -> str | None: """Extract docstring from the API protocol class.""" try: # Import the API module dynamically - api_module = __import__(f"llama_stack.apis.{api_name}", fromlist=[api_name.title()]) + api_module = __import__(f"llama_stack_api.{api_name}", fromlist=[api_name.title()]) # Get the main protocol class (usually capitalized API name) protocol_class_name = api_name.title() @@ -51,6 +52,41 @@ class ChangedPathTracker: return self._changed_paths +def extract_type_annotation(annotation: Any) -> str: + """extract a type annotation into a clean string representation.""" + if annotation is None: + return "Any" + + if annotation is type(None): + return "None" + + origin = get_origin(annotation) + args = get_args(annotation) + + # recursive workaround for Annotated types to ignore FieldInfo part + if origin is Annotated and args: + return extract_type_annotation(args[0]) + + if origin in [Union, UnionType]: + non_none_args = [arg for arg in args if arg is not type(None)] + has_none = len(non_none_args) < len(args) + + if len(non_none_args) == 1: + formatted = extract_type_annotation(non_none_args[0]) + return f"{formatted} | None" if has_none else formatted + else: + formatted_args = [extract_type_annotation(arg) for arg in non_none_args] + result = " | ".join(formatted_args) + return f"{result} | None" if has_none else result + + if origin is not None and args: + origin_name = getattr(origin, "__name__", str(origin)) + formatted_args = [extract_type_annotation(arg) for arg in args] + return f"{origin_name}[{', '.join(formatted_args)}]" + + return annotation.__name__ if hasattr(annotation, "__name__") else str(annotation) + + def get_config_class_info(config_class_path: str) -> dict[str, Any]: """Extract configuration information from a config class.""" try: @@ -78,13 +114,8 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]: for 
field_name, field in config_class.model_fields.items(): if getattr(field, "exclude", False): continue - field_type = str(field.annotation) if field.annotation else "Any" - # this string replace is ridiculous - field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "") - field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "") - field_type = field_type.replace("llama_stack.apis.inference.inference.", "") - field_type = field_type.replace("llama_stack.providers.", "") + field_type = extract_type_annotation(field.annotation) default_value = field.default if field.default_factory is not None: @@ -344,8 +375,16 @@ def generate_index_docs(api_name: str, api_docstring: str | None, provider_entri # Add YAML frontmatter for index md_lines.append("---") if api_docstring: - clean_desc = api_docstring.strip().replace('"', '\\"') - md_lines.append(f'description: "{clean_desc}"') + # Handle multi-line descriptions in YAML + if "\n" in api_docstring.strip(): + md_lines.append("description: |") + for line in api_docstring.strip().split("\n"): + # Avoid trailing whitespace by only adding spaces to non-empty lines + md_lines.append(f" {line}" if line.strip() else "") + else: + # For single line descriptions, format properly for YAML + clean_desc = api_docstring.strip().replace('"', '\\"') + md_lines.append(f'description: "{clean_desc}"') md_lines.append(f"sidebar_label: {sidebar_label}") md_lines.append(f"title: {api_name.title()}") md_lines.append("---") diff --git a/scripts/run-ui-linter.sh b/scripts/run-ui-linter.sh index 3ced4483b..0d69ba5f4 100755 --- a/scripts/run-ui-linter.sh +++ b/scripts/run-ui-linter.sh @@ -6,7 +6,7 @@ # the root directory of this source tree. set -e -cd llama_stack/ui +cd src/llama_stack_ui if [ ! -d node_modules ] || [ ! -x node_modules/.bin/prettier ] || [ ! -x node_modules/.bin/eslint ]; then echo "UI dependencies not installed, skipping prettier/linter check" diff --git a/scripts/run_openapi_generator.sh b/scripts/run_openapi_generator.sh new file mode 100755 index 000000000..d4e3b2ec7 --- /dev/null +++ b/scripts/run_openapi_generator.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
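+
+# Pipeline: (1) regenerate the OpenAPI spec into docs/static, (2) copy the
+# Stainless variant of the spec into client-sdks/stainless/, then (3) rebuild
+# the Stainless config.yml from it. Paths are resolved relative to this
+# script, so it can be invoked from any working directory.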
+ +PYTHONPATH=${PYTHONPATH:-} +THIS_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" + +set -euo pipefail + + +stack_dir=$(dirname "$THIS_DIR") +PYTHONPATH=$PYTHONPATH:$stack_dir \ + python3 -m scripts.openapi_generator "$stack_dir"/docs/static + +cp "$stack_dir"/docs/static/stainless-llama-stack-spec.yaml "$stack_dir"/client-sdks/stainless/openapi.yml +PYTHONPATH=$PYTHONPATH:$stack_dir \ + python3 -m scripts.openapi_generator.stainless_config.generate_config diff --git a/scripts/telemetry/grafana-dashboards.yaml b/scripts/telemetry/grafana-dashboards.yaml new file mode 100644 index 000000000..f063fa518 --- /dev/null +++ b/scripts/telemetry/grafana-dashboards.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'Llama Stack' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards diff --git a/scripts/telemetry/grafana-datasources.yaml b/scripts/telemetry/grafana-datasources.yaml index d01fe04ce..0634ac687 100644 --- a/scripts/telemetry/grafana-datasources.yaml +++ b/scripts/telemetry/grafana-datasources.yaml @@ -5,6 +5,7 @@ datasources: type: prometheus access: proxy url: http://prometheus:9090 + uid: prometheus isDefault: true editable: true diff --git a/scripts/telemetry/llama-stack-dashboard.json b/scripts/telemetry/llama-stack-dashboard.json new file mode 100644 index 000000000..a8db9713c --- /dev/null +++ b/scripts/telemetry/llama-stack-dashboard.json @@ -0,0 +1,639 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"input\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Prompt Tokens", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"output\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Completion Tokens", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", + "legendFormat": "p95", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", + "legendFormat": "p99", + "refId": "B" + } + ], + "title": "HTTP Request Duration (p95, p99)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", + "refId": "A" + } + ], + "title": "Total Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 8 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(llama_stack_http_server_active_requests)", + "refId": "A" + } + ], + "title": "Active Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", + "legendFormat": "{{http_target}} - {{http_status_code}}", + "refId": "A" + } + ], + "title": "Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", + "legendFormat": "Request", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", + "legendFormat": "Response", + "refId": "B" + } + ], + "title": "Request/Response Sizes", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 39, + "tags": [ + "llama-stack" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "browser", + "title": "Llama Stack Metrics", + "uid": "llama-stack-metrics", + "version": 17, + "weekStart": "" +} diff --git a/scripts/telemetry/setup_telemetry.sh b/scripts/telemetry/setup_telemetry.sh index ecdd56175..cbc052f92 100755 --- a/scripts/telemetry/setup_telemetry.sh +++ b/scripts/telemetry/setup_telemetry.sh @@ -16,14 +16,59 @@ set -Eeuo pipefail -if command -v podman &> /dev/null; then - CONTAINER_RUNTIME="podman" -elif command -v docker &> /dev/null; then - CONTAINER_RUNTIME="docker" -else - echo "🚨 Neither Podman nor Docker could be found" - echo "Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation" - exit 1 +# Parse arguments +CONTAINER_RUNTIME="" + +print_usage() { + echo "Usage: $0 [--container docker|podman]" + echo "" + echo "Options:" + echo " -c, --container Choose container runtime (docker or podman)." + echo " -h, --help Show this help." +} + +while [[ $# -gt 0 ]]; do + case "$1" in + -c|--container) + if [[ $# -lt 2 ]]; then + echo "🚨 --container requires a value: docker or podman" + exit 1 + fi + case "$2" in + docker|podman) + CONTAINER_RUNTIME="$2" + shift 2 + ;; + *) + echo "🚨 Invalid container runtime: $2" + echo "Valid options are: docker, podman" + exit 1 + ;; + esac + ;; + -h|--help) + print_usage + exit 0 + ;; + *) + echo "🚨 Unknown argument: $1" + print_usage + exit 1 + ;; + esac +done + +# Detect container runtime if not specified +if [[ -z "$CONTAINER_RUNTIME" ]]; then + if command -v podman &> /dev/null; then + CONTAINER_RUNTIME="podman" + elif command -v docker &> /dev/null; then + CONTAINER_RUNTIME="docker" + else + echo "🚨 Neither Podman nor Docker could be found" + echo "Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation" + exit 1 + fi fi echo "🚀 Setting up telemetry stack for Llama Stack using $CONTAINER_RUNTIME..." 
@@ -90,6 +135,8 @@ $CONTAINER_RUNTIME run -d --name grafana \ -e GF_SECURITY_ADMIN_PASSWORD=admin \ -e GF_USERS_ALLOW_SIGN_UP=false \ -v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \ + -v "$SCRIPT_DIR/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \ + -v "$SCRIPT_DIR/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \ docker.io/grafana/grafana:11.0.0 # Wait for services to start diff --git a/scripts/unit-tests.sh b/scripts/unit-tests.sh index ff42d3039..481c6fc95 100755 --- a/scripts/unit-tests.sh +++ b/scripts/unit-tests.sh @@ -27,4 +27,4 @@ fi # Run unit tests with coverage uv run --python "$PYTHON_VERSION" --with-editable . --group unit \ - coverage run --source=llama_stack -m pytest -s -v tests/unit/ "$@" + coverage run --source=src/llama_stack -m pytest -s -v tests/unit/ "$@" diff --git a/scripts/uv-run-with-index.sh b/scripts/uv-run-with-index.sh new file mode 100755 index 000000000..18d0a0e9c --- /dev/null +++ b/scripts/uv-run-with-index.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +set -euo pipefail + +# Detect current branch and target branch +# In GitHub Actions, use GITHUB_REF/GITHUB_BASE_REF +if [[ -n "${GITHUB_REF:-}" ]]; then + BRANCH="${GITHUB_REF#refs/heads/}" +else + BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "") +fi + +# For PRs, check the target branch +if [[ -n "${GITHUB_BASE_REF:-}" ]]; then + TARGET_BRANCH="${GITHUB_BASE_REF}" +else + TARGET_BRANCH=$(git rev-parse --abbrev-ref HEAD@{upstream} 2>/dev/null | sed 's|origin/||' || echo "") +fi + +# Check if on a release branch or targeting one, or LLAMA_STACK_RELEASE_MODE is set +IS_RELEASE=false +if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then + IS_RELEASE=true +elif [[ "$TARGET_BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then + IS_RELEASE=true +elif [[ "${LLAMA_STACK_RELEASE_MODE:-}" == "true" ]]; then + IS_RELEASE=true +fi + +# On release branches, use test.pypi as extra index for RC versions +if [[ "$IS_RELEASE" == "true" ]]; then + export UV_EXTRA_INDEX_URL="https://test.pypi.org/simple/" + export UV_INDEX_STRATEGY="unsafe-best-match" +fi + +# Run uv with all arguments passed through +exec uv "$@" diff --git a/docs/openapi_generator/pyopenapi/__init__.py b/src/llama_stack/__init__.py similarity index 100% rename from docs/openapi_generator/pyopenapi/__init__.py rename to src/llama_stack/__init__.py diff --git a/llama_stack/apis/__init__.py b/src/llama_stack/cli/__init__.py similarity index 100% rename from llama_stack/apis/__init__.py rename to src/llama_stack/cli/__init__.py diff --git a/llama_stack/cli/llama.py b/src/llama_stack/cli/llama.py similarity index 100% rename from llama_stack/cli/llama.py rename to src/llama_stack/cli/llama.py diff --git a/llama_stack/apis/common/__init__.py b/src/llama_stack/cli/scripts/__init__.py similarity index 100% rename from llama_stack/apis/common/__init__.py rename to src/llama_stack/cli/scripts/__init__.py diff --git a/llama_stack/cli/scripts/install-wheel-from-presigned.sh b/src/llama_stack/cli/scripts/install-wheel-from-presigned.sh similarity index 100% rename from llama_stack/cli/scripts/install-wheel-from-presigned.sh rename to src/llama_stack/cli/scripts/install-wheel-from-presigned.sh diff --git a/llama_stack/cli/scripts/run.py 
b/src/llama_stack/cli/scripts/run.py similarity index 100% rename from llama_stack/cli/scripts/run.py rename to src/llama_stack/cli/scripts/run.py diff --git a/llama_stack/cli/stack/__init__.py b/src/llama_stack/cli/stack/__init__.py similarity index 100% rename from llama_stack/cli/stack/__init__.py rename to src/llama_stack/cli/stack/__init__.py diff --git a/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py similarity index 99% rename from llama_stack/cli/stack/_list_deps.py rename to src/llama_stack/cli/stack/_list_deps.py index 18141be5f..82bef1a4f 100644 --- a/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -21,7 +21,7 @@ from llama_stack.core.datatypes import ( from llama_stack.core.distribution import get_provider_registry from llama_stack.core.stack import replace_env_vars from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api +from llama_stack_api import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates" diff --git a/llama_stack/cli/stack/list_apis.py b/src/llama_stack/cli/stack/list_apis.py similarity index 100% rename from llama_stack/cli/stack/list_apis.py rename to src/llama_stack/cli/stack/list_apis.py diff --git a/llama_stack/cli/stack/list_deps.py b/src/llama_stack/cli/stack/list_deps.py similarity index 94% rename from llama_stack/cli/stack/list_deps.py rename to src/llama_stack/cli/stack/list_deps.py index b6eee1f3b..d6c52c8ef 100644 --- a/llama_stack/cli/stack/list_deps.py +++ b/src/llama_stack/cli/stack/list_deps.py @@ -46,6 +46,10 @@ class StackListDeps(Subcommand): def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None: # always keep implementation completely silo-ed away from CLI so CLI # can be fast to load and reduces dependencies + if not args.config and not args.providers: + self.parser.print_help() + self.parser.exit() + from ._list_deps import run_stack_list_deps_command return run_stack_list_deps_command(args) diff --git a/llama_stack/cli/stack/list_providers.py b/src/llama_stack/cli/stack/list_providers.py similarity index 100% rename from llama_stack/cli/stack/list_providers.py rename to src/llama_stack/cli/stack/list_providers.py diff --git a/src/llama_stack/cli/stack/list_stacks.py b/src/llama_stack/cli/stack/list_stacks.py new file mode 100644 index 000000000..ae59ba911 --- /dev/null +++ b/src/llama_stack/cli/stack/list_stacks.py @@ -0,0 +1,77 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
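+
+# Implements `llama stack list`: merges the built-in distributions shipped in
+# the source tree with custom ones under ~/.llama/distributions (custom entries
+# override built-ins of the same name) and prints a table indicating which
+# build/run configs exist for each.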
+ +import argparse +from pathlib import Path + +from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.table import print_table +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR + + +class StackListBuilds(Subcommand): + """List available distributions (both built-in and custom)""" + + def __init__(self, subparsers: argparse._SubParsersAction): + super().__init__() + self.parser = subparsers.add_parser( + "list", + prog="llama stack list", + description="list available distributions", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + self._add_arguments() + self.parser.set_defaults(func=self._list_stack_command) + + def _get_distribution_dirs(self) -> dict[str, tuple[Path, str]]: + """Return a dictionary of distribution names and their paths with source type + + Returns: + dict mapping distro name to (path, source_type) where source_type is 'built-in' or 'custom' + """ + distributions = {} + + # Get built-in distributions from source code + distro_dir = Path(__file__).parent.parent.parent / "distributions" + if distro_dir.exists(): + for stack_dir in distro_dir.iterdir(): + if stack_dir.is_dir() and not stack_dir.name.startswith(".") and not stack_dir.name.startswith("__"): + distributions[stack_dir.name] = (stack_dir, "built-in") + + # Get custom/run distributions from ~/.llama/distributions + # These override built-in ones if they have the same name + if DISTRIBS_BASE_DIR.exists(): + for stack_dir in DISTRIBS_BASE_DIR.iterdir(): + if stack_dir.is_dir() and not stack_dir.name.startswith("."): + # Clean up the name (remove llamastack- prefix if present) + name = stack_dir.name.replace("llamastack-", "") + distributions[name] = (stack_dir, "custom") + + return distributions + + def _list_stack_command(self, args: argparse.Namespace) -> None: + distributions = self._get_distribution_dirs() + + if not distributions: + print("No distributions found") + return + + headers = ["Stack Name", "Source", "Path", "Build Config", "Run Config"] + rows = [] + for name, (path, source_type) in sorted(distributions.items()): + row = [name, source_type, str(path)] + # Check for build and run config files + # For built-in distributions, configs are named build.yaml and run.yaml + # For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml + if source_type == "built-in": + build_config = "Yes" if (path / "build.yaml").exists() else "No" + run_config = "Yes" if (path / "run.yaml").exists() else "No" + else: + build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No" + run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No" + row.extend([build_config, run_config]) + rows.append(row) + print_table(rows, headers, separate_rows=True) diff --git a/llama_stack/cli/stack/remove.py b/src/llama_stack/cli/stack/remove.py similarity index 100% rename from llama_stack/cli/stack/remove.py rename to src/llama_stack/cli/stack/remove.py diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py new file mode 100644 index 000000000..bc4ef70fd --- /dev/null +++ b/src/llama_stack/cli/stack/run.py @@ -0,0 +1,326 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
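+
+# Implements `llama stack run`: resolves a config file or known distro name
+# (or synthesizes a run config from --providers), then serves the stack via
+# uvicorn. The legacy --image-type/--image-name flags are rejected with a
+# pointer to activating a virtual environment instead.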
+ +import argparse +import os +import ssl +import subprocess +import sys +from pathlib import Path + +import uvicorn +import yaml +from termcolor import cprint + +from llama_stack.cli.stack.utils import ImageType +from llama_stack.cli.subcommand import Subcommand +from llama_stack.core.datatypes import Api, Provider, StackRunConfig +from llama_stack.core.distribution import get_provider_registry +from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro +from llama_stack.core.utils.dynamic import instantiate_class_type +from llama_stack.log import LoggingConfig, get_logger + +REPO_ROOT = Path(__file__).parent.parent.parent.parent + +logger = get_logger(name=__name__, category="cli") + + +class StackRun(Subcommand): + def __init__(self, subparsers: argparse._SubParsersAction): + super().__init__() + self.parser = subparsers.add_parser( + "run", + prog="llama stack run", + description="""Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.""", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + self._add_arguments() + self.parser.set_defaults(func=self._run_stack_run_cmd) + + def _add_arguments(self): + self.parser.add_argument( + "config", + type=str, + nargs="?", # Make it optional + metavar="config | distro", + help="Path to config file to use for the run or name of known distro (`llama stack list` for a list).", + ) + self.parser.add_argument( + "--port", + type=int, + help="Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT.", + default=int(os.getenv("LLAMA_STACK_PORT", 8321)), + ) + self.parser.add_argument( + "--image-name", + type=str, + default=None, + help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.", + ) + self.parser.add_argument( + "--image-type", + type=str, + help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.", + choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value], + ) + self.parser.add_argument( + "--enable-ui", + action="store_true", + help="Start the UI server", + ) + self.parser.add_argument( + "--providers", + type=str, + default=None, + help="Run a stack with only a list of providers. This list is formatted like: api1=provider1,api1=provider2,api2=provider3. 
Where there can be multiple providers per API.",
+        )
+
+    def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
+        import yaml
+
+        from llama_stack.core.configure import parse_and_maybe_upgrade_config
+
+        if args.image_type or args.image_name:
+            self.parser.error(
+                "The --image-type and --image-name flags are no longer supported.\n\n"
+                "Please activate your virtual environment manually before running `llama stack run`.\n\n"
+                "For example:\n"
+                "  source /path/to/venv/bin/activate\n"
+                "  llama stack run \n"
+            )
+
+        if args.enable_ui:
+            self._start_ui_development_server(args.port)
+
+        if args.config:
+            try:
+                from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
+
+                config_file = resolve_config_or_distro(args.config, Mode.RUN)
+            except ValueError as e:
+                self.parser.error(str(e))
+        elif args.providers:
+            provider_list: dict[str, list[Provider]] = dict()
+            for api_provider in args.providers.split(","):
+                if "=" not in api_provider:
+                    cprint(
+                        "Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
+                        color="red",
+                        file=sys.stderr,
+                    )
+                    sys.exit(1)
+                api, provider_type = api_provider.split("=")
+                providers_for_api = get_provider_registry().get(Api(api), None)
+                if providers_for_api is None:
+                    cprint(
+                        f"{api} is not a valid API.",
+                        color="red",
+                        file=sys.stderr,
+                    )
+                    sys.exit(1)
+                if provider_type in providers_for_api:
+                    config_type = instantiate_class_type(providers_for_api[provider_type].config_class)
+                    if config_type is not None and hasattr(config_type, "sample_run_config"):
+                        config = config_type.sample_run_config(__distro_dir__="~/.llama/distributions/providers-run")
+                    else:
+                        config = {}
+                    provider = Provider(
+                        provider_type=provider_type,
+                        config=config,
+                        provider_id=provider_type.split("::")[1],
+                    )
+                    provider_list.setdefault(api, []).append(provider)
+                else:
+                    cprint(
+                        f"{provider_type} is not a valid provider for the {api} API.",
+                        color="red",
+                        file=sys.stderr,
+                    )
+                    sys.exit(1)
+            run_config = self._generate_run_config_from_providers(providers=provider_list)
+            config_dict = run_config.model_dump(mode="json")
+
+            # Write config to disk in providers-run directory
+            distro_dir = DISTRIBS_BASE_DIR / "providers-run"
+            config_file = distro_dir / "run.yaml"
+
+            logger.info(f"Writing generated config to: {config_file}")
+            with open(config_file, "w") as f:
+                yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
+
+        else:
+            config_file = None
+
+        if config_file:
+            logger.info(f"Using run configuration: {config_file}")
+
+            try:
+                config_dict = yaml.safe_load(config_file.read_text())
+            except yaml.parser.ParserError as e:
+                self.parser.error(f"failed to load config file '{config_file}':\n {e}")
+
+            try:
+                config = parse_and_maybe_upgrade_config(config_dict)
+                # Create external_providers_dir if it's specified and doesn't exist
+                if config.external_providers_dir and not os.path.exists(str(config.external_providers_dir)):
+                    os.makedirs(str(config.external_providers_dir), exist_ok=True)
+            except AttributeError as e:
+                self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
+
+        self._uvicorn_run(config_file, args)
+
+    def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
+        if not config_file:
+            self.parser.error("Config file is required")
+
+        config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
+        with open(config_file) as fp:
+            config_contents = yaml.safe_load(fp)
+        if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
+            logger_config = LoggingConfig(**cfg)
+        else:
+            logger_config = None
+        config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
+
+        port = args.port or config.server.port
+        host = config.server.host or ["::", "0.0.0.0"]
+
+        # Set the config file in environment so create_app can find it
+        os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
+
+        uvicorn_config = {
+            "factory": True,
+            "host": host,
+            "port": port,
+            "lifespan": "on",
+            "log_level": logger.getEffectiveLevel(),
+            "log_config": logger_config,
+            "workers": config.server.workers,
+        }
+
+        keyfile = config.server.tls_keyfile
+        certfile = config.server.tls_certfile
+        if keyfile and certfile:
+            uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
+            uvicorn_config["ssl_certfile"] = config.server.tls_certfile
+            if config.server.tls_cafile:
+                uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
+                uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
+
+            logger.info(
+                f"HTTPS enabled with certificates:\n  Key: {keyfile}\n  Cert: {certfile}\n  CA: {config.server.tls_cafile}"
+            )
+        else:
+            logger.info("No TLS certificates configured; serving over plain HTTP")
+
+        logger.info(f"Listening on {host}:{port}")
+
+        # We need to catch KeyboardInterrupt because uvicorn's signal handling
+        # re-raises SIGINT signals using signal.raise_signal(), which Python
+        # converts to KeyboardInterrupt. Without this catch, we'd get a confusing
+        # stack trace when using Ctrl+C or kill -2 (SIGINT).
+        # SIGTERM (kill -15) works fine without this because Python doesn't
+        # have a default handler for it.
+        #
+        # Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
+        # signal handling, but this is quite intrusive and not worth the effort.
+        try:
+            uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)  # type: ignore[arg-type]
+        except (KeyboardInterrupt, SystemExit):
+            logger.info("Received interrupt signal, shutting down gracefully...")
+
+    def _start_ui_development_server(self, stack_server_port: int):
+        logger.info("Attempting to start UI development server...")
+        # Check if npm is available
+        npm_check = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=False)
+        if npm_check.returncode != 0:
+            logger.warning(
+                f"'npm' command not found or not executable. UI development server will not be started. Error: {npm_check.stderr}"
+            )
+            return
+
+        ui_dir = REPO_ROOT / "llama_stack_ui"
+        logs_dir = Path("~/.llama/ui/logs").expanduser()
+        try:
+            # Create logs directory if it doesn't exist
+            logs_dir.mkdir(parents=True, exist_ok=True)
+
+            ui_stdout_log_path = logs_dir / "stdout.log"
+            ui_stderr_log_path = logs_dir / "stderr.log"
+
+            # Open log files in append mode
+            stdout_log_file = open(ui_stdout_log_path, "a")
+            stderr_log_file = open(ui_stderr_log_path, "a")
+
+            process = subprocess.Popen(
+                ["npm", "run", "dev"],
+                cwd=str(ui_dir),
+                stdout=stdout_log_file,
+                stderr=stderr_log_file,
+                env={**os.environ, "NEXT_PUBLIC_LLAMA_STACK_BASE_URL": f"http://localhost:{stack_server_port}"},
+            )
+            logger.info(f"UI development server process started in {ui_dir} with PID {process.pid}.")
+            logger.info(f"Logs: stdout -> {ui_stdout_log_path}, stderr -> {ui_stderr_log_path}")
+            logger.info(f"UI will be available at http://localhost:{os.getenv('LLAMA_STACK_UI_PORT', 8322)}")
+
+        except FileNotFoundError:
+            logger.error(
+                "Failed to start UI development server: 'npm' command not found.
Make sure npm is installed and in your PATH." + ) + except Exception as e: + logger.error(f"Failed to start UI development server in {ui_dir}: {e}") + + def _generate_run_config_from_providers(self, providers: dict[str, list[Provider]]): + apis = list(providers.keys()) + distro_dir = DISTRIBS_BASE_DIR / "providers-run" + # need somewhere to put the storage. + os.makedirs(distro_dir, exist_ok=True) + storage = StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db", + ), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference( + backend="kv_default", + namespace="registry", + ), + inference=InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ), + conversations=SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ), + prompts=KVStoreReference( + backend="kv_default", + namespace="prompts", + ), + ), + ) + + return StackRunConfig( + image_name="providers-run", + apis=apis, + providers=providers, + storage=storage, + ) diff --git a/llama_stack/cli/stack/stack.py b/src/llama_stack/cli/stack/stack.py similarity index 100% rename from llama_stack/cli/stack/stack.py rename to src/llama_stack/cli/stack/stack.py diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py new file mode 100644 index 000000000..d49b142e0 --- /dev/null +++ b/src/llama_stack/cli/stack/utils.py @@ -0,0 +1,151 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
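+
+# Shared helpers for the `llama stack` CLI: generate_run_config() renders an
+# editable run.yaml from a build config, and available_templates_specs()
+# caches the build configs of the bundled distribution templates.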
+ +import json +import sys +from enum import Enum +from functools import lru_cache +from pathlib import Path + +import yaml +from termcolor import cprint + +from llama_stack.core.datatypes import ( + BuildConfig, + Provider, + StackRunConfig, + StorageConfig, +) +from llama_stack.core.distribution import get_provider_registry +from llama_stack.core.resolver import InvalidProviderError +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, +) +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR +from llama_stack.core.utils.dynamic import instantiate_class_type +from llama_stack.core.utils.image_types import LlamaStackImageType +from llama_stack_api import Api + +TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions" + + +class ImageType(Enum): + CONTAINER = "container" + VENV = "venv" + + +def print_subcommand_description(parser, subparsers): + """Print descriptions of subcommands.""" + description_text = "" + for name, subcommand in subparsers.choices.items(): + description = subcommand.description + description_text += f" {name:<21} {description}\n" + parser.epilog = description_text + + +def generate_run_config( + build_config: BuildConfig, + build_dir: Path, + image_name: str, +) -> Path: + """ + Generate a run.yaml template file for user to edit from a build.yaml file + """ + apis = list(build_config.distribution_spec.providers.keys()) + distro_dir = DISTRIBS_BASE_DIR / image_name + run_config = StackRunConfig( + container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), + image_name=image_name, + apis=apis, + providers={}, + storage=StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")), + "sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + ), + ), + external_providers_dir=build_config.external_providers_dir + if build_config.external_providers_dir + else EXTERNAL_PROVIDERS_DIR, + ) + # build providers dict + provider_registry = get_provider_registry(build_config) + for api in apis: + run_config.providers[api] = [] + providers = build_config.distribution_spec.providers[api] + + for provider in providers: + pid = provider.provider_type.split("::")[-1] + + p = provider_registry[Api(api)][provider.provider_type] + if p.deprecation_error: + raise InvalidProviderError(p.deprecation_error) + + try: + config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class) + except (ModuleNotFoundError, ValueError) as exc: + # HACK ALERT: + # This code executes after building is done, the import cannot work since the + # package is either available in the venv or container - not available on the host. 
+ # TODO: use a "is_external" flag in ProviderSpec to check if the provider is + # external + cprint( + f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}", + color="yellow", + file=sys.stderr, + ) + # Set config_type to None to avoid UnboundLocalError + config_type = None + + if config_type is not None and hasattr(config_type, "sample_run_config"): + config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}") + else: + config = {} + + p_spec = Provider( + provider_id=pid, + provider_type=provider.provider_type, + config=config, + module=provider.module, + ) + run_config.providers[api].append(p_spec) + + run_config_file = build_dir / f"{image_name}-run.yaml" + + with open(run_config_file, "w") as f: + to_write = json.loads(run_config.model_dump_json()) + f.write(yaml.dump(to_write, sort_keys=False)) + + # Only print this message for non-container builds since it will be displayed before the + # container is built + # For non-container builds, the run.yaml is generated at the very end of the build process so it + # makes sense to display this message + if build_config.image_type != LlamaStackImageType.CONTAINER.value: + cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) + return run_config_file + + +@lru_cache +def available_templates_specs() -> dict[str, BuildConfig]: + import yaml + + template_specs = {} + for p in TEMPLATES_PATH.rglob("*build.yaml"): + template_name = p.parent.name + with open(p) as f: + build_config = BuildConfig(**yaml.safe_load(f)) + template_specs[template_name] = build_config + return template_specs diff --git a/llama_stack/cli/subcommand.py b/src/llama_stack/cli/subcommand.py similarity index 100% rename from llama_stack/cli/subcommand.py rename to src/llama_stack/cli/subcommand.py diff --git a/llama_stack/cli/table.py b/src/llama_stack/cli/table.py similarity index 100% rename from llama_stack/cli/table.py rename to src/llama_stack/cli/table.py diff --git a/llama_stack/cli/utils.py b/src/llama_stack/cli/utils.py similarity index 100% rename from llama_stack/cli/utils.py rename to src/llama_stack/cli/utils.py diff --git a/llama_stack/cli/__init__.py b/src/llama_stack/core/__init__.py similarity index 100% rename from llama_stack/cli/__init__.py rename to src/llama_stack/core/__init__.py diff --git a/llama_stack/cli/scripts/__init__.py b/src/llama_stack/core/access_control/__init__.py similarity index 100% rename from llama_stack/cli/scripts/__init__.py rename to src/llama_stack/core/access_control/__init__.py diff --git a/llama_stack/core/access_control/access_control.py b/src/llama_stack/core/access_control/access_control.py similarity index 100% rename from llama_stack/core/access_control/access_control.py rename to src/llama_stack/core/access_control/access_control.py diff --git a/llama_stack/core/access_control/conditions.py b/src/llama_stack/core/access_control/conditions.py similarity index 100% rename from llama_stack/core/access_control/conditions.py rename to src/llama_stack/core/access_control/conditions.py diff --git a/llama_stack/core/access_control/datatypes.py b/src/llama_stack/core/access_control/datatypes.py similarity index 100% rename from llama_stack/core/access_control/datatypes.py rename to src/llama_stack/core/access_control/datatypes.py diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py new file mode 100644 index 000000000..630b2a47f --- /dev/null +++ 
b/src/llama_stack/core/build.py @@ -0,0 +1,99 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import sys + +from pydantic import BaseModel +from termcolor import cprint + +from llama_stack.core.datatypes import BuildConfig +from llama_stack.core.distribution import get_provider_registry +from llama_stack.distributions.template import DistributionTemplate +from llama_stack.log import get_logger +from llama_stack_api import Api + +log = get_logger(name=__name__, category="core") + +# These are the dependencies needed by the distribution server. +# `llama-stack` is automatically installed by the installation script. +SERVER_DEPENDENCIES = [ + "aiosqlite", + "fastapi", + "fire", + "httpx", + "uvicorn", + "opentelemetry-sdk", + "opentelemetry-exporter-otlp-proto-http", +] + + +class ApiInput(BaseModel): + api: Api + provider: str + + +def get_provider_dependencies( + config: BuildConfig | DistributionTemplate, +) -> tuple[list[str], list[str], list[str]]: + """Get normal and special dependencies from provider configuration.""" + if isinstance(config, DistributionTemplate): + config = config.build_config() + + providers = config.distribution_spec.providers + additional_pip_packages = config.additional_pip_packages + + deps = [] + external_provider_deps = [] + registry = get_provider_registry(config) + for api_str, provider_or_providers in providers.items(): + providers_for_api = registry[Api(api_str)] + + providers = provider_or_providers if isinstance(provider_or_providers, list) else [provider_or_providers] + + for provider in providers: + # Providers from BuildConfig and RunConfig are subtly different - not great + provider_type = provider if isinstance(provider, str) else provider.provider_type + + if provider_type not in providers_for_api: + raise ValueError(f"Provider `{provider}` is not available for API `{api_str}`") + + provider_spec = providers_for_api[provider_type] + if hasattr(provider_spec, "is_external") and provider_spec.is_external: + # this ensures we install the top level module for our external providers + if provider_spec.module: + if isinstance(provider_spec.module, str): + external_provider_deps.append(provider_spec.module) + else: + external_provider_deps.extend(provider_spec.module) + if hasattr(provider_spec, "pip_packages"): + deps.extend(provider_spec.pip_packages) + if hasattr(provider_spec, "container_image") and provider_spec.container_image: + raise ValueError("A stack's dependencies cannot have a container image") + + normal_deps = [] + special_deps = [] + for package in deps: + if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]): + special_deps.append(package) + else: + normal_deps.append(package) + + normal_deps.extend(additional_pip_packages or []) + + return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps)) + + +def print_pip_install_help(config: BuildConfig): + normal_deps, special_deps, _ = get_provider_dependencies(config) + + cprint( + f"Please install needed dependencies using the following commands:\n\nuv pip install {' '.join(normal_deps)}", + color="yellow", + file=sys.stderr, + ) + for special_dep in special_deps: + cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr) + print() diff --git a/llama_stack/core/client.py b/src/llama_stack/core/client.py similarity index 99% rename from llama_stack/core/client.py 
rename to src/llama_stack/core/client.py index 49e01794e..ba935a35e 100644 --- a/llama_stack/core/client.py +++ b/src/llama_stack/core/client.py @@ -15,7 +15,7 @@ import httpx from pydantic import BaseModel, parse_obj_as from termcolor import cprint -from llama_stack.providers.datatypes import RemoteProviderConfig +from llama_stack_api import RemoteProviderConfig _CLIENT_CLASSES = {} diff --git a/llama_stack/core/common.sh b/src/llama_stack/core/common.sh similarity index 100% rename from llama_stack/core/common.sh rename to src/llama_stack/core/common.sh diff --git a/llama_stack/core/configure.py b/src/llama_stack/core/configure.py similarity index 93% rename from llama_stack/core/configure.py rename to src/llama_stack/core/configure.py index 734839ea9..d738b8a61 100644 --- a/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -17,11 +17,10 @@ from llama_stack.core.distribution import ( get_provider_registry, ) from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars -from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec logger = get_logger(name=__name__, category="core") @@ -194,19 +193,11 @@ def upgrade_from_routing_table( def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: - version = config_dict.get("version", None) - if version == LLAMA_STACK_RUN_CONFIG_VERSION: - processed_config_dict = replace_env_vars(config_dict) - return StackRunConfig(**cast_image_name_to_string(processed_config_dict)) - if "routing_table" in config_dict: logger.info("Upgrading config...") config_dict = upgrade_from_routing_table(config_dict) config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION - if not config_dict.get("external_providers_dir", None): - config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR - processed_config_dict = replace_env_vars(config_dict) return StackRunConfig(**cast_image_name_to_string(processed_config_dict)) diff --git a/llama_stack/core/__init__.py b/src/llama_stack/core/conversations/__init__.py similarity index 100% rename from llama_stack/core/__init__.py rename to src/llama_stack/core/conversations/__init__.py diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py new file mode 100644 index 000000000..90402439b --- /dev/null +++ b/src/llama_stack/core/conversations/conversations.py @@ -0,0 +1,312 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
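+
+# Built-in Conversations implementation backed by AuthorizedSqlStore. State
+# lives in two tables: `openai_conversations` (one row per conversation) and
+# `conversation_items` (one row per item, keyed by conversation_id).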
+ +import secrets +import time +from typing import Any, Literal + +from pydantic import BaseModel, TypeAdapter + +from llama_stack.core.datatypes import AccessRule, StackRunConfig +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl +from llama_stack.log import get_logger +from llama_stack_api import ( + Conversation, + ConversationDeletedResource, + ConversationItem, + ConversationItemDeletedResource, + ConversationItemInclude, + ConversationItemList, + Conversations, + Metadata, +) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType + +logger = get_logger(name=__name__, category="openai_conversations") + + +class ConversationServiceConfig(BaseModel): + """Configuration for the built-in conversation service. + + :param run_config: Stack run configuration for resolving persistence + :param policy: Access control rules + """ + + run_config: StackRunConfig + policy: list[AccessRule] = [] + + +async def get_provider_impl(config: ConversationServiceConfig, deps: dict[Any, Any]): + """Get the conversation service implementation.""" + impl = ConversationServiceImpl(config, deps) + await impl.initialize() + return impl + + +class ConversationServiceImpl(Conversations): + """Built-in conversation service implementation using AuthorizedSqlStore.""" + + def __init__(self, config: ConversationServiceConfig, deps: dict[Any, Any]): + self.config = config + self.deps = deps + self.policy = config.policy + + # Use conversations store reference from run config + conversations_ref = config.run_config.storage.stores.conversations + if not conversations_ref: + raise ValueError("storage.stores.conversations must be configured in run config") + + base_sql_store = sqlstore_impl(conversations_ref) + self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy) + + async def initialize(self) -> None: + """Initialize the store and create tables.""" + await self.sql_store.create_table( + "openai_conversations", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "created_at": ColumnType.INTEGER, + "items": ColumnType.JSON, + "metadata": ColumnType.JSON, + }, + ) + + await self.sql_store.create_table( + "conversation_items", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "conversation_id": ColumnType.STRING, + "created_at": ColumnType.INTEGER, + "item_data": ColumnType.JSON, + }, + ) + + async def create_conversation( + self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None + ) -> Conversation: + """Create a conversation.""" + random_bytes = secrets.token_bytes(24) + conversation_id = f"conv_{random_bytes.hex()}" + created_at = int(time.time()) + + record_data = { + "id": conversation_id, + "created_at": created_at, + "items": [], + "metadata": metadata, + } + + await self.sql_store.insert( + table="openai_conversations", + data=record_data, + ) + + if items: + item_records = [] + for item in items: + item_dict = item.model_dump() + item_id = self._get_or_generate_item_id(item, item_dict) + + item_record = { + "id": item_id, + "conversation_id": conversation_id, + "created_at": created_at, + "item_data": item_dict, + } + + item_records.append(item_record) + + await self.sql_store.insert(table="conversation_items", data=item_records) + + conversation = Conversation( + id=conversation_id, + created_at=created_at, + metadata=metadata, + object="conversation", + ) + + logger.debug(f"Created conversation 
{conversation_id}") + return conversation + + async def get_conversation(self, conversation_id: str) -> Conversation: + """Get a conversation with the given ID.""" + record = await self.sql_store.fetch_one(table="openai_conversations", where={"id": conversation_id}) + + if record is None: + raise ValueError(f"Conversation {conversation_id} not found") + + return Conversation( + id=record["id"], created_at=record["created_at"], metadata=record.get("metadata"), object="conversation" + ) + + async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation: + """Update a conversation's metadata with the given ID""" + await self.sql_store.update( + table="openai_conversations", data={"metadata": metadata}, where={"id": conversation_id} + ) + + return await self.get_conversation(conversation_id) + + async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource: + """Delete a conversation with the given ID.""" + await self.sql_store.delete(table="openai_conversations", where={"id": conversation_id}) + + logger.debug(f"Deleted conversation {conversation_id}") + return ConversationDeletedResource(id=conversation_id) + + def _validate_conversation_id(self, conversation_id: str) -> None: + """Validate conversation ID format.""" + if not conversation_id.startswith("conv_"): + raise ValueError( + f"Invalid 'conversation_id': '{conversation_id}'. Expected an ID that begins with 'conv_'." + ) + + def _get_or_generate_item_id(self, item: ConversationItem, item_dict: dict) -> str: + """Get existing item ID or generate one if missing.""" + if item.id is None: + random_bytes = secrets.token_bytes(24) + if item.type == "message": + item_id = f"msg_{random_bytes.hex()}" + else: + item_id = f"item_{random_bytes.hex()}" + item_dict["id"] = item_id + return item_id + return item.id + + async def _get_validated_conversation(self, conversation_id: str) -> Conversation: + """Validate conversation ID and return the conversation if it exists.""" + self._validate_conversation_id(conversation_id) + return await self.get_conversation(conversation_id) + + async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList: + """Create (add) items to a conversation.""" + await self._get_validated_conversation(conversation_id) + + created_items = [] + base_time = int(time.time()) + + for i, item in enumerate(items): + item_dict = item.model_dump() + item_id = self._get_or_generate_item_id(item, item_dict) + + # make each timestamp unique to maintain order + created_at = base_time + i + + item_record = { + "id": item_id, + "conversation_id": conversation_id, + "created_at": created_at, + "item_data": item_dict, + } + + await self.sql_store.upsert( + table="conversation_items", + data=item_record, + conflict_columns=["id"], + ) + + created_items.append(item_dict) + + logger.debug(f"Created {len(created_items)} items in conversation {conversation_id}") + + # Convert created items (dicts) to proper ConversationItem types + adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem) + response_items: list[ConversationItem] = [adapter.validate_python(item_dict) for item_dict in created_items] + + return ConversationItemList( + data=response_items, + first_id=created_items[0]["id"] if created_items else None, + last_id=created_items[-1]["id"] if created_items else None, + has_more=False, + ) + + async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem: + """Retrieve a conversation item.""" + if not 
conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + if not item_id: + raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}") + + # Get item from conversation_items table + record = await self.sql_store.fetch_one( + table="conversation_items", where={"id": item_id, "conversation_id": conversation_id} + ) + + if record is None: + raise ValueError(f"Item {item_id} not found in conversation {conversation_id}") + + adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem) + return adapter.validate_python(record["item_data"]) + + async def list_items( + self, + conversation_id: str, + after: str | None = None, + include: list[ConversationItemInclude] | None = None, + limit: int | None = None, + order: Literal["asc", "desc"] | None = None, + ) -> ConversationItemList: + """List items in the conversation.""" + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + + # check if conversation exists + await self.get_conversation(conversation_id) + + result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id}) + records = result.data + + if order is not None and order == "asc": + records.sort(key=lambda x: x["created_at"]) + else: + records.sort(key=lambda x: x["created_at"], reverse=True) + + actual_limit = limit or 20 + + records = records[:actual_limit] + items = [record["item_data"] for record in records] + + adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem) + response_items: list[ConversationItem] = [adapter.validate_python(item) for item in items] + + first_id = response_items[0].id if response_items else None + last_id = response_items[-1].id if response_items else None + + return ConversationItemList( + data=response_items, + first_id=first_id, + last_id=last_id, + has_more=False, + ) + + async def openai_delete_conversation_item( + self, conversation_id: str, item_id: str + ) -> ConversationItemDeletedResource: + """Delete a conversation item.""" + if not conversation_id: + raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}") + if not item_id: + raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}") + + _ = await self._get_validated_conversation(conversation_id) + + record = await self.sql_store.fetch_one( + table="conversation_items", where={"id": item_id, "conversation_id": conversation_id} + ) + + if record is None: + raise ValueError(f"Item {item_id} not found in conversation {conversation_id}") + + await self.sql_store.delete( + table="conversation_items", where={"id": item_id, "conversation_id": conversation_id} + ) + + logger.debug(f"Deleted item {item_id} from conversation {conversation_id}") + return ConversationItemDeletedResource(id=item_id) + + async def shutdown(self) -> None: + pass diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py new file mode 100644 index 000000000..f64286ef5 --- /dev/null +++ b/src/llama_stack/core/datatypes.py @@ -0,0 +1,626 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
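Before the datatypes module, a small self-contained illustration of two details of the conversation service above: IDs are a type prefix over 24 random bytes (hex-encoded), and `add_items` offsets each item's timestamp by its index so insertion order survives a sort on `created_at`. The helpers here are stand-ins, not the service itself.

```python
import secrets
import time

def new_id(prefix: str) -> str:
    # e.g. conv_<48 hex chars>, matching the conv_/msg_/item_ scheme above
    return f"{prefix}_{secrets.token_bytes(24).hex()}"

conv_id = new_id("conv")
assert conv_id.startswith("conv_") and len(conv_id) == len("conv_") + 48

# add_items: base_time + i keeps items ordered even within the same second
base_time = int(time.time())
records = [{"id": new_id("msg"), "created_at": base_time + i} for i in range(3)]
ordered = sorted(records, key=lambda r: r["created_at"])
assert [r["created_at"] for r in ordered] == [base_time, base_time + 1, base_time + 2]
```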
+ +from enum import StrEnum +from pathlib import Path +from typing import Annotated, Any, Literal, Self +from urllib.parse import urlparse + +from pydantic import BaseModel, Field, field_validator, model_validator + +from llama_stack.core.access_control.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ( + KVStoreReference, + StorageBackendType, + StorageConfig, +) +from llama_stack.log import LoggingConfig +from llama_stack_api import ( + Api, + Benchmark, + BenchmarkInput, + Dataset, + DatasetInput, + DatasetIO, + Eval, + Inference, + Model, + ModelInput, + ProviderSpec, + Resource, + Safety, + Scoring, + ScoringFn, + ScoringFnInput, + Shield, + ShieldInput, + ToolGroup, + ToolGroupInput, + ToolRuntime, + VectorIO, + VectorStore, + VectorStoreInput, +) + +LLAMA_STACK_BUILD_CONFIG_VERSION = 2 +LLAMA_STACK_RUN_CONFIG_VERSION = 2 + + +RoutingKey = str | list[str] + + +class RegistryEntrySource(StrEnum): + via_register_api = "via_register_api" + listed_from_provider = "listed_from_provider" + + +class User(BaseModel): + principal: str + # further attributes that may be used for access control decisions + attributes: dict[str, list[str]] | None = None + + def __init__(self, principal: str, attributes: dict[str, list[str]] | None): + super().__init__(principal=principal, attributes=attributes) + + +class ResourceWithOwner(Resource): + """Extension of Resource that adds an optional owner, i.e. the user that created the + resource. This can be used to constrain access to the resource.""" + + owner: User | None = None + source: RegistryEntrySource = RegistryEntrySource.via_register_api + + +# Use the extended Resource for all routable objects +class ModelWithOwner(Model, ResourceWithOwner): + pass + + +class ShieldWithOwner(Shield, ResourceWithOwner): + pass + + +class VectorStoreWithOwner(VectorStore, ResourceWithOwner): + pass + + +class DatasetWithOwner(Dataset, ResourceWithOwner): + pass + + +class ScoringFnWithOwner(ScoringFn, ResourceWithOwner): + pass + + +class BenchmarkWithOwner(Benchmark, ResourceWithOwner): + pass + + +class ToolGroupWithOwner(ToolGroup, ResourceWithOwner): + pass + + +RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup + +RoutableObjectWithProvider = Annotated[ + ModelWithOwner + | ShieldWithOwner + | VectorStoreWithOwner + | DatasetWithOwner + | ScoringFnWithOwner + | BenchmarkWithOwner + | ToolGroupWithOwner, + Field(discriminator="type"), +] + +RoutedProtocol = Inference | Safety | VectorIO | DatasetIO | Scoring | Eval | ToolRuntime + + +# Example: /inference, /safety +class AutoRoutedProviderSpec(ProviderSpec): + provider_type: str = "router" + config_class: str = "" + + container_image: str | None = None + routing_table_api: Api + module: str + provider_data_validator: str | None = Field( + default=None, + ) + + +# Example: /models, /shields +class RoutingTableProviderSpec(ProviderSpec): + provider_type: str = "routing_table" + config_class: str = "" + container_image: str | None = None + + router_api: Api + module: str + pip_packages: list[str] = Field(default_factory=list) + + +class Provider(BaseModel): + # provider_id of None means that the provider is not enabled - this happens + # when the provider is enabled via a conditional environment variable + provider_id: str | None + provider_type: str + config: dict[str, Any] = {} + module: str | None = Field( + default=None, + description=""" + Fully-qualified name of the external provider module to import. 
The module is expected to have:
+
+ - `get_adapter_impl(config, deps)`: returns the adapter implementation
+
+ Example: `module: ramalama_stack`
+ """,
+    )
+
+
+class BuildProvider(BaseModel):
+    provider_type: str
+    module: str | None = Field(
+        default=None,
+        description="""
+ Fully-qualified name of the external provider module to import. The module is expected to have:
+
+ - `get_adapter_impl(config, deps)`: returns the adapter implementation
+
+ Example: `module: ramalama_stack`
+ """,
+    )
+
+
+class DistributionSpec(BaseModel):
+    description: str | None = Field(
+        default="",
+        description="Description of the distribution",
+    )
+    container_image: str | None = None
+    providers: dict[str, list[BuildProvider]] = Field(
+        default_factory=dict,
+        description="""
+ Provider Types for each of the APIs provided by this distribution. If you
+ select multiple providers, you should provide an appropriate 'routing_map'
+ in the runtime configuration to help route to the correct provider.
+ """,
+    )
+
+
+class OAuth2JWKSConfig(BaseModel):
+    # The JWKS URI for collecting public keys
+    uri: str
+    token: str | None = Field(default=None, description="token to authorise access to jwks")
+    key_recheck_period: int = Field(default=3600, description="The period to recheck the JWKS URI for key updates")
+
+
+class OAuth2IntrospectionConfig(BaseModel):
+    url: str
+    client_id: str
+    client_secret: str
+    send_secret_in_body: bool = False
+
+
+class AuthProviderType(StrEnum):
+    """Supported authentication provider types."""
+
+    OAUTH2_TOKEN = "oauth2_token"
+    GITHUB_TOKEN = "github_token"
+    CUSTOM = "custom"
+    KUBERNETES = "kubernetes"
+
+
+class OAuth2TokenAuthConfig(BaseModel):
+    """Configuration for OAuth2 token authentication."""
+
+    type: Literal[AuthProviderType.OAUTH2_TOKEN] = AuthProviderType.OAUTH2_TOKEN
+    audience: str = Field(default="llama-stack")
+    verify_tls: bool = Field(default=True)
+    tls_cafile: Path | None = Field(default=None)
+    issuer: str | None = Field(default=None, description="The OIDC issuer URL.")
+    claims_mapping: dict[str, str] = Field(
+        default_factory=lambda: {
+            "sub": "roles",
+            "username": "roles",
+            "groups": "teams",
+            "team": "teams",
+            "project": "projects",
+            "tenant": "namespaces",
+            "namespace": "namespaces",
+        },
+    )
+    jwks: OAuth2JWKSConfig | None = Field(default=None, description="JWKS configuration")
+    introspection: OAuth2IntrospectionConfig | None = Field(
+        default=None, description="OAuth2 introspection configuration"
+    )
+
+    @field_validator("claims_mapping")
+    @classmethod
+    def validate_claims_mapping(cls, v):
+        for key, value in v.items():
+            if not value:
+                raise ValueError(f"claims_mapping value cannot be empty: {key}")
+        return v
+
+    @model_validator(mode="after")
+    def validate_mode(self) -> Self:
+        if not self.jwks and not self.introspection:
+            raise ValueError("One of jwks or introspection must be configured")
+        if self.jwks and self.introspection:
+            raise ValueError("At present only one of jwks or introspection should be configured")
+        return self
+
+
+class CustomAuthConfig(BaseModel):
+    """Configuration for custom authentication."""
+
+    type: Literal[AuthProviderType.CUSTOM] = AuthProviderType.CUSTOM
+    endpoint: str = Field(
+        ...,
+        description="Custom authentication endpoint URL",
+    )
+
+
+class GitHubTokenAuthConfig(BaseModel):
+    """Configuration for GitHub token authentication."""
+
+    type: Literal[AuthProviderType.GITHUB_TOKEN] = AuthProviderType.GITHUB_TOKEN
+    github_api_base_url: str = Field(
+        default="https://api.github.com",
+        description="Base URL for GitHub API (use https://api.github.com for public GitHub)",
+    )
+    claims_mapping: dict[str, str] = Field(
+        default_factory=lambda: {
+            "login": "roles",
+            "organizations": "teams",
+        },
+        description="Mapping from GitHub user fields to access attributes",
+    )
+
+
+class KubernetesAuthProviderConfig(BaseModel):
+    """Configuration for Kubernetes authentication provider."""
+
+    type: Literal[AuthProviderType.KUBERNETES] = AuthProviderType.KUBERNETES
+    api_server_url: str = Field(
+        default="https://kubernetes.default.svc",
+        description="Kubernetes API server URL (e.g., https://api.cluster.domain:6443)",
+    )
+    verify_tls: bool = Field(default=True, description="Whether to verify TLS certificates")
+    tls_cafile: Path | None = Field(default=None, description="Path to CA certificate file for TLS verification")
+    claims_mapping: dict[str, str] = Field(
+        default_factory=lambda: {
+            "username": "roles",
+            "groups": "roles",
+        },
+        description="Mapping of Kubernetes user claims to access attributes",
+    )
+
+    @field_validator("api_server_url")
+    @classmethod
+    def validate_api_server_url(cls, v):
+        parsed = urlparse(v)
+        if not parsed.scheme or not parsed.netloc:
+            raise ValueError(f"api_server_url must be a valid URL with scheme and host: {v}")
+        if parsed.scheme not in ["http", "https"]:
+            raise ValueError(f"api_server_url scheme must be http or https: {v}")
+        return v
+
+    @field_validator("claims_mapping")
+    @classmethod
+    def validate_claims_mapping(cls, v):
+        for key, value in v.items():
+            if not value:
+                raise ValueError(f"claims_mapping value cannot be empty: {key}")
+        return v
+
+
+AuthProviderConfig = Annotated[
+    OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig | KubernetesAuthProviderConfig,
+    Field(discriminator="type"),
+]
+
+
+class AuthenticationConfig(BaseModel):
+    """Top-level authentication configuration."""
+
+    provider_config: AuthProviderConfig = Field(
+        ...,
+        description="Authentication provider configuration",
+    )
+    access_policy: list[AccessRule] = Field(
+        default=[],
+        description="Rules for determining access to resources",
+    )
+
+
+class AuthenticationRequiredError(Exception):
+    pass
+
+
+class QualifiedModel(BaseModel):
+    """A qualified model identifier, consisting of a provider ID and a model ID."""
+
+    provider_id: str
+    model_id: str
+
+
+class VectorStoresConfig(BaseModel):
+    """Configuration for vector stores in the stack."""
+
+    default_provider_id: str | None = Field(
+        default=None,
+        description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
+    )
+    default_embedding_model: QualifiedModel | None = Field(
+        default=None,
+        description="Default embedding model configuration for vector stores.",
+    )
+
+
+class SafetyConfig(BaseModel):
+    """Configuration for default moderations model."""
+
+    default_shield_id: str | None = Field(
+        default=None,
+        description="ID of the shield to use when `model` is not specified in the `moderations` API request.",
+    )
+
+
+class QuotaPeriod(StrEnum):
+    DAY = "day"
+
+
+class QuotaConfig(BaseModel):
+    kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
+    anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
+    authenticated_max_requests: int = Field(
+        default=1000, description="Max requests for authenticated clients per period"
+    )
+    period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to
set") + + +class CORSConfig(BaseModel): + allow_origins: list[str] = Field(default_factory=list) + allow_origin_regex: str | None = Field(default=None) + allow_methods: list[str] = Field(default=["OPTIONS"]) + allow_headers: list[str] = Field(default_factory=list) + allow_credentials: bool = Field(default=False) + expose_headers: list[str] = Field(default_factory=list) + max_age: int = Field(default=600, ge=0) + + @model_validator(mode="after") + def validate_credentials_config(self) -> Self: + if self.allow_credentials and (self.allow_origins == ["*"] or "*" in self.allow_origins): + raise ValueError("Cannot use wildcard origins with credentials enabled") + return self + + +def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | None: + if cors_config is False or cors_config is None: + return None + + if cors_config is True: + # dev mode: allow localhost on any port + return CORSConfig( + allow_origins=[], + allow_origin_regex=r"https?://localhost:\d+", + allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], + allow_headers=["Content-Type", "Authorization", "X-Requested-With"], + ) + + if isinstance(cors_config, CORSConfig): + return cors_config + + raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}") + + +class RegisteredResources(BaseModel): + """Registry of resources available in the distribution.""" + + models: list[ModelInput] = Field(default_factory=list) + shields: list[ShieldInput] = Field(default_factory=list) + vector_stores: list[VectorStoreInput] = Field(default_factory=list) + datasets: list[DatasetInput] = Field(default_factory=list) + scoring_fns: list[ScoringFnInput] = Field(default_factory=list) + benchmarks: list[BenchmarkInput] = Field(default_factory=list) + tool_groups: list[ToolGroupInput] = Field(default_factory=list) + + +class ServerConfig(BaseModel): + port: int = Field( + default=8321, + description="Port to listen on", + ge=1024, + le=65535, + ) + tls_certfile: str | None = Field( + default=None, + description="Path to TLS certificate file for HTTPS", + ) + tls_keyfile: str | None = Field( + default=None, + description="Path to TLS key file for HTTPS", + ) + tls_cafile: str | None = Field( + default=None, + description="Path to TLS CA file for HTTPS with mutual TLS authentication", + ) + auth: AuthenticationConfig | None = Field( + default=None, + description="Authentication configuration for the server", + ) + host: str | None = Field( + default=None, + description="The host the server should listen on", + ) + quota: QuotaConfig | None = Field( + default=None, + description="Per client quota request configuration", + ) + cors: bool | CORSConfig | None = Field( + default=None, + description="CORS configuration for cross-origin requests. Can be:\n" + "- true: Enable localhost CORS for development\n" + "- {allow_origins: [...], allow_methods: [...], ...}: Full configuration", + ) + workers: int = Field( + default=1, + description="Number of workers to use for the server", + ) + + +class StackRunConfig(BaseModel): + version: int = LLAMA_STACK_RUN_CONFIG_VERSION + + image_name: str = Field( + ..., + description=""" +Reference to the distribution this package refers to. For unregistered (adhoc) packages, +this could be just a hash +""", + ) + container_image: str | None = Field( + default=None, + description="Reference to the container image if this package refers to a container", + ) + apis: list[str] = Field( + default_factory=list, + description=""" +The list of APIs to serve. 
If not specified, all APIs specified in the provider_map will be served""", + ) + + providers: dict[str, list[Provider]] = Field( + description=""" +One or more providers to use for each API. The same provider_type (e.g., meta-reference) +can be instantiated multiple times (with different configs) if necessary. +""", + ) + storage: StorageConfig = Field( + description="Catalog of named storage backends and references available to the stack", + ) + + registered_resources: RegisteredResources = Field( + default_factory=RegisteredResources, + description="Registry of resources available in the distribution", + ) + + logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging") + + server: ServerConfig = Field( + default_factory=ServerConfig, + description="Configuration for the HTTP(S) server", + ) + + external_providers_dir: Path | None = Field( + default=None, + description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.", + ) + + external_apis_dir: Path | None = Field( + default=None, + description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", + ) + + vector_stores: VectorStoresConfig | None = Field( + default=None, + description="Configuration for vector stores, including default embedding model", + ) + + safety: SafetyConfig | None = Field( + default=None, + description="Configuration for default moderations model", + ) + + @field_validator("external_providers_dir") + @classmethod + def validate_external_providers_dir(cls, v): + if v is None: + return None + if isinstance(v, str): + return Path(v) + return v + + @model_validator(mode="after") + def validate_server_stores(self) -> "StackRunConfig": + backend_map = self.storage.backends + stores = self.storage.stores + kv_backends = { + name + for name, cfg in backend_map.items() + if cfg.type + in { + StorageBackendType.KV_REDIS, + StorageBackendType.KV_SQLITE, + StorageBackendType.KV_POSTGRES, + StorageBackendType.KV_MONGODB, + } + } + sql_backends = { + name + for name, cfg in backend_map.items() + if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES} + } + + def _ensure_backend(reference, expected_set, store_name: str) -> None: + if reference is None: + return + backend_name = reference.backend + if backend_name not in backend_map: + raise ValueError( + f"{store_name} references unknown backend '{backend_name}'. " + f"Available backends: {sorted(backend_map)}" + ) + if backend_name not in expected_set: + raise ValueError( + f"{store_name} references backend '{backend_name}' of type " + f"'{backend_map[backend_name].type.value}', but a backend of type " + f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required." + ) + + _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata") + _ensure_backend(stores.inference, sql_backends, "storage.stores.inference") + _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations") + _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") + _ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts") + return self + + +class BuildConfig(BaseModel): + version: int = LLAMA_STACK_BUILD_CONFIG_VERSION + + distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. 
") + image_type: str = Field( + default="venv", + description="Type of package to build (container | venv)", + ) + image_name: str | None = Field( + default=None, + description="Name of the distribution to build", + ) + external_providers_dir: Path | None = Field( + default=None, + description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. " + "pip_packages MUST contain the provider package name.", + ) + additional_pip_packages: list[str] = Field( + default_factory=list, + description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.", + ) + external_apis_dir: Path | None = Field( + default=None, + description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", + ) + + @field_validator("external_providers_dir") + @classmethod + def validate_external_providers_dir(cls, v): + if v is None: + return None + if isinstance(v, str): + return Path(v) + return v diff --git a/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py similarity index 99% rename from llama_stack/core/distribution.py rename to src/llama_stack/core/distribution.py index 82cbcf984..658c75ef2 100644 --- a/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -15,7 +15,7 @@ from pydantic import BaseModel from llama_stack.core.datatypes import BuildConfig, DistributionSpec from llama_stack.core.external import load_external_apis from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, @@ -25,7 +25,7 @@ from llama_stack.providers.datatypes import ( logger = get_logger(name=__name__, category="core") -INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.telemetry} +INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations} def stack_apis() -> list[Api]: diff --git a/llama_stack/core/external.py b/src/llama_stack/core/external.py similarity index 96% rename from llama_stack/core/external.py rename to src/llama_stack/core/external.py index 12e9824ad..d1a2d6e42 100644 --- a/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -7,9 +7,9 @@ import yaml -from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.core.datatypes import BuildConfig, StackRunConfig from llama_stack.log import get_logger +from llama_stack_api import Api, ExternalApiSpec logger = get_logger(name=__name__, category="core") diff --git a/llama_stack/core/id_generation.py b/src/llama_stack/core/id_generation.py similarity index 100% rename from llama_stack/core/id_generation.py rename to src/llama_stack/core/id_generation.py diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py new file mode 100644 index 000000000..272c9d1bc --- /dev/null +++ b/src/llama_stack/core/inspect.py @@ -0,0 +1,98 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from importlib.metadata import version + +from pydantic import BaseModel + +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.external import load_external_apis +from llama_stack.core.server.routes import get_all_api_routes +from llama_stack_api import ( + HealthInfo, + HealthStatus, + Inspect, + ListRoutesResponse, + RouteInfo, + VersionInfo, +) + + +class DistributionInspectConfig(BaseModel): + run_config: StackRunConfig + + +async def get_provider_impl(config, deps): + impl = DistributionInspectImpl(config, deps) + await impl.initialize() + return impl + + +class DistributionInspectImpl(Inspect): + def __init__(self, config: DistributionInspectConfig, deps): + self.config = config + self.deps = deps + + async def initialize(self) -> None: + pass + + async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse: + run_config: StackRunConfig = self.config.run_config + + # Helper function to determine if a route should be included based on api_filter + def should_include_route(webmethod) -> bool: + if api_filter is None: + # Default: only non-deprecated APIs + return not webmethod.deprecated + elif api_filter == "deprecated": + # Special filter: show deprecated routes regardless of their actual level + return bool(webmethod.deprecated) + else: + # Filter by API level (non-deprecated routes only) + return not webmethod.deprecated and webmethod.level == api_filter + + ret = [] + external_apis = load_external_apis(run_config) + all_endpoints = get_all_api_routes(external_apis) + for api, endpoints in all_endpoints.items(): + # Always include provider and inspect APIs, filter others based on run config + if api.value in ["providers", "inspect"]: + ret.extend( + [ + RouteInfo( + route=e.path, + method=next(iter([m for m in e.methods if m != "HEAD"])), + provider_types=[], # These APIs don't have "real" providers - they're internal to the stack + ) + for e, webmethod in endpoints + if e.methods is not None and should_include_route(webmethod) + ] + ) + else: + providers = run_config.providers.get(api.value, []) + if providers: # Only process if there are providers for this API + ret.extend( + [ + RouteInfo( + route=e.path, + method=next(iter([m for m in e.methods if m != "HEAD"])), + provider_types=[p.provider_type for p in providers], + ) + for e, webmethod in endpoints + if e.methods is not None and should_include_route(webmethod) + ] + ) + + return ListRoutesResponse(data=ret) + + async def health(self) -> HealthInfo: + return HealthInfo(status=HealthStatus.OK) + + async def version(self) -> VersionInfo: + return VersionInfo(version=version("llama-stack")) + + async def shutdown(self) -> None: + pass diff --git a/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py similarity index 90% rename from llama_stack/core/library_client.py rename to src/llama_stack/core/library_client.py index c64b9a391..7ae29ad0d 100644 --- a/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -18,38 +18,38 @@ from typing import Any, TypeVar, Union, get_args, get_origin import httpx import yaml from fastapi import Response as FastAPIResponse -from llama_stack_client import ( - NOT_GIVEN, - APIResponse, - AsyncAPIResponse, - AsyncLlamaStackClient, - AsyncStream, - LlamaStackClient, -) + +from llama_stack.core.utils.type_inspection import is_unwrapped_body_param + +try: + from llama_stack_client import ( + NOT_GIVEN, + APIResponse, + AsyncAPIResponse, + AsyncLlamaStackClient, + AsyncStream, + LlamaStackClient, + ) 
+except ImportError as e: + raise ImportError( + "llama-stack-client is not installed. Please install it with `uv pip install llama-stack[client]`." + ) from e + from pydantic import BaseModel, TypeAdapter from rich.console import Console from termcolor import cprint from llama_stack.core.build import print_pip_install_help from llama_stack.core.configure import parse_and_maybe_upgrade_config -from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec -from llama_stack.core.request_headers import ( - PROVIDER_DATA_VAR, - request_provider_data_context, -) +from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec +from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider_data_context from llama_stack.core.resolver import ProviderRegistry from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls -from llama_stack.core.stack import ( - Stack, - get_stack_run_config_from_distro, - replace_env_vars, -) +from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.core.utils.exec import in_notebook from llama_stack.log import get_logger, setup_logging -from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace -from llama_stack.strong_typing.inspection import is_unwrapped_body_param logger = get_logger(name=__name__, category="core") @@ -202,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): super().__init__() # Initialize logging from environment variables first setup_logging() - - # when using the library client, we should not log to console since many - # of our logs are intended for server-side usage - if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None): - current_sinks = sinks_from_env.strip().lower().split(",") - os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console") - if in_notebook(): import nest_asyncio @@ -293,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): raise _e assert self.impls is not None - if Api.telemetry in self.impls: - setup_logger(self.impls[Api.telemetry]) if not os.environ.get("PYTEST_CURRENT_TEST"): console = Console() @@ -381,16 +372,16 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls) body |= path_params + # Pass through params that aren't already handled as path params + if options.params: + extra_query_params = {k: v for k, v in options.params.items() if k not in path_params} + if extra_query_params: + body["extra_query"] = extra_query_params + body, field_names = self._handle_file_uploads(options, body) body = self._convert_body(matched_func, body, exclude_params=set(field_names)) - - trace_path = webmethod.descriptive_name or route_path - await start_trace(trace_path, {"__location__": "library_client"}) - try: - result = await matched_func(**body) - finally: - await end_trace() + result = await matched_func(**body) # Handle FastAPI Response objects (e.g., from file content retrieval) if isinstance(result, FastAPIResponse): @@ -449,19 +440,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): # Prepare body for the function call (handles both Pydantic and traditional 
params) body = self._convert_body(func, body) - trace_path = webmethod.descriptive_name or route_path - await start_trace(trace_path, {"__location__": "library_client"}) - async def gen(): - try: - async for chunk in await func(**body): - data = json.dumps(convert_pydantic_to_json_value(chunk)) - sse_event = f"data: {data}\n\n" - yield sse_event.encode("utf-8") - finally: - await end_trace() + async for chunk in await func(**body): + data = json.dumps(convert_pydantic_to_json_value(chunk)) + sse_event = f"data: {data}\n\n" + yield sse_event.encode("utf-8") - wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]) + wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR]) mock_response = httpx.Response( status_code=httpx.codes.OK, diff --git a/llama_stack/core/access_control/__init__.py b/src/llama_stack/core/prompts/__init__.py similarity index 100% rename from llama_stack/core/access_control/__init__.py rename to src/llama_stack/core/prompts/__init__.py diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py new file mode 100644 index 000000000..ff67ad138 --- /dev/null +++ b/src/llama_stack/core/prompts/prompts.py @@ -0,0 +1,235 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +from typing import Any + +from pydantic import BaseModel + +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.kvstore import KVStore, kvstore_impl +from llama_stack_api import ListPromptsResponse, Prompt, Prompts + + +class PromptServiceConfig(BaseModel): + """Configuration for the built-in prompt service. 
+ + :param run_config: Stack run configuration containing distribution info + """ + + run_config: StackRunConfig + + +async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]): + """Get the prompt service implementation.""" + impl = PromptServiceImpl(config, deps) + await impl.initialize() + return impl + + +class PromptServiceImpl(Prompts): + """Built-in prompt service implementation using KVStore.""" + + def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]): + self.config = config + self.deps = deps + self.kvstore: KVStore + + async def initialize(self) -> None: + # Use prompts store reference from run config + prompts_ref = self.config.run_config.storage.stores.prompts + if not prompts_ref: + raise ValueError("storage.stores.prompts must be configured in run config") + self.kvstore = await kvstore_impl(prompts_ref) + + def _get_default_key(self, prompt_id: str) -> str: + """Get the KVStore key that stores the default version number.""" + return f"prompts:v1:{prompt_id}:default" + + async def _get_prompt_key(self, prompt_id: str, version: int | None = None) -> str: + """Get the KVStore key for prompt data, returning default version if applicable.""" + if version: + return self._get_version_key(prompt_id, str(version)) + + default_key = self._get_default_key(prompt_id) + resolved_version = await self.kvstore.get(default_key) + if resolved_version is None: + raise ValueError(f"Prompt {prompt_id}:default not found") + return self._get_version_key(prompt_id, resolved_version) + + def _get_version_key(self, prompt_id: str, version: str) -> str: + """Get the KVStore key for a specific prompt version.""" + return f"prompts:v1:{prompt_id}:{version}" + + def _get_list_key_prefix(self) -> str: + """Get the key prefix for listing prompts.""" + return "prompts:v1:" + + def _serialize_prompt(self, prompt: Prompt) -> str: + """Serialize a prompt to JSON string for storage.""" + return json.dumps( + { + "prompt_id": prompt.prompt_id, + "prompt": prompt.prompt, + "version": prompt.version, + "variables": prompt.variables or [], + "is_default": prompt.is_default, + } + ) + + def _deserialize_prompt(self, data: str) -> Prompt: + """Deserialize a prompt from JSON string.""" + obj = json.loads(data) + return Prompt( + prompt_id=obj["prompt_id"], + prompt=obj["prompt"], + version=obj["version"], + variables=obj.get("variables", []), + is_default=obj.get("is_default", False), + ) + + async def list_prompts(self) -> ListPromptsResponse: + """List all prompts (default versions only).""" + prefix = self._get_list_key_prefix() + keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff") + + prompts = [] + for key in keys: + if key.endswith(":default"): + try: + default_version = await self.kvstore.get(key) + if default_version: + prompt_id = key.replace(prefix, "").replace(":default", "") + version_key = self._get_version_key(prompt_id, default_version) + data = await self.kvstore.get(version_key) + if data: + prompt = self._deserialize_prompt(data) + prompts.append(prompt) + except (json.JSONDecodeError, KeyError): + continue + + prompts.sort(key=lambda p: p.prompt_id or "", reverse=True) + return ListPromptsResponse(data=prompts) + + async def get_prompt(self, prompt_id: str, version: int | None = None) -> Prompt: + """Get a prompt by its identifier and optional version.""" + key = await self._get_prompt_key(prompt_id, version) + data = await self.kvstore.get(key) + if data is None: + raise ValueError(f"Prompt {prompt_id}:{version if version else 'default'} not found") 
+        return self._deserialize_prompt(data)
+
+    async def create_prompt(
+        self,
+        prompt: str,
+        variables: list[str] | None = None,
+    ) -> Prompt:
+        """Create a new prompt."""
+        if variables is None:
+            variables = []
+
+        prompt_obj = Prompt(
+            prompt_id=Prompt.generate_prompt_id(),
+            prompt=prompt,
+            version=1,
+            variables=variables,
+        )
+
+        version_key = self._get_version_key(prompt_obj.prompt_id, str(prompt_obj.version))
+        data = self._serialize_prompt(prompt_obj)
+        await self.kvstore.set(version_key, data)
+
+        default_key = self._get_default_key(prompt_obj.prompt_id)
+        await self.kvstore.set(default_key, str(prompt_obj.version))
+
+        return prompt_obj
+
+    async def update_prompt(
+        self,
+        prompt_id: str,
+        prompt: str,
+        version: int,
+        variables: list[str] | None = None,
+        set_as_default: bool = True,
+    ) -> Prompt:
+        """Update an existing prompt (increments version)."""
+        if version < 1:
+            raise ValueError("Version must be >= 1")
+        if variables is None:
+            variables = []
+
+        prompt_versions = await self.list_prompt_versions(prompt_id)
+        latest_prompt = max(prompt_versions.data, key=lambda x: int(x.version))
+
+        if version and latest_prompt.version != version:
+            raise ValueError(
+                f"'{version}' is not the latest prompt version for prompt_id='{prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
+            )
+
+        current_version = latest_prompt.version if version is None else version
+        new_version = current_version + 1
+
+        updated_prompt = Prompt(prompt_id=prompt_id, prompt=prompt, version=new_version, variables=variables)
+
+        version_key = self._get_version_key(prompt_id, str(new_version))
+        data = self._serialize_prompt(updated_prompt)
+        await self.kvstore.set(version_key, data)
+
+        if set_as_default:
+            await self.set_default_version(prompt_id, new_version)
+
+        return updated_prompt
+
+    async def delete_prompt(self, prompt_id: str) -> None:
+        """Delete a prompt and all its versions."""
+        await self.get_prompt(prompt_id)
+
+        prefix = f"prompts:v1:{prompt_id}:"
+        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
+
+        for key in keys:
+            await self.kvstore.delete(key)
+
+    async def list_prompt_versions(self, prompt_id: str) -> ListPromptsResponse:
+        """List all versions of a specific prompt."""
+        prefix = f"prompts:v1:{prompt_id}:"
+        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
+
+        default_version = None
+        prompts = []
+
+        for key in keys:
+            data = await self.kvstore.get(key)
+            if key.endswith(":default"):
+                default_version = data
+            else:
+                if data:
+                    prompt_obj = self._deserialize_prompt(data)
+                    prompts.append(prompt_obj)
+
+        if not prompts:
+            raise ValueError(f"Prompt {prompt_id} not found")
+
+        for prompt in prompts:
+            prompt.is_default = str(prompt.version) == default_version
+
+        prompts.sort(key=lambda x: x.version)
+        return ListPromptsResponse(data=prompts)
+
+    async def set_default_version(self, prompt_id: str, version: int) -> Prompt:
+        """Set which version of a prompt should be the default. If not set,
the default is the latest.""" + version_key = self._get_version_key(prompt_id, str(version)) + data = await self.kvstore.get(version_key) + if data is None: + raise ValueError(f"Prompt {prompt_id} version {version} not found") + + default_key = self._get_default_key(prompt_id) + await self.kvstore.set(default_key, str(version)) + + return self._deserialize_prompt(data) + + async def shutdown(self) -> None: + pass diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py new file mode 100644 index 000000000..e3fe3c7b3 --- /dev/null +++ b/src/llama_stack/core/providers.py @@ -0,0 +1,136 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import asyncio +from typing import Any + +from pydantic import BaseModel + +from llama_stack.log import get_logger +from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers + +from .datatypes import StackRunConfig +from .utils.config import redact_sensitive_fields + +logger = get_logger(name=__name__, category="core") + + +class ProviderImplConfig(BaseModel): + run_config: StackRunConfig + + +async def get_provider_impl(config, deps): + impl = ProviderImpl(config, deps) + await impl.initialize() + return impl + + +class ProviderImpl(Providers): + def __init__(self, config, deps): + self.config = config + self.deps = deps + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + logger.debug("ProviderImpl.shutdown") + pass + + async def list_providers(self) -> ListProvidersResponse: + run_config = self.config.run_config + safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump())) + providers_health = await self.get_providers_health() + ret = [] + for api, providers in safe_config.providers.items(): + for p in providers: + # Skip providers that are not enabled + if p.provider_id is None: + continue + ret.append( + ProviderInfo( + api=api, + provider_id=p.provider_id, + provider_type=p.provider_type, + config=p.config, + health=providers_health.get(api, {}).get( + p.provider_id, + HealthResponse( + status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check" + ), + ), + ) + ) + + return ListProvidersResponse(data=ret) + + async def inspect_provider(self, provider_id: str) -> ProviderInfo: + all_providers = await self.list_providers() + for p in all_providers.data: + if p.provider_id == provider_id: + return p + + raise ValueError(f"Provider {provider_id} not found") + + async def get_providers_health(self) -> dict[str, dict[str, HealthResponse]]: + """Get health status for all providers. + + Returns: + Dict[str, Dict[str, HealthResponse]]: A dictionary mapping API names to provider health statuses. + Each API maps to a dictionary of provider IDs to their health responses. + """ + providers_health: dict[str, dict[str, HealthResponse]] = {} + + # The timeout has to be long enough to allow all the providers to be checked, especially in + # the case of the inference router health check since it checks all registered inference + # providers. + # The timeout must not be equal to the one set by health method for a given implementation, + # otherwise we will miss some providers. 
+ timeout = 3.0 + + async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None: + # Skip special implementations (inspect/providers) that don't have provider specs + if not hasattr(impl, "__provider_spec__"): + return None + api_name = impl.__provider_spec__.api.name + if not hasattr(impl, "health"): + return ( + api_name, + HealthResponse( + status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check" + ), + ) + + try: + health = await asyncio.wait_for(impl.health(), timeout=timeout) + return api_name, health + except TimeoutError: + return ( + api_name, + HealthResponse( + status=HealthStatus.ERROR, message=f"Health check timed out after {timeout} seconds" + ), + ) + except Exception as e: + return ( + api_name, + HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"), + ) + + # Create tasks for all providers + tasks = [check_provider_health(impl) for impl in self.deps.values()] + + # Wait for all health checks to complete + results = await asyncio.gather(*tasks) + + # Organize results by API and provider ID + for result in results: + if result is None: # Skip special implementations + continue + api_name, health_response = result + providers_health[api_name] = health_response + + return providers_health diff --git a/llama_stack/core/request_headers.py b/src/llama_stack/core/request_headers.py similarity index 100% rename from llama_stack/core/request_headers.py rename to src/llama_stack/core/request_headers.py diff --git a/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py similarity index 87% rename from llama_stack/core/resolver.py rename to src/llama_stack/core/resolver.py index 0b63815ea..15720df95 100644 --- a/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -8,30 +8,6 @@ import importlib.metadata import inspect from typing import Any -from llama_stack.apis.agents import Agents -from llama_stack.apis.batches import Batches -from llama_stack.apis.benchmarks import Benchmarks -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.datatypes import ExternalApiSpec -from llama_stack.apis.eval import Eval -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InferenceProvider -from llama_stack.apis.inspect import Inspect -from llama_stack.apis.models import Models -from llama_stack.apis.post_training import PostTraining -from llama_stack.apis.prompts import Prompts -from llama_stack.apis.providers import Providers as ProvidersAPI -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFunctions -from llama_stack.apis.shields import Shields -from llama_stack.apis.telemetry import Telemetry -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( AccessRule, @@ -45,18 +21,44 @@ from llama_stack.core.external import load_external_apis from llama_stack.core.store import DistributionRegistry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( +from llama_stack_api import ( + 
LLAMA_STACK_API_V1ALPHA, + Agents, Api, + Batches, + Benchmarks, BenchmarksProtocolPrivate, + Conversations, + DatasetIO, + Datasets, DatasetsProtocolPrivate, - InlineProviderSpec, + Eval, + ExternalApiSpec, + Files, + Inference, + InferenceProvider, + Inspect, + Models, ModelsProtocolPrivate, + PostTraining, + Prompts, ProviderSpec, RemoteProviderConfig, RemoteProviderSpec, + Safety, + Scoring, + ScoringFunctions, ScoringFunctionsProtocolPrivate, + Shields, ShieldsProtocolPrivate, + ToolGroups, ToolGroupsProtocolPrivate, + ToolRuntime, + VectorIO, + VectorStore, +) +from llama_stack_api import ( + Providers as ProvidersAPI, ) logger = get_logger(name=__name__, category="core") @@ -98,7 +100,6 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.files: Files, Api.prompts: Prompts, Api.conversations: Conversations, - Api.telemetry: Telemetry, } if external_apis: @@ -241,24 +242,6 @@ def validate_and_prepare_providers( key = api_str if api not in router_apis else f"inner-{api_str}" providers_with_specs[key] = specs - # TODO: remove this logic, telemetry should not have providers. - # if telemetry has been enabled in the config initialize our internal impl - # telemetry is not an external API so it SHOULD NOT be auto-routed. - if run_config.telemetry.enabled: - specs = {} - p = InlineProviderSpec( - api=Api.telemetry, - provider_type="inline::meta-reference", - pip_packages=[], - optional_api_dependencies=[Api.datasetio], - module="llama_stack.providers.inline.telemetry.meta_reference", - config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig", - description="Meta's reference implementation of telemetry and observability using OpenTelemetry.", - ) - spec = ProviderWithSpec(spec=p, provider_type="inline::meta-reference", provider_id="meta-reference") - specs["meta-reference"] = spec - providers_with_specs["telemetry"] = specs - return providers_with_specs @@ -409,8 +392,6 @@ async def instantiate_provider( args = [config, deps] if "policy" in inspect.signature(getattr(module, method)).parameters: args.append(policy) - if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry: - args.append(run_config.telemetry.enabled) fn = getattr(module, method) impl = await fn(*args) diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py new file mode 100644 index 000000000..c6f8a7ac2 --- /dev/null +++ b/src/llama_stack/core/routers/__init__.py @@ -0,0 +1,96 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
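One detail of the resolver hunk above worth calling out: `instantiate_provider` passes optional arguments such as `policy` only when the provider factory declares them, discovered via `inspect.signature`. A runnable sketch of that pattern follows; the factory here is a stand-in, not a real provider module.

```python
import asyncio
import inspect

# Stand-in factory; real ones are get_provider_impl / get_adapter_impl
# in provider modules.
async def get_provider_impl(config, deps, policy=None):
    return {"config": config, "deps": deps, "policy": policy}

async def call_factory(fn, config, deps, policy):
    args = [config, deps]
    # Same check instantiate_provider performs: append `policy` only if declared.
    if "policy" in inspect.signature(fn).parameters:
        args.append(policy)
    return await fn(*args)

print(asyncio.run(call_factory(get_provider_impl, {}, {}, ["allow-all"])))
```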
+ +from typing import Any + +from llama_stack.core.datatypes import ( + AccessRule, + RoutedProtocol, +) +from llama_stack.core.stack import StackRunConfig +from llama_stack.core.store import DistributionRegistry +from llama_stack.providers.utils.inference.inference_store import InferenceStore +from llama_stack_api import Api, RoutingTable + + +async def get_routing_table_impl( + api: Api, + impls_by_provider_id: dict[str, RoutedProtocol], + _deps, + dist_registry: DistributionRegistry, + policy: list[AccessRule], +) -> Any: + from ..routing_tables.benchmarks import BenchmarksRoutingTable + from ..routing_tables.datasets import DatasetsRoutingTable + from ..routing_tables.models import ModelsRoutingTable + from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable + from ..routing_tables.shields import ShieldsRoutingTable + from ..routing_tables.toolgroups import ToolGroupsRoutingTable + from ..routing_tables.vector_stores import VectorStoresRoutingTable + + api_to_tables = { + "models": ModelsRoutingTable, + "shields": ShieldsRoutingTable, + "datasets": DatasetsRoutingTable, + "scoring_functions": ScoringFunctionsRoutingTable, + "benchmarks": BenchmarksRoutingTable, + "tool_groups": ToolGroupsRoutingTable, + "vector_stores": VectorStoresRoutingTable, + } + + if api.value not in api_to_tables: + raise ValueError(f"API {api.value} not found in router map") + + impl = api_to_tables[api.value](impls_by_provider_id, dist_registry, policy) + + await impl.initialize() + return impl + + +async def get_auto_router_impl( + api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackRunConfig, policy: list[AccessRule] +) -> Any: + from .datasets import DatasetIORouter + from .eval_scoring import EvalRouter, ScoringRouter + from .inference import InferenceRouter + from .safety import SafetyRouter + from .tool_runtime import ToolRuntimeRouter + from .vector_io import VectorIORouter + + api_to_routers = { + "vector_io": VectorIORouter, + "inference": InferenceRouter, + "safety": SafetyRouter, + "datasetio": DatasetIORouter, + "scoring": ScoringRouter, + "eval": EvalRouter, + "tool_runtime": ToolRuntimeRouter, + } + if api.value not in api_to_routers: + raise ValueError(f"API {api.value} not found in router map") + + api_to_dep_impl = {} + # TODO: move pass configs to routers instead + if api == Api.inference: + inference_ref = run_config.storage.stores.inference + if not inference_ref: + raise ValueError("storage.stores.inference must be configured in run config") + + inference_store = InferenceStore( + reference=inference_ref, + policy=policy, + ) + await inference_store.initialize() + api_to_dep_impl["store"] = inference_store + elif api == Api.vector_io: + api_to_dep_impl["vector_stores_config"] = run_config.vector_stores + elif api == Api.safety: + api_to_dep_impl["safety_config"] = run_config.safety + + impl = api_to_routers[api.value](routing_table, **api_to_dep_impl) + + await impl.initialize() + return impl diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py new file mode 100644 index 000000000..b6a5f3b96 --- /dev/null +++ b/src/llama_stack/core/routers/datasets.py @@ -0,0 +1,70 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
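The `get_auto_router_impl` function above is a dispatch table plus per-API extra dependencies (for example, the inference router receives an `InferenceStore`). Below is a toy version with stand-in router classes; nothing here is the real llama_stack API.

```python
# Stand-in router classes for illustration only.
class InferenceRouter:
    def __init__(self, routing_table, store=None):
        self.routing_table, self.store = routing_table, store

class SafetyRouter:
    def __init__(self, routing_table, safety_config=None):
        self.routing_table, self.safety_config = routing_table, safety_config

API_TO_ROUTERS = {"inference": InferenceRouter, "safety": SafetyRouter}

def make_router(api: str, routing_table, **extra_deps):
    # Fail fast on unknown APIs, mirroring the ValueError above.
    if api not in API_TO_ROUTERS:
        raise ValueError(f"API {api} not found in router map")
    return API_TO_ROUTERS[api](routing_table, **extra_deps)

router = make_router("inference", routing_table={}, store="inference-store")
```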
+ +from typing import Any + +from llama_stack.log import get_logger +from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable + +logger = get_logger(name=__name__, category="core::routers") + + +class DatasetIORouter(DatasetIO): + def __init__( + self, + routing_table: RoutingTable, + ) -> None: + logger.debug("Initializing DatasetIORouter") + self.routing_table = routing_table + + async def initialize(self) -> None: + logger.debug("DatasetIORouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("DatasetIORouter.shutdown") + pass + + async def register_dataset( + self, + purpose: DatasetPurpose, + source: DataSource, + metadata: dict[str, Any] | None = None, + dataset_id: str | None = None, + ) -> None: + logger.debug( + f"DatasetIORouter.register_dataset: {purpose=} {source=} {metadata=} {dataset_id=}", + ) + await self.routing_table.register_dataset( + purpose=purpose, + source=source, + metadata=metadata, + dataset_id=dataset_id, + ) + + async def iterrows( + self, + dataset_id: str, + start_index: int | None = None, + limit: int | None = None, + ) -> PaginatedResponse: + logger.debug( + f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}", + ) + provider = await self.routing_table.get_provider_impl(dataset_id) + return await provider.iterrows( + dataset_id=dataset_id, + start_index=start_index, + limit=limit, + ) + + async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: + logger.debug(f"DatasetIORouter.append_rows: {dataset_id}, {len(rows)} rows") + provider = await self.routing_table.get_provider_impl(dataset_id) + return await provider.append_rows( + dataset_id=dataset_id, + rows=rows, + ) diff --git a/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py similarity index 96% rename from llama_stack/core/routers/eval_scoring.py rename to src/llama_stack/core/routers/eval_scoring.py index ffca81bf0..4d7269180 100644 --- a/llama_stack/core/routers/eval_scoring.py +++ b/src/llama_stack/core/routers/eval_scoring.py @@ -6,15 +6,18 @@ from typing import Any -from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job -from llama_stack.apis.scoring import ( +from llama_stack.log import get_logger +from llama_stack_api import ( + BenchmarkConfig, + Eval, + EvaluateResponse, + Job, + RoutingTable, ScoreBatchResponse, ScoreResponse, Scoring, ScoringFnParams, ) -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py new file mode 100644 index 000000000..8a7ffaa5f --- /dev/null +++ b/src/llama_stack/core/routers/inference.py @@ -0,0 +1,372 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
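`DatasetIORouter` above shows the delegation pattern these routers share: resolve the provider for a resource ID through the routing table, then forward the call unchanged. A self-contained sketch with a dict-backed stand-in routing table:

```python
import asyncio

# Stand-in provider and routing table; the real ones come from the
# routing-table implementations in this package.
class EchoDatasetIO:
    async def iterrows(self, dataset_id, start_index=None, limit=None):
        return {"dataset_id": dataset_id, "start_index": start_index, "limit": limit, "data": []}

class ToyRoutingTable:
    def __init__(self, providers):
        self._providers = providers

    async def get_provider_impl(self, resource_id):
        return self._providers[resource_id]

async def iterrows_via_router(table, dataset_id, limit=None):
    provider = await table.get_provider_impl(dataset_id)  # resolve, then delegate
    return await provider.iterrows(dataset_id=dataset_id, limit=limit)

table = ToyRoutingTable({"ds1": EchoDatasetIO()})
print(asyncio.run(iterrows_via_router(table, "ds1", limit=10)))
```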
+
+import asyncio
+import time
+from collections.abc import AsyncIterator
+from typing import Annotated, Any
+
+from fastapi import Body
+from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
+from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
+from pydantic import TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.inference_store import InferenceStore
+from llama_stack_api import (
+    HealthResponse,
+    HealthStatus,
+    Inference,
+    ListOpenAIChatCompletionResponse,
+    ModelNotFoundError,
+    ModelType,
+    ModelTypeError,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionToolCall,
+    OpenAIChatCompletionToolCallFunction,
+    OpenAIChoice,
+    OpenAIChoiceLogprobs,
+    OpenAICompletion,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAICompletionWithInputMessages,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    Order,
+    RerankResponse,
+    RoutingTable,
+)
+
+logger = get_logger(name=__name__, category="core::routers")
+
+
+class InferenceRouter(Inference):
+    """Routes to a provider based on the model"""
+
+    def __init__(
+        self,
+        routing_table: RoutingTable,
+        store: InferenceStore | None = None,
+    ) -> None:
+        logger.debug("Initializing InferenceRouter")
+        self.routing_table = routing_table
+        self.store = store
+
+    async def initialize(self) -> None:
+        logger.debug("InferenceRouter.initialize")
+
+    async def shutdown(self) -> None:
+        logger.debug("InferenceRouter.shutdown")
+        if self.store:
+            try:
+                await self.store.shutdown()
+            except Exception as e:
+                logger.warning(f"Error during InferenceStore shutdown: {e}")
+
+    async def register_model(
+        self,
+        model_id: str,
+        provider_model_id: str | None = None,
+        provider_id: str | None = None,
+        metadata: dict[str, Any] | None = None,
+        model_type: ModelType | None = None,
+    ) -> None:
+        logger.debug(
+            f"InferenceRouter.register_model: {model_id=} {provider_model_id=} {provider_id=} {metadata=} {model_type=}",
+        )
+        await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
+
+    async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
+        model = await self.routing_table.get_object_by_identifier("model", model_id)
+        if model:
+            if model.model_type != expected_model_type:
+                raise ModelTypeError(model_id, model.model_type, expected_model_type)
+
+            provider = await self.routing_table.get_provider_impl(model.identifier)
+            return provider, model.provider_resource_id
+
+        splits = model_id.split("/", maxsplit=1)
+        if len(splits) != 2:
+            raise ModelNotFoundError(model_id)
+
+        provider_id, provider_resource_id = splits
+        if provider_id not in self.routing_table.impls_by_provider_id:
+            logger.warning(f"Provider {provider_id} not found for model {model_id}")
+            raise ModelNotFoundError(model_id)
+
+        return self.routing_table.impls_by_provider_id[provider_id], provider_resource_id
+
+    async def rerank(
+        self,
+        model: str,
+        query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
+        items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
+        max_num_results: int | None = None,
+    ) -> RerankResponse:
logger.debug(f"InferenceRouter.rerank: {model}") + provider, provider_resource_id = await self._get_model_provider(model, ModelType.rerank) + return await provider.rerank(provider_resource_id, query, items, max_num_results) + + async def openai_completion( + self, + params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)], + ) -> OpenAICompletion: + logger.debug( + f"InferenceRouter.openai_completion: model={params.model}, stream={params.stream}, prompt={params.prompt}", + ) + request_model_id = params.model + provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.llm) + params.model = provider_resource_id + + if params.stream: + return await provider.openai_completion(params) + + response = await provider.openai_completion(params) + response.model = request_model_id + return response + + async def openai_chat_completion( + self, + params: Annotated[OpenAIChatCompletionRequestWithExtraBody, Body(...)], + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + logger.debug( + f"InferenceRouter.openai_chat_completion: model={params.model}, stream={params.stream}, messages={params.messages}", + ) + request_model_id = params.model + provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.llm) + params.model = provider_resource_id + + # Use the OpenAI client for a bit of extra input validation without + # exposing the OpenAI client itself as part of our API surface + if params.tool_choice: + TypeAdapter(OpenAIChatCompletionToolChoiceOptionParam).validate_python(params.tool_choice) + if params.tools is None: + raise ValueError("'tool_choice' is only allowed when 'tools' is also provided") + if params.tools: + for tool in params.tools: + TypeAdapter(OpenAIChatCompletionToolParam).validate_python(tool) + + # Some providers make tool calls even when tool_choice is "none" + # so just clear them both out to avoid unexpected tool calls + if params.tool_choice == "none" and params.tools is not None: + params.tool_choice = None + params.tools = None + + if params.stream: + response_stream = await provider.openai_chat_completion(params) + + # For streaming, the provider returns AsyncIterator[OpenAIChatCompletionChunk] + # We need to add metrics to each chunk and store the final completion + return self.stream_tokens_and_compute_metrics_openai_chat( + response=response_stream, + fully_qualified_model_id=request_model_id, + provider_id=provider.__provider_id__, + messages=params.messages, + ) + + response = await self._nonstream_openai_chat_completion(provider, params) + response.model = request_model_id + + # Store the response with the ID that will be returned to the client + if self.store: + asyncio.create_task(self.store.store_chat_completion(response, params.messages)) + + return response + + async def openai_embeddings( + self, + params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], + ) -> OpenAIEmbeddingsResponse: + logger.debug( + f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}", + ) + request_model_id = params.model + provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.embedding) + params.model = provider_resource_id + + response = await provider.openai_embeddings(params) + response.model = request_model_id + return response + + async def list_chat_completions( + self, + after: str | None = None, + limit: int | None = 20, + model: str | None = 
None, + order: Order | None = Order.desc, + ) -> ListOpenAIChatCompletionResponse: + if self.store: + return await self.store.list_chat_completions(after, limit, model, order) + raise NotImplementedError("List chat completions is not supported: inference store is not configured.") + + async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: + if self.store: + return await self.store.get_chat_completion(completion_id) + raise NotImplementedError("Get chat completion is not supported: inference store is not configured.") + + async def _nonstream_openai_chat_completion( + self, provider: Inference, params: OpenAIChatCompletionRequestWithExtraBody + ) -> OpenAIChatCompletion: + response = await provider.openai_chat_completion(params) + for choice in response.choices: + # some providers return an empty list for no tool calls in non-streaming responses + # but the OpenAI API returns None. So, set tool_calls to None if it's empty + if choice.message and choice.message.tool_calls is not None and len(choice.message.tool_calls) == 0: + choice.message.tool_calls = None + return response + + async def health(self) -> dict[str, HealthResponse]: + health_statuses = {} + timeout = 1 # increasing the timeout to 1 second for health checks + for provider_id, impl in self.routing_table.impls_by_provider_id.items(): + try: + # check if the provider has a health method + if not hasattr(impl, "health"): + continue + health = await asyncio.wait_for(impl.health(), timeout=timeout) + health_statuses[provider_id] = health + except TimeoutError: + health_statuses[provider_id] = HealthResponse( + status=HealthStatus.ERROR, + message=f"Health check timed out after {timeout} seconds", + ) + except NotImplementedError: + health_statuses[provider_id] = HealthResponse(status=HealthStatus.NOT_IMPLEMENTED) + except Exception as e: + health_statuses[provider_id] = HealthResponse( + status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}" + ) + return health_statuses + + async def stream_tokens_and_compute_metrics_openai_chat( + self, + response: AsyncIterator[OpenAIChatCompletionChunk], + fully_qualified_model_id: str, + provider_id: str, + messages: list[OpenAIMessageParam] | None = None, + ) -> AsyncIterator[OpenAIChatCompletionChunk]: + """Stream OpenAI chat completion chunks, compute metrics, and store the final completion.""" + id = None + created = None + choices_data: dict[int, dict[str, Any]] = {} + + try: + async for chunk in response: + # Skip None chunks + if chunk is None: + continue + + # Capture ID and created timestamp from first chunk + if id is None and chunk.id: + id = chunk.id + if created is None and chunk.created: + created = chunk.created + + chunk.model = fully_qualified_model_id + + # Accumulate choice data for final assembly + if chunk.choices: + for choice_delta in chunk.choices: + idx = choice_delta.index + if idx not in choices_data: + choices_data[idx] = { + "content_parts": [], + "tool_calls_builder": {}, + "finish_reason": "stop", + "logprobs_content_parts": [], + } + current_choice_data = choices_data[idx] + + if choice_delta.delta: + delta = choice_delta.delta + if delta.content: + current_choice_data["content_parts"].append(delta.content) + if delta.tool_calls: + for tool_call_delta in delta.tool_calls: + tc_idx = tool_call_delta.index + if tc_idx not in current_choice_data["tool_calls_builder"]: + current_choice_data["tool_calls_builder"][tc_idx] = { + "id": None, + "type": "function", + "function_name_parts": [], + "function_arguments_parts": 
[], + } + builder = current_choice_data["tool_calls_builder"][tc_idx] + if tool_call_delta.id: + builder["id"] = tool_call_delta.id + if tool_call_delta.type: + builder["type"] = tool_call_delta.type + if tool_call_delta.function: + if tool_call_delta.function.name: + builder["function_name_parts"].append(tool_call_delta.function.name) + if tool_call_delta.function.arguments: + builder["function_arguments_parts"].append( + tool_call_delta.function.arguments + ) + if choice_delta.finish_reason: + current_choice_data["finish_reason"] = choice_delta.finish_reason + if choice_delta.logprobs and choice_delta.logprobs.content: + current_choice_data["logprobs_content_parts"].extend(choice_delta.logprobs.content) + + # Compute metrics on final chunk + if chunk.choices and chunk.choices[0].finish_reason: + completion_text = "" + for choice_data in choices_data.values(): + completion_text += "".join(choice_data["content_parts"]) + + yield chunk + finally: + # Store the final assembled completion + if id and self.store and messages: + assembled_choices: list[OpenAIChoice] = [] + for choice_idx, choice_data in choices_data.items(): + content_str = "".join(choice_data["content_parts"]) + assembled_tool_calls: list[OpenAIChatCompletionToolCall] = [] + if choice_data["tool_calls_builder"]: + for tc_build_data in choice_data["tool_calls_builder"].values(): + if tc_build_data["id"]: + func_name = "".join(tc_build_data["function_name_parts"]) + func_args = "".join(tc_build_data["function_arguments_parts"]) + assembled_tool_calls.append( + OpenAIChatCompletionToolCall( + id=tc_build_data["id"], + type=tc_build_data["type"], + function=OpenAIChatCompletionToolCallFunction( + name=func_name, arguments=func_args + ), + ) + ) + message = OpenAIAssistantMessageParam( + role="assistant", + content=content_str if content_str else None, + tool_calls=assembled_tool_calls if assembled_tool_calls else None, + ) + logprobs_content = choice_data["logprobs_content_parts"] + final_logprobs = OpenAIChoiceLogprobs(content=logprobs_content) if logprobs_content else None + + assembled_choices.append( + OpenAIChoice( + finish_reason=choice_data["finish_reason"], + index=choice_idx, + message=message, + logprobs=final_logprobs, + ) + ) + + final_response = OpenAIChatCompletion( + id=id, + choices=assembled_choices, + created=created or int(time.time()), + model=fully_qualified_model_id, + object="chat.completion", + ) + logger.debug(f"InferenceRouter.completion_response: {final_response}") + asyncio.create_task(self.store.store_chat_completion(final_response, messages)) diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py new file mode 100644 index 000000000..10c21ea88 --- /dev/null +++ b/src/llama_stack/core/routers/safety.py @@ -0,0 +1,113 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
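# Sketch (assumption): how one "tool_calls_builder" entry accumulated by
# stream_tokens_and_compute_metrics_openai_chat above collapses into a complete
# tool call once the stream ends. Plain dicts stand in for the OpenAI types.
def assemble_tool_call(builder: dict) -> dict:
    return {
        "id": builder["id"],
        "type": builder["type"],
        "function": {
            "name": "".join(builder["function_name_parts"]),
            "arguments": "".join(builder["function_arguments_parts"]),
        },
    }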
+
+from typing import Any
+
+from opentelemetry import trace
+
+from llama_stack.core.datatypes import SafetyConfig
+from llama_stack.log import get_logger
+from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name
+from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
+
+logger = get_logger(name=__name__, category="core::routers")
+tracer = trace.get_tracer(__name__)
+
+
+class SafetyRouter(Safety):
+    def __init__(
+        self,
+        routing_table: RoutingTable,
+        safety_config: SafetyConfig | None = None,
+    ) -> None:
+        logger.debug("Initializing SafetyRouter")
+        self.routing_table = routing_table
+        self.safety_config = safety_config
+
+    async def initialize(self) -> None:
+        logger.debug("SafetyRouter.initialize")
+        pass
+
+    async def shutdown(self) -> None:
+        logger.debug("SafetyRouter.shutdown")
+        pass
+
+    async def register_shield(
+        self,
+        shield_id: str,
+        provider_shield_id: str | None = None,
+        provider_id: str | None = None,
+        params: dict[str, Any] | None = None,
+    ) -> Shield:
+        logger.debug(f"SafetyRouter.register_shield: {shield_id}")
+        return await self.routing_table.register_shield(shield_id, provider_shield_id, provider_id, params)
+
+    async def unregister_shield(self, identifier: str) -> None:
+        logger.debug(f"SafetyRouter.unregister_shield: {identifier}")
+        return await self.routing_table.unregister_shield(identifier)
+
+    async def run_shield(
+        self,
+        shield_id: str,
+        messages: list[OpenAIMessageParam],
+        params: dict[str, Any] | None = None,
+    ) -> RunShieldResponse:
+        with tracer.start_as_current_span(name=safety_span_name(shield_id)):
+            logger.debug(f"SafetyRouter.run_shield: {shield_id}")
+            provider = await self.routing_table.get_provider_impl(shield_id)
+            response = await provider.run_shield(
+                shield_id=shield_id,
+                messages=messages,
+                params=params,
+            )
+
+            safety_request_span_attributes(shield_id, messages, response)
+            return response
+
+    async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
+        list_shields_response = await self.routing_table.list_shields()
+        shields = list_shields_response.data
+
+        selected_shield: Shield | None = None
+        provider_model: str | None = model
+
+        if model:
+            matches: list[Shield] = [s for s in shields if model == s.provider_resource_id]
+            if not matches:
+                raise ValueError(
+                    f"No shield associated with provider_resource id {model}: choose from {[s.provider_resource_id for s in shields]}"
+                )
+            if len(matches) > 1:
+                raise ValueError(
+                    f"Multiple shields associated with provider_resource id {model}: matched shields {[s.identifier for s in matches]}"
+                )
+            selected_shield = matches[0]
+        else:
+            default_shield_id = self.safety_config.default_shield_id if self.safety_config else None
+            if not default_shield_id:
+                raise ValueError(
+                    "No moderation model specified and no default_shield_id configured in safety config: select model "
+                    f"from {[s.provider_resource_id or s.identifier for s in shields]}"
+                )
+
+            selected_shield = next((s for s in shields if s.identifier == default_shield_id), None)
+            if selected_shield is None:
+                raise ValueError(
+                    f"Default moderation model not found. Choose from {[s.provider_resource_id or s.identifier for s in shields]}."
+ ) + + provider_model = selected_shield.provider_resource_id + + shield_id = selected_shield.identifier + logger.debug(f"SafetyRouter.run_moderation: {shield_id}") + provider = await self.routing_table.get_provider_impl(shield_id) + + response = await provider.run_moderation( + input=input, + model=provider_model, + ) + + return response diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py new file mode 100644 index 000000000..b387cb657 --- /dev/null +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.log import get_logger +from llama_stack_api import ( + URL, + ListToolDefsResponse, + ToolRuntime, +) + +from ..routing_tables.toolgroups import ToolGroupsRoutingTable + +logger = get_logger(name=__name__, category="core::routers") + + +class ToolRuntimeRouter(ToolRuntime): + def __init__( + self, + routing_table: ToolGroupsRoutingTable, + ) -> None: + logger.debug("Initializing ToolRuntimeRouter") + self.routing_table = routing_table + + async def initialize(self) -> None: + logger.debug("ToolRuntimeRouter.initialize") + pass + + async def shutdown(self) -> None: + logger.debug("ToolRuntimeRouter.shutdown") + pass + + async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None) -> Any: + logger.debug(f"ToolRuntimeRouter.invoke_tool: {tool_name}") + provider = await self.routing_table.get_provider_impl(tool_name) + return await provider.invoke_tool( + tool_name=tool_name, + kwargs=kwargs, + authorization=authorization, + ) + + async def list_runtime_tools( + self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None, authorization: str | None = None + ) -> ListToolDefsResponse: + return await self.routing_table.list_tools(tool_group_id, authorization=authorization) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py new file mode 100644 index 000000000..5256dda44 --- /dev/null +++ b/src/llama_stack/core/routers/vector_io.py @@ -0,0 +1,479 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
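# Sketch (assumption): the shield-selection rule run_moderation implements above.
# An explicit `model` must match exactly one shield's provider_resource_id;
# otherwise the configured default_shield_id picks the shield.
def select_shield(shields: list, model: str | None, default_shield_id: str | None):
    if model:
        matches = [s for s in shields if s.provider_resource_id == model]
        if len(matches) != 1:
            raise ValueError(f"Expected exactly one shield for {model}, found {len(matches)}")
        return matches[0]
    selected = next((s for s in shields if s.identifier == default_shield_id), None)
    if selected is None:
        raise ValueError("No model given and no shield matching default_shield_id configured")
    return selected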
+
+import asyncio
+import uuid
+from typing import Annotated, Any
+
+from fastapi import Body
+
+from llama_stack.core.datatypes import VectorStoresConfig
+from llama_stack.log import get_logger
+from llama_stack_api import (
+    Chunk,
+    HealthResponse,
+    HealthStatus,
+    InterleavedContent,
+    ModelNotFoundError,
+    ModelType,
+    ModelTypeError,
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
+    OpenAICreateVectorStoreRequestWithExtraBody,
+    QueryChunksResponse,
+    RoutingTable,
+    SearchRankingOptions,
+    VectorIO,
+    VectorStoreChunkingStrategy,
+    VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
+    VectorStoreDeleteResponse,
+    VectorStoreFileBatchObject,
+    VectorStoreFileContentResponse,
+    VectorStoreFileDeleteResponse,
+    VectorStoreFileObject,
+    VectorStoreFilesListInBatchResponse,
+    VectorStoreFileStatus,
+    VectorStoreListResponse,
+    VectorStoreObject,
+    VectorStoreSearchResponsePage,
+)
+
+logger = get_logger(name=__name__, category="core::routers")
+
+
+class VectorIORouter(VectorIO):
+    """Routes to a provider based on the vector store identifier"""
+
+    def __init__(
+        self,
+        routing_table: RoutingTable,
+        vector_stores_config: VectorStoresConfig | None = None,
+    ) -> None:
+        logger.debug("Initializing VectorIORouter")
+        self.routing_table = routing_table
+        self.vector_stores_config = vector_stores_config
+
+    async def initialize(self) -> None:
+        logger.debug("VectorIORouter.initialize")
+        pass
+
+    async def shutdown(self) -> None:
+        logger.debug("VectorIORouter.shutdown")
+        pass
+
+    async def _get_embedding_model_dimension(self, embedding_model_id: str) -> int:
+        """Get the embedding dimension for a specific embedding model."""
+        all_models = await self.routing_table.get_all_with_type("model")
+
+        for model in all_models:
+            if model.identifier == embedding_model_id and model.model_type == ModelType.embedding:
+                dimension = model.metadata.get("embedding_dimension")
+                if dimension is None:
+                    raise ValueError(f"Embedding model '{embedding_model_id}' has no embedding_dimension in metadata")
+                return int(dimension)
+
+        raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model")
+
+    async def insert_chunks(
+        self,
+        vector_store_id: str,
+        chunks: list[Chunk],
+        ttl_seconds: int | None = None,
+    ) -> None:
+        doc_ids = [chunk.document_id for chunk in chunks[:3]]
+        logger.debug(
+            f"VectorIORouter.insert_chunks: {vector_store_id}, {len(chunks)} chunks, "
+            f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...'
if len(chunks) > 3 else ''}" + ) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.insert_chunks(vector_store_id, chunks, ttl_seconds) + + async def query_chunks( + self, + vector_store_id: str, + query: InterleavedContent, + params: dict[str, Any] | None = None, + ) -> QueryChunksResponse: + logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.query_chunks(vector_store_id, query, params) + + # OpenAI Vector Stores API endpoints + async def openai_create_vector_store( + self, + params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)], + ) -> VectorStoreObject: + # Extract llama-stack-specific parameters from extra_body + extra = params.model_extra or {} + embedding_model = extra.get("embedding_model") + embedding_dimension = extra.get("embedding_dimension") + provider_id = extra.get("provider_id") + + # Use default embedding model if not specified + if ( + embedding_model is None + and self.vector_stores_config + and self.vector_stores_config.default_embedding_model is not None + ): + # Construct the full model ID with provider prefix + embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id + model_id = self.vector_stores_config.default_embedding_model.model_id + embedding_model = f"{embedding_provider_id}/{model_id}" + + if embedding_model is not None and embedding_dimension is None: + embedding_dimension = await self._get_embedding_model_dimension(embedding_model) + + # Validate that embedding model exists and is of the correct type + if embedding_model is not None: + model = await self.routing_table.get_object_by_identifier("model", embedding_model) + if model is None: + raise ModelNotFoundError(embedding_model) + if model.model_type != ModelType.embedding: + raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) + + # Auto-select provider if not specified + if provider_id is None: + num_providers = len(self.routing_table.impls_by_provider_id) + if num_providers == 0: + raise ValueError("No vector_io providers available") + if num_providers > 1: + available_providers = list(self.routing_table.impls_by_provider_id.keys()) + # Use default configured provider + if self.vector_stores_config and self.vector_stores_config.default_provider_id: + default_provider = self.vector_stores_config.default_provider_id + if default_provider in available_providers: + provider_id = default_provider + logger.debug(f"Using configured default vector store provider: {provider_id}") + else: + raise ValueError( + f"Configured default vector store provider '{default_provider}' not found. " + f"Available providers: {available_providers}" + ) + else: + raise ValueError( + f"Multiple vector_io providers available. Please specify provider_id in extra_body. 
" + f"Available providers: {available_providers}" + ) + else: + provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] + + vector_store_id = f"vs_{uuid.uuid4()}" + registered_vector_store = await self.routing_table.register_vector_store( + vector_store_id=vector_store_id, + embedding_model=embedding_model, + embedding_dimension=embedding_dimension, + provider_id=provider_id, + provider_vector_store_id=vector_store_id, + vector_store_name=params.name, + ) + provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier) + + # Update model_extra with registered values so provider uses the already-registered vector_store + if params.model_extra is None: + params.model_extra = {} + params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id + params.model_extra["provider_id"] = registered_vector_store.provider_id + if embedding_model is not None: + params.model_extra["embedding_model"] = embedding_model + if embedding_dimension is not None: + params.model_extra["embedding_dimension"] = embedding_dimension + + # Set chunking strategy explicitly if not provided + if params.chunking_strategy is None or params.chunking_strategy.type == "auto": + # actualize the chunking strategy to static + params.chunking_strategy = VectorStoreChunkingStrategyStatic( + static=VectorStoreChunkingStrategyStaticConfig() + ) + + return await provider.openai_create_vector_store(params) + + async def openai_list_vector_stores( + self, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + ) -> VectorStoreListResponse: + logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}") + # Route to default provider for now - could aggregate from all providers in the future + # call retrieve on each vector dbs to get list of vector stores + vector_stores = await self.routing_table.get_all_with_type("vector_store") + all_stores = [] + for vector_store in vector_stores: + try: + provider = await self.routing_table.get_provider_impl(vector_store.identifier) + vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier) + all_stores.append(vector_store) + except Exception as e: + logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}") + continue + + # Sort by created_at + reverse_order = order == "desc" + all_stores.sort(key=lambda x: x.created_at, reverse=reverse_order) + + # Apply cursor-based pagination + if after: + after_index = next((i for i, store in enumerate(all_stores) if store.id == after), -1) + if after_index >= 0: + all_stores = all_stores[after_index + 1 :] + + if before: + before_index = next( + (i for i, store in enumerate(all_stores) if store.id == before), + len(all_stores), + ) + all_stores = all_stores[:before_index] + + # Apply limit + limited_stores = all_stores[:limit] + + # Determine pagination info + has_more = len(all_stores) > limit + first_id = limited_stores[0].id if limited_stores else None + last_id = limited_stores[-1].id if limited_stores else None + + return VectorStoreListResponse( + data=limited_stores, + has_more=has_more, + first_id=first_id, + last_id=last_id, + ) + + async def openai_retrieve_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreObject: + logger.debug(f"VectorIORouter.openai_retrieve_vector_store: {vector_store_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store(vector_store_id) + + async def 
openai_update_vector_store( + self, + vector_store_id: str, + name: str | None = None, + expires_after: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreObject: + logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}") + + # Check if provider_id is being changed (not supported) + if metadata and "provider_id" in metadata: + current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id) + if current_store and current_store.provider_id != metadata["provider_id"]: + raise ValueError("provider_id cannot be changed after vector store creation") + + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store( + vector_store_id=vector_store_id, + name=name, + expires_after=expires_after, + metadata=metadata, + ) + + async def openai_delete_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreDeleteResponse: + logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") + return await self.routing_table.openai_delete_vector_store(vector_store_id) + + async def openai_search_vector_store( + self, + vector_store_id: str, + query: str | list[str], + filters: dict[str, Any] | None = None, + max_num_results: int | None = 10, + ranking_options: SearchRankingOptions | None = None, + rewrite_query: bool | None = False, + search_mode: str | None = "vector", + ) -> VectorStoreSearchResponsePage: + logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_search_vector_store( + vector_store_id=vector_store_id, + query=query, + filters=filters, + max_num_results=max_num_results, + ranking_options=ranking_options, + rewrite_query=rewrite_query, + search_mode=search_mode, + ) + + async def openai_attach_file_to_vector_store( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any] | None = None, + chunking_strategy: VectorStoreChunkingStrategy | None = None, + ) -> VectorStoreFileObject: + logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}") + if chunking_strategy is None or chunking_strategy.type == "auto": + chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig()) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_attach_file_to_vector_store( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_list_files_in_vector_store( + self, + vector_store_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + filter: VectorStoreFileStatus | None = None, + ) -> list[VectorStoreFileObject]: + logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store( + vector_store_id=vector_store_id, + limit=limit, + order=order, + after=after, + before=before, + filter=filter, + ) + + async def openai_retrieve_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileObject: + logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file: {vector_store_id}, {file_id}") + provider = await 
self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_retrieve_vector_store_file_contents( + self, + vector_store_id: str, + file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, + ) -> VectorStoreFileContentResponse: + logger.debug( + f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, " + f"include_embeddings={include_embeddings}, include_metadata={include_metadata}" + ) + + return await self.routing_table.openai_retrieve_vector_store_file_contents( + vector_store_id=vector_store_id, + file_id=file_id, + include_embeddings=include_embeddings, + include_metadata=include_metadata, + ) + + async def openai_update_vector_store_file( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any], + ) -> VectorStoreFileObject: + logger.debug(f"VectorIORouter.openai_update_vector_store_file: {vector_store_id}, {file_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + ) + + async def openai_delete_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileDeleteResponse: + logger.debug(f"VectorIORouter.openai_delete_vector_store_file: {vector_store_id}, {file_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def health(self) -> dict[str, HealthResponse]: + health_statuses = {} + timeout = 1 # increasing the timeout to 1 second for health checks + for provider_id, impl in self.routing_table.impls_by_provider_id.items(): + try: + # check if the provider has a health method + if not hasattr(impl, "health"): + continue + health = await asyncio.wait_for(impl.health(), timeout=timeout) + health_statuses[provider_id] = health + except TimeoutError: + health_statuses[provider_id] = HealthResponse( + status=HealthStatus.ERROR, + message=f"Health check timed out after {timeout} seconds", + ) + except NotImplementedError: + health_statuses[provider_id] = HealthResponse(status=HealthStatus.NOT_IMPLEMENTED) + except Exception as e: + health_statuses[provider_id] = HealthResponse( + status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}" + ) + return health_statuses + + async def openai_create_vector_store_file_batch( + self, + vector_store_id: str, + params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)], + ) -> VectorStoreFileBatchObject: + logger.debug( + f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files" + ) + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch(vector_store_id, params) + + async def openai_retrieve_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ) -> VectorStoreFileBatchObject: + logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_batch: {batch_id}, {vector_store_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) + + async def 
openai_list_files_in_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + after: str | None = None, + before: str | None = None, + filter: str | None = None, + limit: int | None = 20, + order: str | None = "desc", + ) -> VectorStoreFilesListInBatchResponse: + logger.debug(f"VectorIORouter.openai_list_files_in_vector_store_file_batch: {batch_id}, {vector_store_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + after=after, + before=before, + filter=filter, + limit=limit, + order=order, + ) + + async def openai_cancel_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ) -> VectorStoreFileBatchObject: + logger.debug(f"VectorIORouter.openai_cancel_vector_store_file_batch: {batch_id}, {vector_store_id}") + provider = await self.routing_table.get_provider_impl(vector_store_id) + return await provider.openai_cancel_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) diff --git a/llama_stack/core/conversations/__init__.py b/src/llama_stack/core/routing_tables/__init__.py similarity index 100% rename from llama_stack/core/conversations/__init__.py rename to src/llama_stack/core/routing_tables/__init__.py diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py new file mode 100644 index 000000000..9037ffe8b --- /dev/null +++ b/src/llama_stack/core/routing_tables/benchmarks.py @@ -0,0 +1,62 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.core.datatypes import ( + BenchmarkWithOwner, +) +from llama_stack.log import get_logger +from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): + async def list_benchmarks(self) -> ListBenchmarksResponse: + return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark")) + + async def get_benchmark(self, benchmark_id: str) -> Benchmark: + benchmark = await self.get_object_by_identifier("benchmark", benchmark_id) + if benchmark is None: + raise ValueError(f"Benchmark '{benchmark_id}' not found") + return benchmark + + async def register_benchmark( + self, + benchmark_id: str, + dataset_id: str, + scoring_functions: list[str], + metadata: dict[str, Any] | None = None, + provider_benchmark_id: str | None = None, + provider_id: str | None = None, + ) -> None: + if metadata is None: + metadata = {} + if provider_id is None: + if len(self.impls_by_provider_id) == 1: + provider_id = list(self.impls_by_provider_id.keys())[0] + else: + raise ValueError( + "No provider specified and multiple providers available. Please specify a provider_id." 
+ ) + if provider_benchmark_id is None: + provider_benchmark_id = benchmark_id + benchmark = BenchmarkWithOwner( + identifier=benchmark_id, + dataset_id=dataset_id, + scoring_functions=scoring_functions, + metadata=metadata, + provider_id=provider_id, + provider_resource_id=provider_benchmark_id, + ) + await self.register_object(benchmark) + + async def unregister_benchmark(self, benchmark_id: str) -> None: + existing_benchmark = await self.get_benchmark(benchmark_id) + await self.unregister_object(existing_benchmark) diff --git a/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py similarity index 97% rename from llama_stack/core/routing_tables/common.py rename to src/llama_stack/core/routing_tables/common.py index d6faf93c5..a9e3ff95f 100644 --- a/llama_stack/core/routing_tables/common.py +++ b/src/llama_stack/core/routing_tables/common.py @@ -6,9 +6,6 @@ from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.models import Model -from llama_stack.apis.resource import ResourceType from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed from llama_stack.core.access_control.datatypes import Action from llama_stack.core.datatypes import ( @@ -21,7 +18,7 @@ from llama_stack.core.datatypes import ( from llama_stack.core.request_headers import get_authenticated_user from llama_stack.core.store import DistributionRegistry from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, RoutingTable +from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable logger = get_logger(name=__name__, category="core::routing_tables") diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py new file mode 100644 index 000000000..62fd07b13 --- /dev/null +++ b/src/llama_stack/core/routing_tables/datasets.py @@ -0,0 +1,91 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
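# Sketch (assumption): the provider-defaulting rule shared by register_benchmark
# above and the other registration methods in this patch — provider_id may be
# omitted only when exactly one provider is configured.
def resolve_provider_id(impls_by_provider_id: dict, provider_id: str | None = None) -> str:
    if provider_id is not None:
        return provider_id
    if len(impls_by_provider_id) == 1:
        return next(iter(impls_by_provider_id))
    raise ValueError("No provider specified and multiple providers available. Please specify a provider_id.")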
+ +import uuid +from typing import Any + +from llama_stack.core.datatypes import ( + DatasetWithOwner, +) +from llama_stack.log import get_logger +from llama_stack_api import ( + Dataset, + DatasetNotFoundError, + DatasetPurpose, + Datasets, + DatasetType, + DataSource, + ListDatasetsResponse, + ResourceType, + RowsDataSource, + URIDataSource, +) + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): + async def list_datasets(self) -> ListDatasetsResponse: + return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value)) + + async def get_dataset(self, dataset_id: str) -> Dataset: + dataset = await self.get_object_by_identifier("dataset", dataset_id) + if dataset is None: + raise DatasetNotFoundError(dataset_id) + return dataset + + async def register_dataset( + self, + purpose: DatasetPurpose, + source: DataSource, + metadata: dict[str, Any] | None = None, + dataset_id: str | None = None, + ) -> Dataset: + if isinstance(source, dict): + if source["type"] == "uri": + source = URIDataSource.parse_obj(source) + elif source["type"] == "rows": + source = RowsDataSource.parse_obj(source) + + if not dataset_id: + dataset_id = f"dataset-{str(uuid.uuid4())}" + + provider_dataset_id = dataset_id + + # infer provider from source + if metadata and metadata.get("provider_id"): + provider_id = metadata.get("provider_id") # pass through from nvidia datasetio + elif source.type == DatasetType.rows.value: + provider_id = "localfs" + elif source.type == DatasetType.uri.value: + # infer provider from uri + if source.uri.startswith("huggingface"): + provider_id = "huggingface" + else: + provider_id = "localfs" + else: + raise ValueError(f"Unknown data source type: {source.type}") + + if metadata is None: + metadata = {} + + dataset = DatasetWithOwner( + identifier=dataset_id, + provider_resource_id=provider_dataset_id, + provider_id=provider_id, + purpose=purpose, + source=source, + metadata=metadata, + ) + + await self.register_object(dataset) + return dataset + + async def unregister_dataset(self, dataset_id: str) -> None: + dataset = await self.get_dataset(dataset_id) + await self.unregister_object(dataset) diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py new file mode 100644 index 000000000..1facbb27b --- /dev/null +++ b/src/llama_stack/core/routing_tables/models.py @@ -0,0 +1,257 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
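# Sketch (assumption): the provider-inference rule register_dataset applies above
# when metadata carries no explicit provider_id.
def infer_dataset_provider(source_type: str, uri: str | None = None) -> str:
    if source_type == "rows":
        return "localfs"
    if source_type == "uri":
        return "huggingface" if uri is not None and uri.startswith("huggingface") else "localfs"
    raise ValueError(f"Unknown data source type: {source_type}")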
+ +import time +from typing import Any + +from llama_stack.core.datatypes import ( + ModelWithOwner, + RegistryEntrySource, +) +from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData +from llama_stack.core.utils.dynamic import instantiate_class_type +from llama_stack.log import get_logger +from llama_stack_api import ( + ListModelsResponse, + Model, + ModelNotFoundError, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) + +from .common import CommonRoutingTableImpl, lookup_model + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class ModelsRoutingTable(CommonRoutingTableImpl, Models): + listed_providers: set[str] = set() + + async def refresh(self) -> None: + for provider_id, provider in self.impls_by_provider_id.items(): + refresh = await provider.should_refresh_models() + refresh = refresh or provider_id not in self.listed_providers + if not refresh: + continue + + try: + models = await provider.list_models() + except Exception as e: + logger.warning(f"Model refresh failed for provider {provider_id}: {e}") + continue + + self.listed_providers.add(provider_id) + if models is None: + continue + + await self.update_registered_models(provider_id, models) + + async def _get_dynamic_models_from_provider_data(self) -> list[Model]: + """ + Fetch models from providers that have credentials in the current request's provider_data. + + This allows users to see models available to them from providers that require + per-request API keys (via X-LlamaStack-Provider-Data header). + + Returns models with fully qualified identifiers (provider_id/model_id) but does NOT + cache them in the registry since they are user-specific. + """ + provider_data = PROVIDER_DATA_VAR.get() + if not provider_data: + return [] + + dynamic_models = [] + + for provider_id, provider in self.impls_by_provider_id.items(): + # Check if this provider supports provider_data + if not isinstance(provider, NeedsRequestProviderData): + continue + + # Check if provider has a validator (some providers like ollama don't need per-request credentials) + spec = getattr(provider, "__provider_spec__", None) + if not spec or not getattr(spec, "provider_data_validator", None): + continue + + # Validate provider_data silently - we're speculatively checking all providers + # so validation failures are expected when user didn't provide keys for this provider + try: + validator = instantiate_class_type(spec.provider_data_validator) + validator(**provider_data) + except Exception: + # User didn't provide credentials for this provider - skip silently + continue + + # Validation succeeded! 
User has credentials for this provider + # Now try to list models + try: + models = await provider.list_models() + if not models: + continue + + # Ensure models have fully qualified identifiers with provider_id prefix + for model in models: + # Only add prefix if model identifier doesn't already have it + if not model.identifier.startswith(f"{provider_id}/"): + model.identifier = f"{provider_id}/{model.provider_resource_id}" + + dynamic_models.append(model) + + logger.debug(f"Fetched {len(models)} models from provider {provider_id} using provider_data") + + except Exception as e: + logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}") + continue + + return dynamic_models + + async def list_models(self) -> ListModelsResponse: + # Get models from registry + registry_models = await self.get_all_with_type("model") + + # Get additional models available via provider_data (user-specific, not cached) + dynamic_models = await self._get_dynamic_models_from_provider_data() + + # Combine, avoiding duplicates (registry takes precedence) + registry_identifiers = {m.identifier for m in registry_models} + unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers] + + return ListModelsResponse(data=registry_models + unique_dynamic_models) + + async def openai_list_models(self) -> OpenAIListModelsResponse: + # Get models from registry + registry_models = await self.get_all_with_type("model") + + # Get additional models available via provider_data (user-specific, not cached) + dynamic_models = await self._get_dynamic_models_from_provider_data() + + # Combine, avoiding duplicates (registry takes precedence) + registry_identifiers = {m.identifier for m in registry_models} + unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers] + + all_models = registry_models + unique_dynamic_models + + openai_models = [ + OpenAIModel( + id=model.identifier, + object="model", + created=int(time.time()), + owned_by="llama_stack", + custom_metadata={ + "model_type": model.model_type, + "provider_id": model.provider_id, + "provider_resource_id": model.provider_resource_id, + **model.metadata, + }, + ) + for model in all_models + ] + return OpenAIListModelsResponse(data=openai_models) + + async def get_model(self, model_id: str) -> Model: + return await lookup_model(self, model_id) + + async def get_provider_impl(self, model_id: str) -> Any: + model = await lookup_model(self, model_id) + if model.provider_id not in self.impls_by_provider_id: + raise ValueError(f"Provider {model.provider_id} not found in the routing table") + return self.impls_by_provider_id[model.provider_id] + + async def has_model(self, model_id: str) -> bool: + """ + Check if a model exists in the routing table. 
+ + :param model_id: The model identifier to check + :return: True if the model exists, False otherwise + """ + try: + await lookup_model(self, model_id) + return True + except ModelNotFoundError: + return False + + async def register_model( + self, + model_id: str, + provider_model_id: str | None = None, + provider_id: str | None = None, + metadata: dict[str, Any] | None = None, + model_type: ModelType | None = None, + ) -> Model: + if provider_id is None: + # If provider_id not specified, use the only provider if it supports this model + if len(self.impls_by_provider_id) == 1: + provider_id = list(self.impls_by_provider_id.keys())[0] + else: + raise ValueError( + f"Please specify a provider_id for model {model_id} since multiple providers are available: {self.impls_by_provider_id.keys()}.\n\n" + "Use the provider_id as a prefix to disambiguate, e.g. 'provider_id/model_id'." + ) + + provider_model_id = provider_model_id or model_id + metadata = metadata or {} + model_type = model_type or ModelType.llm + if "embedding_dimension" not in metadata and model_type == ModelType.embedding: + raise ValueError("Embedding model must have an embedding dimension in its metadata") + + identifier = f"{provider_id}/{provider_model_id}" + model = ModelWithOwner( + identifier=identifier, + provider_resource_id=provider_model_id, + provider_id=provider_id, + metadata=metadata, + model_type=model_type, + source=RegistryEntrySource.via_register_api, + ) + registered_model = await self.register_object(model) + return registered_model + + async def unregister_model(self, model_id: str) -> None: + existing_model = await self.get_model(model_id) + if existing_model is None: + raise ModelNotFoundError(model_id) + await self.unregister_object(existing_model) + + async def update_registered_models( + self, + provider_id: str, + models: list[Model], + ) -> None: + existing_models = await self.get_all_with_type("model") + + # we may have an alias for the model registered by the user (or during initialization + # from run.yaml) that we need to keep track of + model_ids = {} + for model in existing_models: + if model.provider_id != provider_id: + continue + if model.source == RegistryEntrySource.via_register_api: + model_ids[model.provider_resource_id] = model.identifier + continue + + logger.debug(f"unregistering model {model.identifier}") + await self.unregister_object(model) + + for model in models: + if model.provider_resource_id in model_ids: + # avoid overwriting a non-provider-registered model entry + continue + + if model.identifier == model.provider_resource_id: + model.identifier = f"{provider_id}/{model.provider_resource_id}" + + logger.debug(f"registering model {model.identifier} ({model.provider_resource_id})") + await self.register_object( + ModelWithOwner( + identifier=model.identifier, + provider_resource_id=model.provider_resource_id, + provider_id=provider_id, + metadata=model.metadata, + model_type=model.model_type, + source=RegistryEntrySource.listed_from_provider, + ) + ) diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py new file mode 100644 index 000000000..65ed26b85 --- /dev/null +++ b/src/llama_stack/core/routing_tables/scoring_functions.py @@ -0,0 +1,66 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
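# Sketch (assumption): the identifier convention register_model enforces above —
# registry identifiers are provider-qualified as "<provider_id>/<provider_model_id>",
# which is also the form _get_model_provider falls back to parsing.
def qualify_model_id(provider_id: str, provider_model_id: str) -> str:
    return f"{provider_id}/{provider_model_id}"

assert qualify_model_id("ollama", "llama3.2:3b") == "ollama/llama3.2:3b"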
+ +from llama_stack.core.datatypes import ( + ScoringFnWithOwner, +) +from llama_stack.log import get_logger +from llama_stack_api import ( + ListScoringFunctionsResponse, + ParamType, + ResourceType, + ScoringFn, + ScoringFnParams, + ScoringFunctions, +) + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): + async def list_scoring_functions(self) -> ListScoringFunctionsResponse: + return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value)) + + async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: + scoring_fn = await self.get_object_by_identifier("scoring_function", scoring_fn_id) + if scoring_fn is None: + raise ValueError(f"Scoring function '{scoring_fn_id}' not found") + return scoring_fn + + async def register_scoring_function( + self, + scoring_fn_id: str, + description: str, + return_type: ParamType, + provider_scoring_fn_id: str | None = None, + provider_id: str | None = None, + params: ScoringFnParams | None = None, + ) -> None: + if provider_scoring_fn_id is None: + provider_scoring_fn_id = scoring_fn_id + if provider_id is None: + if len(self.impls_by_provider_id) == 1: + provider_id = list(self.impls_by_provider_id.keys())[0] + else: + raise ValueError( + "No provider specified and multiple providers available. Please specify a provider_id." + ) + scoring_fn = ScoringFnWithOwner( + identifier=scoring_fn_id, + description=description, + return_type=return_type, + provider_resource_id=provider_scoring_fn_id, + provider_id=provider_id, + params=params, + ) + scoring_fn.provider_id = provider_id + await self.register_object(scoring_fn) + + async def unregister_scoring_function(self, scoring_fn_id: str) -> None: + existing_scoring_fn = await self.get_scoring_function(scoring_fn_id) + await self.unregister_object(existing_scoring_fn) diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py new file mode 100644 index 000000000..97b2efb96 --- /dev/null +++ b/src/llama_stack/core/routing_tables/shields.py @@ -0,0 +1,60 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
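# Sketch (assumption): registering a scoring function through the table above.
# `table` is a ScoringFunctionsRoutingTable; the id, description, and return type
# are illustrative placeholders, and NumberType is assumed to be the ParamType
# variant llama_stack_api exports for numeric results.
from llama_stack_api import NumberType

async def register_accuracy(table):
    await table.register_scoring_function(
        scoring_fn_id="example::accuracy",
        description="Fraction of exact matches",
        return_type=NumberType(),
    )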
+ +from typing import Any + +from llama_stack.core.datatypes import ( + ShieldWithOwner, +) +from llama_stack.log import get_logger +from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields + +from .common import CommonRoutingTableImpl + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): + async def list_shields(self) -> ListShieldsResponse: + return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value)) + + async def get_shield(self, identifier: str) -> Shield: + shield = await self.get_object_by_identifier("shield", identifier) + if shield is None: + raise ValueError(f"Shield '{identifier}' not found") + return shield + + async def register_shield( + self, + shield_id: str, + provider_shield_id: str | None = None, + provider_id: str | None = None, + params: dict[str, Any] | None = None, + ) -> Shield: + if provider_shield_id is None: + provider_shield_id = shield_id + if provider_id is None: + # If provider_id not specified, use the only provider if it supports this shield type + if len(self.impls_by_provider_id) == 1: + provider_id = list(self.impls_by_provider_id.keys())[0] + else: + raise ValueError( + "No provider specified and multiple providers available. Please specify a provider_id." + ) + if params is None: + params = {} + shield = ShieldWithOwner( + identifier=shield_id, + provider_resource_id=provider_shield_id, + provider_id=provider_id, + params=params, + ) + await self.register_object(shield) + return shield + + async def unregister_shield(self, identifier: str) -> None: + existing_shield = await self.get_shield(identifier) + await self.unregister_object(existing_shield) diff --git a/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py similarity index 89% rename from llama_stack/core/routing_tables/toolgroups.py rename to src/llama_stack/core/routing_tables/toolgroups.py index 2d47bbb17..8676ce35e 100644 --- a/llama_stack/core/routing_tables/toolgroups.py +++ b/src/llama_stack/core/routing_tables/toolgroups.py @@ -6,11 +6,17 @@ from typing import Any -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.errors import ToolGroupNotFoundError -from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner from llama_stack.log import get_logger +from llama_stack_api import ( + URL, + ListToolDefsResponse, + ListToolGroupsResponse, + ToolDef, + ToolGroup, + ToolGroupNotFoundError, + ToolGroups, +) from .common import CommonRoutingTableImpl @@ -43,7 +49,9 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): routing_key = self.tool_to_toolgroup[routing_key] return await super().get_provider_impl(routing_key, provider_id) - async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse: + async def list_tools( + self, toolgroup_id: str | None = None, authorization: str | None = None + ) -> ListToolDefsResponse: if toolgroup_id: if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id): toolgroup_id = group_id @@ -55,7 +63,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): for toolgroup in toolgroups: if toolgroup.identifier not in self.toolgroups_to_tools: try: - await self._index_tools(toolgroup) + await self._index_tools(toolgroup, authorization=authorization) except 
AuthenticationRequiredError: # Send authentication errors back to the client so it knows # that it needs to supply credentials for remote MCP servers. @@ -70,9 +78,11 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): return ListToolDefsResponse(data=all_tools) - async def _index_tools(self, toolgroup: ToolGroup): + async def _index_tools(self, toolgroup: ToolGroup, authorization: str | None = None): provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id) - tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint) + tooldefs_response = await provider_impl.list_runtime_tools( + toolgroup.identifier, toolgroup.mcp_endpoint, authorization=authorization + ) tooldefs = tooldefs_response.data for t in tooldefs: diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py new file mode 100644 index 000000000..93c119542 --- /dev/null +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -0,0 +1,298 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.core.datatypes import ( + VectorStoreWithOwner, +) +from llama_stack.log import get_logger + +# Removed VectorStores import to avoid exposing public API +from llama_stack_api import ( + ModelNotFoundError, + ModelType, + ModelTypeError, + ResourceType, + SearchRankingOptions, + VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, + VectorStoreFileContentResponse, + VectorStoreFileDeleteResponse, + VectorStoreFileObject, + VectorStoreFileStatus, + VectorStoreObject, + VectorStoreSearchResponsePage, +) + +from .common import CommonRoutingTableImpl, lookup_model + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class VectorStoresRoutingTable(CommonRoutingTableImpl): + """Internal routing table for vector_store operations. + + Does not inherit from VectorStores to avoid exposing public API endpoints. + Only provides internal routing functionality for VectorIORouter. + """ + + # Internal methods only - no public API exposure + + async def register_vector_store( + self, + vector_store_id: str, + embedding_model: str, + embedding_dimension: int | None = 384, + provider_id: str | None = None, + provider_vector_store_id: str | None = None, + vector_store_name: str | None = None, + ) -> Any: + if provider_id is None: + if len(self.impls_by_provider_id) > 0: + provider_id = list(self.impls_by_provider_id.keys())[0] + if len(self.impls_by_provider_id) > 1: + logger.warning( + f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}." + ) + else: + raise ValueError("No provider available. 
Please configure a vector_io provider.") + model = await lookup_model(self, embedding_model) + if model is None: + raise ModelNotFoundError(embedding_model) + if model.model_type != ModelType.embedding: + raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) + + vector_store = VectorStoreWithOwner( + identifier=vector_store_id, + type=ResourceType.vector_store.value, + provider_id=provider_id, + provider_resource_id=provider_vector_store_id, + embedding_model=embedding_model, + embedding_dimension=embedding_dimension, + vector_store_name=vector_store_name, + ) + await self.register_object(vector_store) + return vector_store + + async def openai_retrieve_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreObject: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store(vector_store_id) + + async def openai_update_vector_store( + self, + vector_store_id: str, + name: str | None = None, + expires_after: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreObject: + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store( + vector_store_id=vector_store_id, + name=name, + expires_after=expires_after, + metadata=metadata, + ) + + async def openai_delete_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreDeleteResponse: + await self.assert_action_allowed("delete", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + result = await provider.openai_delete_vector_store(vector_store_id) + await self.unregister_vector_store(vector_store_id) + return result + + async def unregister_vector_store(self, vector_store_id: str) -> None: + """Remove the vector store from the routing table registry.""" + try: + vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id) + if vector_store_obj: + await self.unregister_object(vector_store_obj) + except Exception as e: + # Log the error but don't fail the operation + logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}") + + async def openai_search_vector_store( + self, + vector_store_id: str, + query: str | list[str], + filters: dict[str, Any] | None = None, + max_num_results: int | None = 10, + ranking_options: SearchRankingOptions | None = None, + rewrite_query: bool | None = False, + search_mode: str | None = "vector", + ) -> VectorStoreSearchResponsePage: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_search_vector_store( + vector_store_id=vector_store_id, + query=query, + filters=filters, + max_num_results=max_num_results, + ranking_options=ranking_options, + rewrite_query=rewrite_query, + search_mode=search_mode, + ) + + async def openai_attach_file_to_vector_store( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any] | None = None, + chunking_strategy: VectorStoreChunkingStrategy | None = None, + ) -> VectorStoreFileObject: + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_attach_file_to_vector_store( + vector_store_id=vector_store_id, + file_id=file_id, 
+ attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_list_files_in_vector_store( + self, + vector_store_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + filter: VectorStoreFileStatus | None = None, + ) -> list[VectorStoreFileObject]: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store( + vector_store_id=vector_store_id, + limit=limit, + order=order, + after=after, + before=before, + filter=filter, + ) + + async def openai_retrieve_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileObject: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_retrieve_vector_store_file_contents( + self, + vector_store_id: str, + file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, + ) -> VectorStoreFileContentResponse: + await self.assert_action_allowed("read", "vector_store", vector_store_id) + + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_contents( + vector_store_id=vector_store_id, + file_id=file_id, + include_embeddings=include_embeddings, + include_metadata=include_metadata, + ) + + async def openai_update_vector_store_file( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any], + ) -> VectorStoreFileObject: + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + ) + + async def openai_delete_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileDeleteResponse: + await self.assert_action_allowed("delete", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_create_vector_store_file_batch( + self, + vector_store_id: str, + file_ids: list[str], + attributes: dict[str, Any] | None = None, + chunking_strategy: Any | None = None, + ): + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch( + vector_store_id=vector_store_id, + file_ids=file_ids, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_retrieve_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) + + async def openai_list_files_in_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + after: str | None = None, + before: str | None = None, + filter: str | None = None, + limit: int | None = 20, + order: str | None 
= "desc", + ): + await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + after=after, + before=before, + filter=filter, + limit=limit, + order=order, + ) + + async def openai_cancel_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("update", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_cancel_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) diff --git a/llama_stack/core/prompts/__init__.py b/src/llama_stack/core/server/__init__.py similarity index 100% rename from llama_stack/core/prompts/__init__.py rename to src/llama_stack/core/server/__init__.py diff --git a/llama_stack/core/server/auth.py b/src/llama_stack/core/server/auth.py similarity index 100% rename from llama_stack/core/server/auth.py rename to src/llama_stack/core/server/auth.py diff --git a/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py similarity index 93% rename from llama_stack/core/server/auth_providers.py rename to src/llama_stack/core/server/auth_providers.py index 0fe5f1558..66942dd39 100644 --- a/llama_stack/core/server/auth_providers.py +++ b/src/llama_stack/core/server/auth_providers.py @@ -6,13 +6,13 @@ import ssl from abc import ABC, abstractmethod +from typing import Any from urllib.parse import parse_qs, urljoin, urlparse import httpx import jwt from pydantic import BaseModel, Field -from llama_stack.apis.common.errors import TokenValidationError from llama_stack.core.datatypes import ( AuthenticationConfig, CustomAuthConfig, @@ -22,6 +22,7 @@ from llama_stack.core.datatypes import ( User, ) from llama_stack.log import get_logger +from llama_stack_api import TokenValidationError logger = get_logger(name=__name__, category="core::auth") @@ -143,14 +144,21 @@ class OAuth2TokenAuthProvider(AuthProvider): if self.config.jwks and self.config.jwks.token: headers["Authorization"] = f"Bearer {self.config.jwks.token}" - self._jwks_client = jwt.PyJWKClient( - self.config.jwks.uri if self.config.jwks else None, - cache_keys=True, - max_cached_keys=10, - lifespan=self.config.jwks.key_recheck_period if self.config.jwks else None, - headers=headers, - ssl_context=ssl_context, - ) + # Ensure uri is not None for PyJWKClient + if not self.config.jwks or not self.config.jwks.uri: + raise ValueError("JWKS configuration requires a valid URI") + + # Build kwargs conditionally to avoid passing None values + jwks_kwargs: dict[str, Any] = { + "cache_keys": True, + "max_cached_keys": 10, + "headers": headers, + "ssl_context": ssl_context, + } + if self.config.jwks.key_recheck_period is not None: + jwks_kwargs["lifespan"] = self.config.jwks.key_recheck_period + + self._jwks_client = jwt.PyJWKClient(self.config.jwks.uri, **jwks_kwargs) return self._jwks_client async def validate_jwt_token(self, token: str, scope: dict | None = None) -> User: @@ -197,23 +205,31 @@ class OAuth2TokenAuthProvider(AuthProvider): if self.config.introspection is None: raise ValueError("Introspection is not configured") + # ssl_ctxt can be None, bool, str, or SSLContext - httpx accepts all + ssl_ctxt: ssl.SSLContext | bool = False # Default to no verification if no cafile + if self.config.tls_cafile: + ssl_ctxt = 
ssl.create_default_context(cafile=self.config.tls_cafile.as_posix()) + + # Build post kwargs conditionally based on auth method + post_kwargs: dict[str, Any] = { + "url": self.config.introspection.url, + "data": form, + "timeout": 10.0, + } + if self.config.introspection.send_secret_in_body: form["client_id"] = self.config.introspection.client_id form["client_secret"] = self.config.introspection.client_secret - auth = None else: - auth = (self.config.introspection.client_id, self.config.introspection.client_secret) - ssl_ctxt = None - if self.config.tls_cafile: - ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix()) + # httpx auth parameter expects tuple[str | bytes, str | bytes] + post_kwargs["auth"] = ( + self.config.introspection.client_id, + self.config.introspection.client_secret, + ) + try: async with httpx.AsyncClient(verify=ssl_ctxt) as client: - response = await client.post( - self.config.introspection.url, - data=form, - auth=auth, - timeout=10.0, # Add a reasonable timeout - ) + response = await client.post(**post_kwargs) if response.status_code != httpx.codes.OK: logger.warning(f"Token introspection failed with status code: {response.status_code}") raise ValueError(f"Token introspection failed: {response.status_code}") diff --git a/llama_stack/core/server/quota.py b/src/llama_stack/core/server/quota.py similarity index 96% rename from llama_stack/core/server/quota.py rename to src/llama_stack/core/server/quota.py index 689f0e4c3..d74d3e89d 100644 --- a/llama_stack/core/server/quota.py +++ b/src/llama_stack/core/server/quota.py @@ -11,9 +11,9 @@ from datetime import UTC, datetime, timedelta from starlette.types import ASGIApp, Receive, Scope, Send from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType +from llama_stack.core.storage.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl +from llama_stack_api.internal.kvstore import KVStore logger = get_logger(name=__name__, category="core::server") diff --git a/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py similarity index 80% rename from llama_stack/core/server/routes.py rename to src/llama_stack/core/server/routes.py index 4970d0bf8..af5002565 100644 --- a/llama_stack/core/server/routes.py +++ b/src/llama_stack/core/server/routes.py @@ -12,10 +12,8 @@ from typing import Any from aiohttp import hdrs from starlette.routing import Route -from llama_stack.apis.datatypes import Api, ExternalApiSpec -from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup from llama_stack.core.resolver import api_protocol_map -from llama_stack.schema_utils import WebMethod +from llama_stack_api import Api, ExternalApiSpec, WebMethod EndpointFunc = Callable[..., Any] PathParams = dict[str, str] @@ -25,33 +23,16 @@ RouteImpls = dict[str, PathImpl] RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod] -def toolgroup_protocol_map(): - return { - SpecialToolGroup.rag_tool: RAGToolRuntime, - } - - def get_all_api_routes( external_apis: dict[Api, ExternalApiSpec] | None = None, ) -> dict[Api, list[tuple[Route, WebMethod]]]: apis = {} protocols = api_protocol_map(external_apis) - toolgroup_protocols = toolgroup_protocol_map() for api, protocol in protocols.items(): routes = [] protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) - # HACK ALERT - if 
api == Api.tool_runtime: - for tool_group in SpecialToolGroup: - sub_protocol = toolgroup_protocols[tool_group] - sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction) - for name, method in sub_protocol_methods: - if not hasattr(method, "__webmethod__"): - continue - protocol_methods.append((f"{tool_group.value}.{name}", method)) - for name, method in protocol_methods: # Get all webmethods for this method (supports multiple decorators) webmethods = getattr(method, "__webmethods__", []) @@ -68,8 +49,9 @@ def get_all_api_routes( else: http_method = hdrs.METH_POST routes.append( - (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod) - ) # setting endpoint to None since don't use a Router object + # setting endpoint to None since don't use a Router object + (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod) # type: ignore[arg-type] + ) apis[api] = routes @@ -98,7 +80,7 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No impl = impls[api] func = getattr(impl, route.name) # Get the first (and typically only) method from the set, filtering out HEAD - available_methods = [m for m in route.methods if m != "HEAD"] + available_methods = [m for m in (route.methods or []) if m != "HEAD"] if not available_methods: continue # Skip if only HEAD method is available method = available_methods[0].lower() diff --git a/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py similarity index 94% rename from llama_stack/core/server/server.py rename to src/llama_stack/core/server/server.py index dd21a72f9..9a01eb75e 100644 --- a/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -31,12 +31,9 @@ from fastapi.responses import JSONResponse, StreamingResponse from openai import BadRequestError from pydantic import BaseModel, ValidationError -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError -from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.core.access_control.access_control import AccessDeniedError from llama_stack.core.datatypes import ( AuthenticationRequiredError, - LoggingConfig, StackRunConfig, process_cors_config, ) @@ -56,20 +53,11 @@ from llama_stack.core.stack import ( from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.core.utils.context import preserve_contexts_async_generator -from llama_stack.log import get_logger, setup_logging -from llama_stack.providers.datatypes import Api -from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig -from llama_stack.providers.inline.telemetry.meta_reference.telemetry import ( - TelemetryAdapter, -) -from llama_stack.providers.utils.telemetry.tracing import ( - CURRENT_TRACE_CONTEXT, - setup_logger, -) +from llama_stack.log import LoggingConfig, get_logger, setup_logging +from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError from .auth import AuthenticationMiddleware from .quota import QuotaMiddleware -from .tracing import TracingMiddleware REPO_ROOT = Path(__file__).parent.parent.parent.parent @@ -174,7 +162,9 @@ class StackApp(FastAPI): @asynccontextmanager async def lifespan(app: StackApp): - logger.info("Starting up") + server_version = parse_version("llama-stack") + + logger.info(f"Starting up Llama Stack server (version: {server_version})") assert app.stack is 
not None app.stack.create_registry_refresh_task() yield @@ -270,7 +260,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable: try: if is_streaming: - context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR] + context_vars = [PROVIDER_DATA_VAR] if test_context_var is not None: context_vars.append(test_context_var) gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars) @@ -448,11 +438,6 @@ def create_app() -> StackApp: if cors_config: app.add_middleware(CORSMiddleware, **cors_config.model_dump()) - if config.telemetry.enabled: - setup_logger(impls[Api.telemetry]) - else: - setup_logger(TelemetryAdapter(TelemetryConfig(), {})) - # Load external APIs if configured external_apis = load_external_apis(config) all_routes = get_all_api_routes(external_apis) @@ -509,8 +494,6 @@ def create_app() -> StackApp: app.exception_handler(RequestValidationError)(global_exception_handler) app.exception_handler(Exception)(global_exception_handler) - app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis) - return app @@ -532,8 +515,8 @@ def extract_path_params(route: str) -> list[str]: def remove_disabled_providers(obj): if isinstance(obj, dict): - keys = ["provider_id", "shield_id", "provider_model_id", "model_id"] - if any(k in obj and obj[k] in ("__disabled__", "", None) for k in keys): + # Filter out items where provider_id is explicitly disabled or empty + if "provider_id" in obj and obj["provider_id"] in ("__disabled__", "", None): return None return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None} elif isinstance(obj, list): diff --git a/llama_stack/core/stack.py b/src/llama_stack/core/stack.py similarity index 91% rename from llama_stack/core/stack.py rename to src/llama_stack/core/stack.py index 4cf1d072d..8ba1f2afd 100644 --- a/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -13,29 +13,8 @@ from typing import Any import yaml -from llama_stack.apis.agents import Agents -from llama_stack.apis.benchmarks import Benchmarks -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.eval import Eval -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.inspect import Inspect -from llama_stack.apis.models import Models -from llama_stack.apis.post_training import PostTraining -from llama_stack.apis.prompts import Prompts -from llama_stack.apis.providers import Providers -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFunctions -from llama_stack.apis.shields import Shields -from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration -from llama_stack.apis.telemetry import Telemetry -from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl -from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig +from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from 
llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl @@ -55,7 +34,30 @@ from llama_stack.core.storage.datatypes import ( from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api +from llama_stack_api import ( + Agents, + Api, + Batches, + Benchmarks, + Conversations, + DatasetIO, + Datasets, + Eval, + Files, + Inference, + Inspect, + Models, + PostTraining, + Prompts, + Providers, + Safety, + Scoring, + ScoringFunctions, + Shields, + ToolGroups, + ToolRuntime, + VectorIO, +) logger = get_logger(name=__name__, category="core") @@ -64,10 +66,9 @@ class LlamaStack( Providers, Inference, Agents, + Batches, Safety, - SyntheticDataGeneration, Datasets, - Telemetry, PostTraining, VectorIO, Eval, @@ -80,7 +81,6 @@ class LlamaStack( Inspect, ToolGroups, ToolRuntime, - RAGToolRuntime, Files, Prompts, Conversations, @@ -175,6 +175,30 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") +async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]): + if safety_config is None or safety_config.default_shield_id is None: + return + + if Api.shields not in impls: + raise ValueError("Safety configuration requires the shields API to be enabled") + + if Api.safety not in impls: + raise ValueError("Safety configuration requires the safety API to be enabled") + + shields_impl = impls[Api.shields] + response = await shields_impl.list_shields() + shields_by_id = {shield.identifier: shield for shield in response.data} + + default_shield_id = safety_config.default_shield_id + # don't validate if there are no shields registered + if shields_by_id and default_shield_id not in shields_by_id: + available = sorted(shields_by_id) + raise ValueError( + f"Configured default_shield_id '{default_shield_id}' not found among registered shields." 
+ f" Available shields: {available}" + ) + + class EnvVarError(Exception): def __init__(self, var_name: str, path: str = ""): self.var_name = var_name @@ -361,8 +385,8 @@ def _initialize_storage(run_config: StackRunConfig): else: raise ValueError(f"Unknown storage backend type: {type}") - from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends - from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + from llama_stack.core.storage.kvstore.kvstore import register_kvstore_backends + from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends register_kvstore_backends(kv_backends) register_sqlstore_backends(sql_backends) @@ -412,6 +436,7 @@ class Stack: await register_resources(self.run_config, impls) await refresh_registry_once(impls) await validate_vector_stores_config(self.run_config.vector_stores, impls) + await validate_safety_config(self.run_config.safety, impls) self.impls = impls def create_registry_refresh_task(self): @@ -540,6 +565,7 @@ def run_config_from_adhoc_config_spec( metadata=KVStoreReference(backend="kv_default", namespace="registry"), inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + prompts=KVStoreReference(backend="kv_default", namespace="prompts"), ), ), ) diff --git a/llama_stack/core/start_stack.sh b/src/llama_stack/core/start_stack.sh similarity index 100% rename from llama_stack/core/start_stack.sh rename to src/llama_stack/core/start_stack.sh diff --git a/llama_stack/core/routing_tables/__init__.py b/src/llama_stack/core/storage/__init__.py similarity index 100% rename from llama_stack/core/routing_tables/__init__.py rename to src/llama_stack/core/storage/__init__.py diff --git a/src/llama_stack/core/storage/datatypes.py b/src/llama_stack/core/storage/datatypes.py new file mode 100644 index 000000000..527c1b828 --- /dev/null +++ b/src/llama_stack/core/storage/datatypes.py @@ -0,0 +1,306 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
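The adhoc run-config hunk above wires each core store (registry metadata, inference, conversations, prompts) to a named backend via a reference object. A minimal sketch of that pairing, using the datatypes defined in the new file that follows; module paths are the ones this PR introduces, and the `/tmp` path is illustrative:

```python
# Sketch: a store reference names a backend from storage.backends plus the
# namespace (or table) carved out of it. Mirrors the ServerStoresConfig
# defaults defined below.
from llama_stack.core.storage.datatypes import (
    KVStoreReference,
    SqliteKVStoreConfig,
    StorageConfig,
)

config = StorageConfig(
    backends={
        # One physical sqlite file can back many KV namespaces.
        "kv_default": SqliteKVStoreConfig(db_path="/tmp/llama/kvstore.db"),
    },
)
prompts_ref = KVStoreReference(backend="kv_default", namespace="prompts")
assert prompts_ref.backend in config.backends
```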
+
+import re
+from abc import abstractmethod
+from enum import StrEnum
+from pathlib import Path
+from typing import Annotated, Literal
+
+from pydantic import BaseModel, Field, field_validator
+
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+
+
+class StorageBackendType(StrEnum):
+    KV_REDIS = "kv_redis"
+    KV_SQLITE = "kv_sqlite"
+    KV_POSTGRES = "kv_postgres"
+    KV_MONGODB = "kv_mongodb"
+    SQL_SQLITE = "sql_sqlite"
+    SQL_POSTGRES = "sql_postgres"
+
+
+class CommonConfig(BaseModel):
+    namespace: str | None = Field(
+        default=None,
+        description="All keys will be prefixed with this namespace",
+    )
+
+
+class RedisKVStoreConfig(CommonConfig):
+    type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS
+    host: str = "localhost"
+    port: int = 6379
+
+    @property
+    def url(self) -> str:
+        return f"redis://{self.host}:{self.port}"
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["redis"]
+
+    @classmethod
+    def sample_run_config(cls):
+        return {
+            "type": StorageBackendType.KV_REDIS.value,
+            "host": "${env.REDIS_HOST:=localhost}",
+            "port": "${env.REDIS_PORT:=6379}",
+        }
+
+
+class SqliteKVStoreConfig(CommonConfig):
+    type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE
+    db_path: str = Field(
+        description="File path for the sqlite database",
+    )
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["aiosqlite"]
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
+        return {
+            "type": StorageBackendType.KV_SQLITE.value,
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
+        }
+
+
+class PostgresKVStoreConfig(CommonConfig):
+    type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
+    host: str = "localhost"
+    port: int | str = 5432
+    db: str = "llamastack"
+    user: str
+    password: str | None = None
+    ssl_mode: str | None = None
+    ca_cert_path: str | None = None
+    table_name: str = "llamastack_kvstore"
+
+    @classmethod
+    def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
+        return {
+            "type": StorageBackendType.KV_POSTGRES.value,
+            "host": "${env.POSTGRES_HOST:=localhost}",
+            "port": "${env.POSTGRES_PORT:=5432}",
+            "db": "${env.POSTGRES_DB:=llamastack}",
+            "user": "${env.POSTGRES_USER:=llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
+            "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
+        }
+
+    @field_validator("table_name")
+    @classmethod
+    def validate_table_name(cls, v: str) -> str:
+        # PostgreSQL identifier rules:
+        # - Must start with a letter or underscore
+        # - Can contain letters, numbers, and underscores
+        # - Maximum length is 63 bytes
+        pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
+        if not re.match(pattern, v):
+            raise ValueError(
+                "Invalid table name. 
Must start with letter or underscore and contain only letters, numbers, and underscores"
+            )
+        if len(v) > 63:
+            raise ValueError("Table name must not exceed 63 characters")
+        return v
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["psycopg2-binary"]
+
+
+class MongoDBKVStoreConfig(CommonConfig):
+    type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB
+    host: str = "localhost"
+    port: int = 27017
+    db: str = "llamastack"
+    user: str | None = None
+    password: str | None = None
+    collection_name: str = "llamastack_kvstore"
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["pymongo"]
+
+    @classmethod
+    def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
+        return {
+            "type": StorageBackendType.KV_MONGODB.value,
+            "host": "${env.MONGODB_HOST:=localhost}",
+            "port": "${env.MONGODB_PORT:=27017}",
+            "db": "${env.MONGODB_DB}",
+            "user": "${env.MONGODB_USER}",
+            "password": "${env.MONGODB_PASSWORD}",
+            "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
+        }
+
+
+class SqlAlchemySqlStoreConfig(BaseModel):
+    @property
+    @abstractmethod
+    def engine_str(self) -> str: ...
+
+    # TODO: move this when we have a better way to specify dependencies with internal APIs
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["sqlalchemy[asyncio]"]
+
+
+class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
+    type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
+    db_path: str = Field(
+        description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
+    )
+
+    @property
+    def engine_str(self) -> str:
+        return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix()
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
+        return {
+            "type": StorageBackendType.SQL_SQLITE.value,
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
+        }
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return super().pip_packages() + ["aiosqlite"]
+
+
+class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
+    type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
+    host: str = "localhost"
+    port: int | str = 5432
+    db: str = "llamastack"
+    user: str
+    password: str | None = None
+
+    @property
+    def engine_str(self) -> str:
+        return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return super().pip_packages() + ["asyncpg"]
+
+    @classmethod
+    def sample_run_config(cls, **kwargs):
+        return {
+            "type": StorageBackendType.SQL_POSTGRES.value,
+            "host": "${env.POSTGRES_HOST:=localhost}",
+            "port": "${env.POSTGRES_PORT:=5432}",
+            "db": "${env.POSTGRES_DB:=llamastack}",
+            "user": "${env.POSTGRES_USER:=llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
+        }
+
+
+# reference = (backend_name, table_name)
+class SqlStoreReference(BaseModel):
+    """A reference to a 'SQL-like' persistent store. A table name must be provided."""
+
+    table_name: str = Field(
+        description="Name of the table to use for the SqlStore",
+    )
+
+    backend: str = Field(
+        description="Name of backend from storage.backends",
+    )
+
+
+# reference = (backend_name, namespace)
+class KVStoreReference(BaseModel):
+    """A reference to a 'key-value' persistent store. 
A namespace must be provided.""" + + namespace: str = Field( + description="Key prefix for KVStore backends", + ) + + backend: str = Field( + description="Name of backend from storage.backends", + ) + + +StorageBackendConfig = Annotated[ + RedisKVStoreConfig + | SqliteKVStoreConfig + | PostgresKVStoreConfig + | MongoDBKVStoreConfig + | SqliteSqlStoreConfig + | PostgresSqlStoreConfig, + Field(discriminator="type"), +] + + +class InferenceStoreReference(SqlStoreReference): + """Inference store configuration with queue tuning.""" + + max_write_queue_size: int = Field( + default=10000, + description="Max queued writes for inference store", + ) + num_writers: int = Field( + default=4, + description="Number of concurrent background writers", + ) + + +class ResponsesStoreReference(InferenceStoreReference): + """Responses store configuration with queue tuning.""" + + +class ServerStoresConfig(BaseModel): + metadata: KVStoreReference | None = Field( + default=KVStoreReference( + backend="kv_default", + namespace="registry", + ), + description="Metadata store configuration (uses KV backend)", + ) + inference: InferenceStoreReference | None = Field( + default=InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ), + description="Inference store configuration (uses SQL backend)", + ) + conversations: SqlStoreReference | None = Field( + default=SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ), + description="Conversations store configuration (uses SQL backend)", + ) + responses: ResponsesStoreReference | None = Field( + default=None, + description="Responses store configuration (uses SQL backend)", + ) + prompts: KVStoreReference | None = Field( + default=KVStoreReference(backend="kv_default", namespace="prompts"), + description="Prompts store configuration (uses KV backend)", + ) + + +class StorageConfig(BaseModel): + backends: dict[str, StorageBackendConfig] = Field( + default={ + "kv_default": SqliteKVStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/sql_store.db", + ), + }, + description="Named backend configurations (e.g., 'default', 'cache')", + ) + stores: ServerStoresConfig = Field( + default_factory=lambda: ServerStoresConfig(), + description="Named references to storage backends used by the stack core", + ) diff --git a/src/llama_stack/core/storage/kvstore/__init__.py b/src/llama_stack/core/storage/kvstore/__init__.py new file mode 100644 index 000000000..2d60f1508 --- /dev/null +++ b/src/llama_stack/core/storage/kvstore/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack_api.internal.kvstore import KVStore as KVStore + +from .kvstore import * # noqa: F401, F403 diff --git a/llama_stack/providers/utils/kvstore/config.py b/src/llama_stack/core/storage/kvstore/config.py similarity index 100% rename from llama_stack/providers/utils/kvstore/config.py rename to src/llama_stack/core/storage/kvstore/config.py diff --git a/src/llama_stack/core/storage/kvstore/kvstore.py b/src/llama_stack/core/storage/kvstore/kvstore.py new file mode 100644 index 000000000..8ea9282fa --- /dev/null +++ b/src/llama_stack/core/storage/kvstore/kvstore.py @@ -0,0 +1,128 @@ +# Copyright (c) Meta Platforms, Inc. 
and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from __future__ import annotations
+
+import asyncio
+from collections import defaultdict
+from datetime import datetime
+from typing import cast
+
+from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig
+from llama_stack_api.internal.kvstore import KVStore
+
+from .config import (
+    KVStoreConfig,
+    MongoDBKVStoreConfig,
+    PostgresKVStoreConfig,
+    RedisKVStoreConfig,
+    SqliteKVStoreConfig,
+)
+
+
+def kvstore_dependencies():
+    """
+    Returns all possible kvstore dependencies for registry/provider specifications.
+
+    NOTE: For specific kvstore implementations, use config.pip_packages instead.
+    This function returns the union of all dependencies for cases where the specific
+    kvstore type is not known at declaration time (e.g., provider registries).
+    """
+    return ["aiosqlite", "psycopg2-binary", "redis", "pymongo"]
+
+
+class InmemoryKVStoreImpl(KVStore):
+    def __init__(self):
+        self._store: dict[str, str] = {}
+
+    async def initialize(self) -> None:
+        pass
+
+    async def get(self, key: str) -> str | None:
+        return self._store.get(key)
+
+    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
+        self._store[key] = value
+
+    async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
+        return [self._store[key] for key in self._store.keys() if key >= start_key and key < end_key]
+
+    async def keys_in_range(self, start_key: str, end_key: str) -> list[str]:
+        """Get all keys in the given range."""
+        return [key for key in self._store.keys() if key >= start_key and key < end_key]
+
+    async def delete(self, key: str) -> None:
+        del self._store[key]
+
+
+_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
+_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
+_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)
+
+
+def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
+    """Register the set of available KV store backends for reference resolution."""
+    global _KVSTORE_BACKENDS
+    global _KVSTORE_INSTANCES
+    global _KVSTORE_LOCKS
+
+    _KVSTORE_BACKENDS.clear()
+    _KVSTORE_INSTANCES.clear()
+    _KVSTORE_LOCKS.clear()
+    for name, cfg in backends.items():
+        typed_cfg = cast(KVStoreConfig, cfg)
+        _KVSTORE_BACKENDS[name] = typed_cfg
+
+
+async def kvstore_impl(reference: KVStoreReference) -> KVStore:
+    backend_name = reference.backend
+    cache_key = (backend_name, reference.namespace)
+
+    existing = _KVSTORE_INSTANCES.get(cache_key)
+    if existing:
+        return existing
+
+    backend_config = _KVSTORE_BACKENDS.get(backend_name)
+    if backend_config is None:
+        raise ValueError(f"Unknown KVStore backend '{backend_name}'. 
Registered backends: {sorted(_KVSTORE_BACKENDS)}") + + lock = _KVSTORE_LOCKS[cache_key] + async with lock: + existing = _KVSTORE_INSTANCES.get(cache_key) + if existing: + return existing + + config = backend_config.model_copy() + config.namespace = reference.namespace + + impl: KVStore + if isinstance(config, RedisKVStoreConfig): + from .redis import RedisKVStoreImpl + + impl = RedisKVStoreImpl(config) + elif isinstance(config, SqliteKVStoreConfig): + from .sqlite import SqliteKVStoreImpl + + impl = SqliteKVStoreImpl(config) + elif isinstance(config, PostgresKVStoreConfig): + from .postgres import PostgresKVStoreImpl + + impl = PostgresKVStoreImpl(config) + elif isinstance(config, MongoDBKVStoreConfig): + from .mongodb import MongoDBKVStoreImpl + + impl = MongoDBKVStoreImpl(config) + else: + raise ValueError(f"Unknown kvstore type {config.type}") + + await impl.initialize() + _KVSTORE_INSTANCES[cache_key] = impl + return impl diff --git a/llama_stack/providers/utils/kvstore/mongodb/__init__.py b/src/llama_stack/core/storage/kvstore/mongodb/__init__.py similarity index 100% rename from llama_stack/providers/utils/kvstore/mongodb/__init__.py rename to src/llama_stack/core/storage/kvstore/mongodb/__init__.py diff --git a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py b/src/llama_stack/core/storage/kvstore/mongodb/mongodb.py similarity index 79% rename from llama_stack/providers/utils/kvstore/mongodb/mongodb.py rename to src/llama_stack/core/storage/kvstore/mongodb/mongodb.py index 4d60949c1..673d6038f 100644 --- a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py +++ b/src/llama_stack/core/storage/kvstore/mongodb/mongodb.py @@ -9,8 +9,8 @@ from datetime import datetime from pymongo import AsyncMongoClient from pymongo.asynchronous.collection import AsyncCollection +from llama_stack.core.storage.kvstore import KVStore from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import KVStore from ..config import MongoDBKVStoreConfig @@ -30,14 +30,13 @@ class MongoDBKVStoreImpl(KVStore): async def initialize(self) -> None: try: - conn_creds = { - "host": self.config.host, - "port": self.config.port, - "username": self.config.user, - "password": self.config.password, - } - conn_creds = {k: v for k, v in conn_creds.items() if v is not None} - self.conn = AsyncMongoClient(**conn_creds) + # Pass parameters explicitly to satisfy mypy - AsyncMongoClient doesn't accept **dict + self.conn = AsyncMongoClient( + host=self.config.host if self.config.host is not None else None, + port=self.config.port if self.config.port is not None else None, + username=self.config.user if self.config.user is not None else None, + password=self.config.password if self.config.password is not None else None, + ) except Exception as e: log.exception("Could not connect to MongoDB database server") raise RuntimeError("Could not connect to MongoDB database server") from e @@ -79,4 +78,8 @@ class MongoDBKVStoreImpl(KVStore): end_key = self._namespaced_key(end_key) query = {"key": {"$gte": start_key, "$lt": end_key}} cursor = self.collection.find(query, {"key": 1, "_id": 0}).sort("key", 1) - return [doc["key"] for doc in cursor] + # AsyncCursor requires async iteration + result = [] + async for doc in cursor: + result.append(doc["key"]) + return result diff --git a/llama_stack/providers/utils/kvstore/postgres/__init__.py b/src/llama_stack/core/storage/kvstore/postgres/__init__.py similarity index 100% rename from llama_stack/providers/utils/kvstore/postgres/__init__.py rename to 
src/llama_stack/core/storage/kvstore/postgres/__init__.py diff --git a/src/llama_stack/core/storage/kvstore/postgres/postgres.py b/src/llama_stack/core/storage/kvstore/postgres/postgres.py new file mode 100644 index 000000000..39c3fd2e2 --- /dev/null +++ b/src/llama_stack/core/storage/kvstore/postgres/postgres.py @@ -0,0 +1,125 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from datetime import datetime + +import psycopg2 # type: ignore[import-not-found] +from psycopg2.extensions import connection as PGConnection # type: ignore[import-not-found] +from psycopg2.extras import DictCursor # type: ignore[import-not-found] + +from llama_stack.log import get_logger +from llama_stack_api.internal.kvstore import KVStore + +from ..config import PostgresKVStoreConfig + +log = get_logger(name=__name__, category="providers::utils") + + +class PostgresKVStoreImpl(KVStore): + def __init__(self, config: PostgresKVStoreConfig): + self.config = config + self._conn: PGConnection | None = None + self._cursor: DictCursor | None = None + + async def initialize(self) -> None: + try: + self._conn = psycopg2.connect( + host=self.config.host, + port=self.config.port, + database=self.config.db, + user=self.config.user, + password=self.config.password, + sslmode=self.config.ssl_mode, + sslrootcert=self.config.ca_cert_path, + ) + self._conn.autocommit = True + self._cursor = self._conn.cursor(cursor_factory=DictCursor) + + # Create table if it doesn't exist + self._cursor.execute( + f""" + CREATE TABLE IF NOT EXISTS {self.config.table_name} ( + key TEXT PRIMARY KEY, + value TEXT, + expiration TIMESTAMP + ) + """ + ) + except Exception as e: + log.exception("Could not connect to PostgreSQL database server") + raise RuntimeError("Could not connect to PostgreSQL database server") from e + + def _cursor_or_raise(self) -> DictCursor: + if self._cursor is None: + raise RuntimeError("Postgres client not initialized") + return self._cursor + + def _namespaced_key(self, key: str) -> str: + if not self.config.namespace: + return key + return f"{self.config.namespace}:{key}" + + async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: + key = self._namespaced_key(key) + cursor = self._cursor_or_raise() + cursor.execute( + f""" + INSERT INTO {self.config.table_name} (key, value, expiration) + VALUES (%s, %s, %s) + ON CONFLICT (key) DO UPDATE + SET value = EXCLUDED.value, expiration = EXCLUDED.expiration + """, + (key, value, expiration), + ) + + async def get(self, key: str) -> str | None: + key = self._namespaced_key(key) + cursor = self._cursor_or_raise() + cursor.execute( + f""" + SELECT value FROM {self.config.table_name} + WHERE key = %s + AND (expiration IS NULL OR expiration > NOW()) + """, + (key,), + ) + result = cursor.fetchone() + return result[0] if result else None + + async def delete(self, key: str) -> None: + key = self._namespaced_key(key) + cursor = self._cursor_or_raise() + cursor.execute( + f"DELETE FROM {self.config.table_name} WHERE key = %s", + (key,), + ) + + async def values_in_range(self, start_key: str, end_key: str) -> list[str]: + start_key = self._namespaced_key(start_key) + end_key = self._namespaced_key(end_key) + + cursor = self._cursor_or_raise() + cursor.execute( + f""" + SELECT value FROM {self.config.table_name} + WHERE key >= %s AND key < %s + AND (expiration IS NULL OR expiration > NOW()) + ORDER BY 
key
+            """,
+            (start_key, end_key),
+        )
+        return [row[0] for row in cursor.fetchall()]
+
+    async def keys_in_range(self, start_key: str, end_key: str) -> list[str]:
+        start_key = self._namespaced_key(start_key)
+        end_key = self._namespaced_key(end_key)
+
+        cursor = self._cursor_or_raise()
+        cursor.execute(
+            f"SELECT key FROM {self.config.table_name} WHERE key >= %s AND key < %s",
+            (start_key, end_key),
+        )
+        return [row[0] for row in cursor.fetchall()]
diff --git a/llama_stack/providers/utils/kvstore/redis/__init__.py b/src/llama_stack/core/storage/kvstore/redis/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/redis/__init__.py
rename to src/llama_stack/core/storage/kvstore/redis/__init__.py
diff --git a/src/llama_stack/core/storage/kvstore/redis/redis.py b/src/llama_stack/core/storage/kvstore/redis/redis.py
new file mode 100644
index 000000000..2b35a22e1
--- /dev/null
+++ b/src/llama_stack/core/storage/kvstore/redis/redis.py
@@ -0,0 +1,101 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from datetime import datetime
+
+from redis.asyncio import Redis  # type: ignore[import-not-found]
+
+from llama_stack_api.internal.kvstore import KVStore
+
+from ..config import RedisKVStoreConfig
+
+
+class RedisKVStoreImpl(KVStore):
+    def __init__(self, config: RedisKVStoreConfig):
+        self.config = config
+        self._redis: Redis | None = None
+
+    async def initialize(self) -> None:
+        self._redis = Redis.from_url(self.config.url)
+
+    def _client(self) -> Redis:
+        if self._redis is None:
+            raise RuntimeError("Redis client not initialized")
+        return self._redis
+
+    def _namespaced_key(self, key: str) -> str:
+        if not self.config.namespace:
+            return key
+        return f"{self.config.namespace}:{key}"
+
+    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
+        key = self._namespaced_key(key)
+        client = self._client()
+        await client.set(key, value)
+        if expiration:
+            await client.expireat(key, expiration)
+
+    async def get(self, key: str) -> str | None:
+        key = self._namespaced_key(key)
+        client = self._client()
+        value = await client.get(key)
+        if value is None:
+            return None
+        if isinstance(value, bytes):
+            return value.decode("utf-8")
+        if isinstance(value, str):
+            return value
+        return str(value)
+
+    async def delete(self, key: str) -> None:
+        key = self._namespaced_key(key)
+        await self._client().delete(key)
+
+    async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
+        start_key = self._namespaced_key(start_key)
+        end_key = self._namespaced_key(end_key)
+        client = self._client()
+        cursor = 0
+        pattern = start_key + "*"  # Match all keys starting with start_key prefix
+        matching_keys: list[str | bytes] = []
+        while True:
+            cursor, keys = await client.scan(cursor, match=pattern, count=1000)
+
+            for key in keys:
+                key_str = key.decode("utf-8") if isinstance(key, bytes) else key
+                if start_key <= key_str <= end_key:
+                    matching_keys.append(key)
+
+            if cursor == 0:
+                break
+
+        # Fetch all matched values in a single MGET call
+        if matching_keys:
+            values = await client.mget(matching_keys)
+            return [
+                value.decode("utf-8") if isinstance(value, bytes) else value for value in values if value is not None
+            ]
+
+        return []
+
+    async def keys_in_range(self, start_key: str, end_key: str) -> list[str]:
+        """Get all keys in the given range."""
+        start_key = 
self._namespaced_key(start_key) + end_key = self._namespaced_key(end_key) + client = self._client() + cursor = 0 + pattern = start_key + "*" + result: list[str] = [] + while True: + cursor, keys = await client.scan(cursor, match=pattern, count=1000) + for key in keys: + key_str = key.decode("utf-8") if isinstance(key, bytes) else str(key) + if start_key <= key_str <= end_key: + result.append(key_str) + if cursor == 0: + break + return result diff --git a/llama_stack/providers/utils/kvstore/sqlite/__init__.py b/src/llama_stack/core/storage/kvstore/sqlite/__init__.py similarity index 100% rename from llama_stack/providers/utils/kvstore/sqlite/__init__.py rename to src/llama_stack/core/storage/kvstore/sqlite/__init__.py diff --git a/llama_stack/providers/utils/kvstore/sqlite/sqlite.py b/src/llama_stack/core/storage/kvstore/sqlite/sqlite.py similarity index 99% rename from llama_stack/providers/utils/kvstore/sqlite/sqlite.py rename to src/llama_stack/core/storage/kvstore/sqlite/sqlite.py index a9a7a1304..22cf8ac49 100644 --- a/llama_stack/providers/utils/kvstore/sqlite/sqlite.py +++ b/src/llama_stack/core/storage/kvstore/sqlite/sqlite.py @@ -10,8 +10,8 @@ from datetime import datetime import aiosqlite from llama_stack.log import get_logger +from llama_stack_api.internal.kvstore import KVStore -from ..api import KVStore from ..config import SqliteKVStoreConfig logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/core/storage/sqlstore/__init__.py b/src/llama_stack/core/storage/sqlstore/__init__.py new file mode 100644 index 000000000..eb843e4ba --- /dev/null +++ b/src/llama_stack/core/storage/sqlstore/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
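For reference, the SCAN-plus-MGET range pattern used by RedisKVStoreImpl above, reduced to a standalone sketch. It assumes a local Redis on the default port; the key names and the `values_in_range` helper here are illustrative:

```python
# Sketch of the range-read pattern: SCAN pages through the keyspace with a
# prefix pattern, bounds are checked client-side, then one MGET fetches all
# matched values.
import asyncio

from redis.asyncio import Redis


async def values_in_range(client: Redis, start_key: str, end_key: str) -> list[str]:
    cursor = 0
    matching: list[bytes] = []
    while True:
        # SCAN is incremental, so it never blocks the server like KEYS would.
        cursor, keys = await client.scan(cursor, match=start_key + "*", count=1000)
        matching += [k for k in keys if start_key <= k.decode() <= end_key]
        if cursor == 0:
            break
    if not matching:
        return []
    # One round-trip for all matched keys.
    values = await client.mget(matching)
    return [v.decode() for v in values if v is not None]


async def main() -> None:
    client = Redis()
    await client.mset({"ns:a": "1", "ns:b": "2", "ns:z": "3"})
    # Prints the values for ns:a and ns:b (SCAN order is not guaranteed).
    print(await values_in_range(client, "ns:a", "ns:c"))


if __name__ == "__main__":
    asyncio.run(main())
```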
+ +from llama_stack_api.internal.sqlstore import ( + ColumnDefinition as ColumnDefinition, +) +from llama_stack_api.internal.sqlstore import ( + ColumnType as ColumnType, +) +from llama_stack_api.internal.sqlstore import ( + SqlStore as SqlStore, +) + +from .sqlstore import * # noqa: F401,F403 diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/core/storage/sqlstore/authorized_sqlstore.py similarity index 84% rename from llama_stack/providers/utils/sqlstore/authorized_sqlstore.py rename to src/llama_stack/core/storage/sqlstore/authorized_sqlstore.py index 3dfc82677..e6cdcc543 100644 --- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/src/llama_stack/core/storage/sqlstore/authorized_sqlstore.py @@ -14,8 +14,8 @@ from llama_stack.core.datatypes import User from llama_stack.core.request_headers import get_authenticated_user from llama_stack.core.storage.datatypes import StorageBackendType from llama_stack.log import get_logger - -from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore +from llama_stack_api import PaginatedResponse +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType, SqlStore logger = get_logger(name=__name__, category="providers::utils") @@ -45,8 +45,13 @@ def _enhance_item_with_access_control(item: Mapping[str, Any], current_user: Use enhanced["owner_principal"] = current_user.principal enhanced["access_attributes"] = current_user.attributes else: - enhanced["owner_principal"] = None - enhanced["access_attributes"] = None + # IMPORTANT: Use empty string and null value (not None) to match public access filter + # The public access filter in _get_public_access_conditions() expects: + # - owner_principal = '' (empty string) + # - access_attributes = null (JSON null, which serializes to the string 'null') + # Setting them to None (SQL NULL) will cause rows to be filtered out on read. 
+ enhanced["owner_principal"] = "" + enhanced["access_attributes"] = None # Pydantic/JSON will serialize this as JSON null return enhanced @@ -124,6 +129,23 @@ class AuthorizedSqlStore: enhanced_data = [_enhance_item_with_access_control(item, current_user) for item in data] await self.sql_store.insert(table, enhanced_data) + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + """Upsert a row with automatic access control attribute capture.""" + current_user = get_authenticated_user() + enhanced_data = _enhance_item_with_access_control(data, current_user) + await self.sql_store.upsert( + table=table, + data=enhanced_data, + conflict_columns=conflict_columns, + update_columns=update_columns, + ) + async def fetch_all( self, table: str, @@ -188,8 +210,9 @@ class AuthorizedSqlStore: enhanced_data["owner_principal"] = current_user.principal enhanced_data["access_attributes"] = current_user.attributes else: - enhanced_data["owner_principal"] = None - enhanced_data["access_attributes"] = None + # IMPORTANT: Use empty string for owner_principal to match public access filter + enhanced_data["owner_principal"] = "" + enhanced_data["access_attributes"] = None # Will serialize as JSON null await self.sql_store.update(table, enhanced_data, where) @@ -245,14 +268,24 @@ class AuthorizedSqlStore: raise ValueError(f"Unsupported database type: {self.database_type}") def _get_public_access_conditions(self) -> list[str]: - """Get the SQL conditions for public access.""" - # Public records are records that have no owner_principal or access_attributes + """Get the SQL conditions for public access. + + Public records are those with: + - owner_principal = '' (empty string) + - access_attributes is either SQL NULL or JSON null + + Note: Different databases serialize None differently: + - SQLite: None → JSON null (text = 'null') + - Postgres: None → SQL NULL (IS NULL) + """ conditions = ["owner_principal = ''"] if self.database_type == StorageBackendType.SQL_POSTGRES.value: - # Postgres stores JSON null as 'null' - conditions.append("access_attributes::text = 'null'") + # Accept both SQL NULL and JSON null for Postgres compatibility + # This handles both old rows (SQL NULL) and new rows (JSON null) + conditions.append("(access_attributes IS NULL OR access_attributes::text = 'null')") elif self.database_type == StorageBackendType.SQL_SQLITE.value: - conditions.append("access_attributes = 'null'") + # SQLite serializes None as JSON null + conditions.append("(access_attributes IS NULL OR access_attributes = 'null')") else: raise ValueError(f"Unsupported database type: {self.database_type}") return conditions diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py similarity index 77% rename from llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py rename to src/llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py index c1ccd73dd..01c561443 100644 --- a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
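The public-access rule described in the comments above reduces to one SQL predicate per backend. A condensed, dependency-free sketch of that logic; the backend strings mirror the StorageBackendType values from this PR:

```python
# Sketch of the per-backend public-row filter from AuthorizedSqlStore:
# "public" rows have owner_principal = '' and access_attributes equal to
# either SQL NULL or the JSON literal null.


def public_access_where(database_type: str) -> str:
    conditions = ["owner_principal = ''"]
    if database_type == "sql_postgres":
        # Postgres: cast the JSON column to text to compare against 'null'.
        conditions.append("(access_attributes IS NULL OR access_attributes::text = 'null')")
    elif database_type == "sql_sqlite":
        # SQLite stores a JSON null as the literal text 'null'.
        conditions.append("(access_attributes IS NULL OR access_attributes = 'null')")
    else:
        raise ValueError(f"Unsupported database type: {database_type}")
    return " AND ".join(conditions)


print(public_access_where("sql_sqlite"))
# owner_principal = '' AND (access_attributes IS NULL OR access_attributes = 'null')
```

Accepting both SQL NULL and JSON null keeps rows written before this change (SQL NULL) readable alongside rows written after it (JSON null).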
from collections.abc import Mapping, Sequence -from typing import Any, Literal +from typing import Any, Literal, cast from sqlalchemy import ( JSON, @@ -17,6 +17,7 @@ from sqlalchemy import ( String, Table, Text, + event, inspect, select, text, @@ -25,11 +26,10 @@ from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.sql.elements import ColumnElement -from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger - -from .api import ColumnDefinition, ColumnType, SqlStore +from llama_stack_api import PaginatedResponse +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType, SqlStore logger = get_logger(name=__name__, category="providers::utils") @@ -55,27 +55,57 @@ def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement: raise ValueError(f"Operator mapping must have a single operator, got: {value}") op, operand = next(iter(value.items())) if op == "==" or op == "=": - return column == operand + return cast(ColumnElement[Any], column == operand) if op == ">": - return column > operand + return cast(ColumnElement[Any], column > operand) if op == "<": - return column < operand + return cast(ColumnElement[Any], column < operand) if op == ">=": - return column >= operand + return cast(ColumnElement[Any], column >= operand) if op == "<=": - return column <= operand + return cast(ColumnElement[Any], column <= operand) raise ValueError(f"Unsupported operator '{op}' in where mapping") - return column == value + return cast(ColumnElement[Any], column == value) class SqlAlchemySqlStoreImpl(SqlStore): def __init__(self, config: SqlAlchemySqlStoreConfig): self.config = config + self._is_sqlite_backend = "sqlite" in self.config.engine_str self.async_session = async_sessionmaker(self.create_engine()) self.metadata = MetaData() def create_engine(self) -> AsyncEngine: - return create_async_engine(self.config.engine_str, pool_pre_ping=True) + # Configure connection args for better concurrency support + connect_args = {} + if self._is_sqlite_backend: + # SQLite-specific optimizations for concurrent access + # With WAL mode, most locks resolve in milliseconds, but allow up to 5s for edge cases + connect_args["timeout"] = 5.0 + connect_args["check_same_thread"] = False # Allow usage across asyncio tasks + + engine = create_async_engine( + self.config.engine_str, + pool_pre_ping=True, + connect_args=connect_args, + ) + + # Enable WAL mode for SQLite to support concurrent readers and writers + if self._is_sqlite_backend: + + @event.listens_for(engine.sync_engine, "connect") + def set_sqlite_pragma(dbapi_conn, connection_record): + cursor = dbapi_conn.cursor() + # Enable Write-Ahead Logging for better concurrency + cursor.execute("PRAGMA journal_mode=WAL") + # Set busy timeout to 5 seconds (retry instead of immediate failure) + # With WAL mode, locks should be brief; if we hit 5s there's a bigger issue + cursor.execute("PRAGMA busy_timeout=5000") + # Use NORMAL synchronous mode for better performance (still safe with WAL) + cursor.execute("PRAGMA synchronous=NORMAL") + cursor.close() + + return engine async def create_table( self, @@ -121,6 +151,29 @@ class SqlAlchemySqlStoreImpl(SqlStore): await session.execute(self.metadata.tables[table].insert(), data) await session.commit() + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + 
update_columns: list[str] | None = None, + ) -> None: + table_obj = self.metadata.tables[table] + dialect_insert = self._get_dialect_insert(table_obj) + insert_stmt = dialect_insert.values(**data) + + if update_columns is None: + update_columns = [col for col in data.keys() if col not in conflict_columns] + + update_mapping = {col: getattr(insert_stmt.excluded, col) for col in update_columns} + conflict_cols = [table_obj.c[col] for col in conflict_columns] + + stmt = insert_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=update_mapping) + + async with self.async_session() as session: + await session.execute(stmt) + await session.commit() + async def fetch_all( self, table: str, @@ -210,10 +263,8 @@ class SqlAlchemySqlStoreImpl(SqlStore): query = query.limit(fetch_limit) result = await session.execute(query) - if result.rowcount == 0: - rows = [] - else: - rows = [dict(row._mapping) for row in result] + # Iterate directly - if no rows, list comprehension yields empty list + rows = [dict(row._mapping) for row in result] # Always return pagination result has_more = False @@ -305,9 +356,18 @@ class SqlAlchemySqlStoreImpl(SqlStore): add_column_sql = text(f"ALTER TABLE {table} ADD COLUMN {column_name} {compiled_type}{nullable_clause}") await conn.execute(add_column_sql) - except Exception as e: # If any error occurs during migration, log it but don't fail # The table creation will handle adding the column logger.error(f"Error adding column {column_name} to table {table}: {e}") pass + + def _get_dialect_insert(self, table: Table): + if self._is_sqlite_backend: + from sqlalchemy.dialects.sqlite import insert as sqlite_insert + + return sqlite_insert(table) + else: + from sqlalchemy.dialects.postgresql import insert as pg_insert + + return pg_insert(table) diff --git a/src/llama_stack/core/storage/sqlstore/sqlstore.py b/src/llama_stack/core/storage/sqlstore/sqlstore.py new file mode 100644 index 000000000..fb2c9d279 --- /dev/null +++ b/src/llama_stack/core/storage/sqlstore/sqlstore.py @@ -0,0 +1,87 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
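As a sanity check on the dialect-specific upsert added above, here is a standalone sketch of the same `INSERT ... ON CONFLICT DO UPDATE` pattern, assuming SQLite with `aiosqlite`; the table and helper names are illustrative, not part of the patch:

```python
# Minimal upsert demo using SQLAlchemy's SQLite dialect insert.
import asyncio

from sqlalchemy import Column, Integer, MetaData, String, Table
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
from sqlalchemy.ext.asyncio import create_async_engine

metadata = MetaData()
kv = Table("kv", metadata, Column("key", String, primary_key=True), Column("value", Integer))


async def upsert_kv(engine, key: str, value: int) -> None:
    insert_stmt = sqlite_insert(kv).values(key=key, value=value)
    # Columns outside the conflict set are refreshed from the incoming row ("excluded").
    stmt = insert_stmt.on_conflict_do_update(
        index_elements=[kv.c.key],
        set_={"value": insert_stmt.excluded.value},
    )
    async with engine.begin() as conn:
        await conn.execute(stmt)


async def main() -> None:
    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
    async with engine.begin() as conn:
        await conn.run_sync(metadata.create_all)
    await upsert_kv(engine, "a", 1)
    await upsert_kv(engine, "a", 2)  # updates in place instead of raising IntegrityError
    await engine.dispose()


asyncio.run(main())
```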
+ +from threading import Lock +from typing import Annotated, cast + +from pydantic import Field + +from llama_stack.core.storage.datatypes import ( + PostgresSqlStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageBackendConfig, + StorageBackendType, +) +from llama_stack_api.internal.sqlstore import SqlStore + +sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"] + +_SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {} +_SQLSTORE_INSTANCES: dict[str, SqlStore] = {} +_SQLSTORE_LOCKS: dict[str, Lock] = {} + + +SqlStoreConfig = Annotated[ + SqliteSqlStoreConfig | PostgresSqlStoreConfig, + Field(discriminator="type"), +] + + +def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: + """Get pip packages for SQL store config, handling both dict and object cases.""" + if isinstance(store_config, dict): + store_type = store_config.get("type") + if store_type == StorageBackendType.SQL_SQLITE.value: + return SqliteSqlStoreConfig.pip_packages() + elif store_type == StorageBackendType.SQL_POSTGRES.value: + return PostgresSqlStoreConfig.pip_packages() + else: + raise ValueError(f"Unknown SQL store type: {store_type}") + else: + return store_config.pip_packages() + + +def sqlstore_impl(reference: SqlStoreReference) -> SqlStore: + backend_name = reference.backend + + backend_config = _SQLSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError( + f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + + existing = _SQLSTORE_INSTANCES.get(backend_name) + if existing: + return existing + + lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock()) + with lock: + existing = _SQLSTORE_INSTANCES.get(backend_name) + if existing: + return existing + + if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): + from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl + + config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() + instance = SqlAlchemySqlStoreImpl(config) + _SQLSTORE_INSTANCES[backend_name] = instance + return instance + else: + raise ValueError(f"Unknown sqlstore type {backend_config.type}") + + +def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None: + """Register the set of available SQL store backends for reference resolution.""" + global _SQLSTORE_BACKENDS + global _SQLSTORE_INSTANCES + + _SQLSTORE_BACKENDS.clear() + _SQLSTORE_INSTANCES.clear() + _SQLSTORE_LOCKS.clear() + for name, cfg in backends.items(): + _SQLSTORE_BACKENDS[name] = cfg diff --git a/llama_stack/core/store/__init__.py b/src/llama_stack/core/store/__init__.py similarity index 100% rename from llama_stack/core/store/__init__.py rename to src/llama_stack/core/store/__init__.py diff --git a/llama_stack/core/store/registry.py b/src/llama_stack/core/store/registry.py similarity index 99% rename from llama_stack/core/store/registry.py rename to src/llama_stack/core/store/registry.py index 6ff9e575b..7144a94f7 100644 --- a/llama_stack/core/store/registry.py +++ b/src/llama_stack/core/store/registry.py @@ -12,8 +12,8 @@ import pydantic from llama_stack.core.datatypes import RoutableObjectWithProvider from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack.core.storage.kvstore import KVStore, kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl logger = get_logger(__name__, category="core::registry") diff --git 
a/llama_stack/core/testing_context.py b/src/llama_stack/core/testing_context.py
similarity index 100%
rename from llama_stack/core/testing_context.py
rename to src/llama_stack/core/testing_context.py
diff --git a/llama_stack/core/server/__init__.py b/src/llama_stack/core/utils/__init__.py
similarity index 100%
rename from llama_stack/core/server/__init__.py
rename to src/llama_stack/core/utils/__init__.py
diff --git a/llama_stack/core/utils/config.py b/src/llama_stack/core/utils/config.py
similarity index 100%
rename from llama_stack/core/utils/config.py
rename to src/llama_stack/core/utils/config.py
diff --git a/llama_stack/core/utils/config_dirs.py b/src/llama_stack/core/utils/config_dirs.py
similarity index 100%
rename from llama_stack/core/utils/config_dirs.py
rename to src/llama_stack/core/utils/config_dirs.py
diff --git a/llama_stack/core/utils/config_resolution.py b/src/llama_stack/core/utils/config_resolution.py
similarity index 84%
rename from llama_stack/core/utils/config_resolution.py
rename to src/llama_stack/core/utils/config_resolution.py
index fcf057db6..2a85837b6 100644
--- a/llama_stack/core/utils/config_resolution.py
+++ b/src/llama_stack/core/utils/config_resolution.py
@@ -52,7 +52,17 @@ def resolve_config_or_distro(
         logger.debug(f"Using distribution: {distro_config}")
         return distro_config
 
-    # Strategy 3: Try as built distribution name
+    # Strategy 3: Try as a distro-qualified config name (distro and config joined by '::')
+    # e.g. starter::run-with-postgres-store.yaml
+    # '::' is used as the separator to avoid confusion with a filesystem path
+    if "::" in config_or_distro:
+        distro_name, config_name = config_or_distro.split("::")
+        distro_config = _get_distro_config_path(distro_name, config_name)
+        if distro_config.exists():
+            logger.info(f"Using distribution: {distro_config}")
+            return distro_config
+
+    # Strategy 4: Try as built distribution name
     distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
     if distrib_config.exists():
         logger.debug(f"Using built distribution: {distrib_config}")
@@ -63,13 +73,15 @@
         logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config
 
-    # Strategy 4: Failed - provide helpful error
+    # Strategy 5: Failed - provide helpful error
     raise ValueError(_format_resolution_error(config_or_distro, mode))
 
 
-def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
+def _get_distro_config_path(distro_name: str, mode: str) -> Path:
     """Get the config file path for a distro."""
-    return DISTRO_DIR / distro_name / f"{mode}.yaml"
+    if not mode.endswith(".yaml"):
+        mode = f"{mode}.yaml"
+    return DISTRO_DIR / distro_name / mode
 
 
 def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
diff --git a/src/llama_stack/core/utils/context.py b/src/llama_stack/core/utils/context.py
new file mode 100644
index 000000000..0c3e41f00
--- /dev/null
+++ b/src/llama_stack/core/utils/context.py
@@ -0,0 +1,78 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncGenerator
+from contextvars import ContextVar
+
+_MISSING = object()
+
+
+def preserve_contexts_async_generator[T](
+    gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
+) -> AsyncGenerator[T, None]:
+    """
+    Wraps an async generator to preserve context variables across iterations.
+ This is needed because we start a new asyncio event loop for each streaming request, + and we need to preserve the context across the event loop boundary. + """ + # Capture initial context values + initial_context_values = {context_var.name: context_var.get() for context_var in context_vars} + + async def wrapper() -> AsyncGenerator[T, None]: + while True: + previous_values: dict[ContextVar, object] = {} + tokens: dict[ContextVar, object] = {} + + # Restore ALL context values before any await and capture previous state + # This is needed to propagate context across async generator boundaries + for context_var in context_vars: + try: + previous_values[context_var] = context_var.get() + except LookupError: + previous_values[context_var] = _MISSING + tokens[context_var] = context_var.set(initial_context_values[context_var.name]) + + def _restore_context_var(context_var: ContextVar, *, _tokens=tokens, _prev=previous_values) -> None: + token = _tokens.get(context_var) + previous_value = _prev.get(context_var, _MISSING) + if token is not None: + try: + context_var.reset(token) + return + except (RuntimeError, ValueError): + pass + + if previous_value is _MISSING: + context_var.set(None) + else: + context_var.set(previous_value) + + try: + item = await gen.__anext__() + except StopAsyncIteration: + # Restore all context vars before exiting to prevent leaks + # Use _restore_context_var for all vars to properly restore to previous values + for context_var in context_vars: + _restore_context_var(context_var) + break + except Exception: + # Restore all context vars on exception + for context_var in context_vars: + _restore_context_var(context_var) + raise + + try: + yield item + # Update our tracked values with any changes made during this iteration + # This allows context changes to persist across generator iterations + for context_var in context_vars: + initial_context_values[context_var.name] = context_var.get() + finally: + # Restore context vars after each yield to prevent leaks between requests + for context_var in context_vars: + _restore_context_var(context_var) + + return wrapper() diff --git a/llama_stack/core/utils/dynamic.py b/src/llama_stack/core/utils/dynamic.py similarity index 100% rename from llama_stack/core/utils/dynamic.py rename to src/llama_stack/core/utils/dynamic.py diff --git a/llama_stack/core/utils/exec.py b/src/llama_stack/core/utils/exec.py similarity index 87% rename from llama_stack/core/utils/exec.py rename to src/llama_stack/core/utils/exec.py index 12fb82d01..98964db2c 100644 --- a/llama_stack/core/utils/exec.py +++ b/src/llama_stack/core/utils/exec.py @@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int: text=True, check=False, ) + + # Print stdout and stderr if command failed + if result.returncode != 0: + log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}") + if result.stdout: + log.error(f"STDOUT: {result.stdout}") + if result.stderr: + log.error(f"STDERR: {result.stderr}") + return result.returncode except subprocess.SubprocessError as e: log.error(f"Subprocess error: {e}") diff --git a/llama_stack/core/utils/image_types.py b/src/llama_stack/core/utils/image_types.py similarity index 100% rename from llama_stack/core/utils/image_types.py rename to src/llama_stack/core/utils/image_types.py diff --git a/llama_stack/core/utils/model_utils.py b/src/llama_stack/core/utils/model_utils.py similarity index 100% rename from llama_stack/core/utils/model_utils.py rename to src/llama_stack/core/utils/model_utils.py diff --git 
a/llama_stack/core/utils/prompt_for_config.py b/src/llama_stack/core/utils/prompt_for_config.py similarity index 100% rename from llama_stack/core/utils/prompt_for_config.py rename to src/llama_stack/core/utils/prompt_for_config.py diff --git a/llama_stack/core/utils/serialize.py b/src/llama_stack/core/utils/serialize.py similarity index 100% rename from llama_stack/core/utils/serialize.py rename to src/llama_stack/core/utils/serialize.py diff --git a/src/llama_stack/core/utils/type_inspection.py b/src/llama_stack/core/utils/type_inspection.py new file mode 100644 index 000000000..31e7f2328 --- /dev/null +++ b/src/llama_stack/core/utils/type_inspection.py @@ -0,0 +1,45 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Utility functions for type inspection and parameter handling. +""" + +import inspect +import typing +from typing import Any, get_args, get_origin + +from pydantic import BaseModel +from pydantic.fields import FieldInfo + + +def is_unwrapped_body_param(param_type: Any) -> bool: + """ + Check if a parameter type represents an unwrapped body parameter. + An unwrapped body parameter is an Annotated type with Body(embed=False) + + This is used to determine whether request parameters should be flattened + in OpenAPI specs and client libraries (matching FastAPI's embed=False behavior). + + Args: + param_type: The parameter type annotation to check + + Returns: + True if the parameter should be treated as an unwrapped body parameter + """ + # Check if it's Annotated with Body(embed=False) + if get_origin(param_type) is typing.Annotated: + args = get_args(param_type) + base_type = args[0] + metadata = args[1:] + + # Look for Body annotation with embed=False + # Body() returns a FieldInfo object, so we check for that type and the embed attribute + for item in metadata: + if isinstance(item, FieldInfo) and hasattr(item, "embed") and not item.embed: + return inspect.isclass(base_type) and issubclass(base_type, BaseModel) + + return False diff --git a/llama_stack/core/storage/__init__.py b/src/llama_stack/distributions/__init__.py similarity index 100% rename from llama_stack/core/storage/__init__.py rename to src/llama_stack/distributions/__init__.py diff --git a/llama_stack/distributions/ci-tests/__init__.py b/src/llama_stack/distributions/ci-tests/__init__.py similarity index 100% rename from llama_stack/distributions/ci-tests/__init__.py rename to src/llama_stack/distributions/ci-tests/__init__.py diff --git a/src/llama_stack/distributions/ci-tests/build.yaml b/src/llama_stack/distributions/ci-tests/build.yaml new file mode 100644 index 000000000..f29ac7712 --- /dev/null +++ b/src/llama_stack/distributions/ci-tests/build.yaml @@ -0,0 +1,60 @@ +version: 2 +distribution_spec: + description: CI tests for Llama Stack + providers: + inference: + - provider_type: remote::cerebras + - provider_type: remote::ollama + - provider_type: remote::vllm + - provider_type: remote::tgi + - provider_type: remote::fireworks + - provider_type: remote::together + - provider_type: remote::bedrock + - provider_type: remote::nvidia + - provider_type: remote::openai + - provider_type: remote::anthropic + - provider_type: remote::gemini + - provider_type: remote::vertexai + - provider_type: remote::groq + - provider_type: remote::sambanova + - provider_type: remote::azure + - provider_type: inline::sentence-transformers + vector_io: + - 
provider_type: inline::faiss + - provider_type: inline::sqlite-vec + - provider_type: inline::milvus + - provider_type: remote::chromadb + - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate + files: + - provider_type: inline::localfs + safety: + - provider_type: inline::llama-guard + - provider_type: inline::code-scanner + agents: + - provider_type: inline::meta-reference + post_training: + - provider_type: inline::torchtune-cpu + eval: + - provider_type: inline::meta-reference + datasetio: + - provider_type: remote::huggingface + - provider_type: inline::localfs + scoring: + - provider_type: inline::basic + - provider_type: inline::llm-as-judge + - provider_type: inline::braintrust + tool_runtime: + - provider_type: remote::brave-search + - provider_type: remote::tavily-search + - provider_type: inline::rag-runtime + - provider_type: remote::model-context-protocol + batches: + - provider_type: inline::reference +image_type: venv +additional_pip_packages: +- aiosqlite +- asyncpg +- psycopg2-binary +- sqlalchemy[asyncio] diff --git a/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py similarity index 100% rename from llama_stack/distributions/ci-tests/ci_tests.py rename to src/llama_stack/distributions/ci-tests/ci_tests.py diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml new file mode 100644 index 000000000..8414dcae5 --- /dev/null +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -0,0 +1,290 @@ +version: 2 +image_name: ci-tests +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai/v1 + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + base_url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + base_url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + base_url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + base_url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} + api_key: ${env.NVIDIA_API_KEY:=} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - 
provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + base_url: https://api.groq.com/openai/v1 + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + base_url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + base_url: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: 
+ kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml new file mode 100644 index 000000000..e83fc7fb5 --- /dev/null +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -0,0 +1,281 @@ +version: 2 +image_name: ci-tests +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai/v1 + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + base_url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + 
base_url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + base_url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + base_url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} + api_key: ${env.NVIDIA_API_KEY:=} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + base_url: https://api.groq.com/openai/v1 + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + base_url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + base_url: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: 
${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/llama_stack/distributions/dell/__init__.py b/src/llama_stack/distributions/dell/__init__.py similarity index 100% rename from llama_stack/distributions/dell/__init__.py rename to src/llama_stack/distributions/dell/__init__.py diff --git a/llama_stack/distributions/dell/build.yaml 
b/src/llama_stack/distributions/dell/build.yaml
similarity index 100%
rename from llama_stack/distributions/dell/build.yaml
rename to src/llama_stack/distributions/dell/build.yaml
diff --git a/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
similarity index 99%
rename from llama_stack/distributions/dell/dell.py
rename to src/llama_stack/distributions/dell/dell.py
index 88e72688f..52a07b7f1 100644
--- a/llama_stack/distributions/dell/dell.py
+++ b/src/llama_stack/distributions/dell/dell.py
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -17,6 +16,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/dell/doc_template.md b/src/llama_stack/distributions/dell/doc_template.md
new file mode 100644
index 000000000..1530f665a
--- /dev/null
+++ b/src/llama_stack/distributions/dell/doc_template.md
@@ -0,0 +1,209 @@
+---
+orphan: true
+---
+
+# Dell Distribution of Llama Stack
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
+
+{{ providers_table }}
+
+You can use this distribution if you have GPUs and want to run an independent TGI or Dell Enterprise Hub container for inference.
+
+{% if run_config_env_vars %}
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+
+## Setting up the inference server using Dell Enterprise Hub's custom TGI container
+
+NOTE: This is a placeholder to run inference with TGI. It will be updated to use [Dell Enterprise Hub's containers](https://dell.huggingface.co/authenticated/models) once verified.
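A note on the `${env.…}` syntax that the run.yaml files in this patch (and the `Environment Variables` section above) rely on: `${env.NAME:=default}` expands to the variable's value or a default, while `${env.NAME:+value}` expands to `value` only when the variable is set, which is how whole providers are switched on and off. The sketch below is a rough, illustrative approximation of that behavior, not the stack's actual resolver:

```python
# Hypothetical mini-resolver for the two substitution forms used in run.yaml files.
import os
import re

_PATTERN = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")


def resolve(value: str) -> str:
    def _sub(m: re.Match) -> str:
        name, op, arg = m.group(1), m.group(2), m.group(3)
        val = os.environ.get(name)
        if op == "=":
            return val if val else arg      # ${env.NAME:=default} -> value, else default
        return arg if val else ""           # ${env.NAME:+value} -> value only if NAME is set


    return _PATTERN.sub(_sub, value)


assert resolve("${env.MISSING:=http://localhost:11434/v1}") == "http://localhost:11434/v1"
os.environ["OLLAMA_URL"] = "http://gpu-host:11434/v1"
assert resolve("${env.OLLAMA_URL:+ollama}") == "ollama"  # provider becomes active
```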
+
+```bash
+export INFERENCE_PORT=8181
+export DEH_URL=http://0.0.0.0:$INFERENCE_PORT
+export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
+export CHROMADB_HOST=localhost
+export CHROMADB_PORT=6601
+export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT
+export CUDA_VISIBLE_DEVICES=0
+export LLAMA_STACK_PORT=8321
+
+docker run --rm -it \
+  --pull always \
+  --network host \
+  -v $HOME/.cache/huggingface:/data \
+  -e HF_TOKEN=$HF_TOKEN \
+  -p $INFERENCE_PORT:$INFERENCE_PORT \
+  --gpus $CUDA_VISIBLE_DEVICES \
+  ghcr.io/huggingface/text-generation-inference \
+  --dtype bfloat16 \
+  --usage-stats off \
+  --sharded false \
+  --cuda-memory-fraction 0.7 \
+  --model-id $INFERENCE_MODEL \
+  --port $INFERENCE_PORT --hostname 0.0.0.0
+```
+
+If you are using the Llama Stack Safety / Shield APIs, you will also need to run a second TGI instance serving a safety model such as `meta-llama/Llama-Guard-3-1B`:
+
+```bash
+export SAFETY_INFERENCE_PORT=8282
+export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+export CUDA_VISIBLE_DEVICES=1
+
+docker run --rm -it \
+  --pull always \
+  --network host \
+  -v $HOME/.cache/huggingface:/data \
+  -e HF_TOKEN=$HF_TOKEN \
+  -p $SAFETY_INFERENCE_PORT:$SAFETY_INFERENCE_PORT \
+  --gpus $CUDA_VISIBLE_DEVICES \
+  ghcr.io/huggingface/text-generation-inference \
+  --dtype bfloat16 \
+  --usage-stats off \
+  --sharded false \
+  --cuda-memory-fraction 0.7 \
+  --model-id $SAFETY_MODEL \
+  --hostname 0.0.0.0 \
+  --port $SAFETY_INFERENCE_PORT
+```
+
+## The Dell distribution relies on ChromaDB for vector storage
+
+You can easily start ChromaDB in a container (the example below uses Podman; Docker works the same way):
+```bash
+# This is where the indices are persisted
+mkdir -p $HOME/chromadb
+
+podman run --rm -it \
+  --network host \
+  --name chromadb \
+  -v $HOME/chromadb:/chroma/chroma \
+  -e IS_PERSISTENT=TRUE \
+  chromadb/chroma:latest \
+  --port $CHROMADB_PORT \
+  --host $CHROMADB_HOST
+```
+
+## Running Llama Stack
+
+Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (building the code) or via Docker, which has a pre-built image.
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
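Before launching the stack, it can save a debugging round-trip to confirm that the TGI and ChromaDB containers started above are actually reachable. A small hedged sketch follows; the endpoint paths (`/health` for TGI, `/api/v1/heartbeat` for Chroma) are the containers' conventional ones and may differ across versions:

```python
# Hypothetical pre-flight check for the containers started above.
import os
import urllib.request

checks = {
    "TGI inference": f"{os.environ.get('DEH_URL', 'http://0.0.0.0:8181')}/health",
    "ChromaDB": f"{os.environ.get('CHROMA_URL', 'http://localhost:6601')}/api/v1/heartbeat",
}

for name, url in checks.items():
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            print(f"{name}: OK ({resp.status})")
    except OSError as exc:  # URLError, HTTPError, and timeouts all subclass OSError
        print(f"{name}: UNREACHABLE at {url} ({exc})")
```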
+
+```bash
+# NOTE: mount the llama-stack checkout only if testing local changes; otherwise drop that -v line.
+# Use localhost/distribution-dell:dev as the image if building / testing locally.
+docker run -it \
+  --pull always \
+  --network host \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v $HOME/.llama:/root/.llama \
+  -v $HOME/git/llama-stack:/app/llama-stack-source \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e CHROMA_URL=$CHROMA_URL \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+If you are using the Llama Stack Safety / Shield APIs, use:
+
+```bash
+# You need a local checkout of llama-stack to run this; get it with
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
+export SAFETY_INFERENCE_PORT=8282
+export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v $HOME/.llama:/root/.llama \
+  -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e SAFETY_MODEL=$SAFETY_MODEL \
+  -e DEH_SAFETY_URL=$DEH_SAFETY_URL \
+  -e CHROMA_URL=$CHROMA_URL \
+  llamastack/distribution-{{ name }} \
+  --config /root/my-run.yaml \
+  --port $LLAMA_STACK_PORT
+```
+
+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+
+docker run -it \
+  --pull always \
+  --network host \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v $HOME/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e CHROMA_URL=$CHROMA_URL \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+{% if run_configs %}
+Available run configurations for this distribution:
+{% for config in run_configs %}
+- `{{ config }}`
+{% endfor %}
+{% endif %}
+
+### Via Conda
+
+Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
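Whichever Docker variant above you use, a quick end-to-end check once the server is up is to list the registered models through the Python client. A minimal sketch, assuming `llama-stack-client` is installed and the server is on the default port:

```python
# Smoke test against a running stack; the listed model ids should include INFERENCE_MODEL.
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url=f"http://localhost:{os.environ.get('LLAMA_STACK_PORT', '8321')}")

for model in client.models.list():
    print(model.identifier)
```

If the model list comes back, the server is healthy; the Conda route below should behave identically.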
+ +```bash +llama stack list-deps {{ name }} | xargs -L1 pip install +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +CHROMA_URL=$CHROMA_URL \ +llama stack run {{ name }} \ + --port $LLAMA_STACK_PORT +``` + +If you are using Llama Stack Safety / Shield APIs, use: + +```bash +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +SAFETY_MODEL=$SAFETY_MODEL \ +DEH_SAFETY_URL=$DEH_SAFETY_URL \ +CHROMA_URL=$CHROMA_URL \ +llama stack run ./run-with-safety.yaml \ + --port $LLAMA_STACK_PORT +``` diff --git a/src/llama_stack/distributions/dell/run-with-safety.yaml b/src/llama_stack/distributions/dell/run-with-safety.yaml new file mode 100644 index 000000000..63bd95168 --- /dev/null +++ b/src/llama_stack/distributions/dell/run-with-safety.yaml @@ -0,0 +1,142 @@ +version: 2 +image_name: dell +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: ${env.DEH_URL} + - provider_id: tgi1 + provider_type: remote::tgi + config: + url: ${env.DEH_SAFETY_URL} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: chromadb + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi0 + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: tgi1 + model_type: llm + 
- metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: + - shield_id: ${env.SAFETY_MODEL} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: brave-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/src/llama_stack/distributions/dell/run.yaml b/src/llama_stack/distributions/dell/run.yaml new file mode 100644 index 000000000..93f0c35bc --- /dev/null +++ b/src/llama_stack/distributions/dell/run.yaml @@ -0,0 +1,133 @@ +version: 2 +image_name: dell +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: ${env.DEH_URL} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: chromadb + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi0 + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: brave-search + - 
toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
diff --git a/llama_stack/distributions/meta-reference-gpu/__init__.py b/src/llama_stack/distributions/meta-reference-gpu/__init__.py
similarity index 100%
rename from llama_stack/distributions/meta-reference-gpu/__init__.py
rename to src/llama_stack/distributions/meta-reference-gpu/__init__.py
diff --git a/llama_stack/distributions/meta-reference-gpu/build.yaml b/src/llama_stack/distributions/meta-reference-gpu/build.yaml
similarity index 100%
rename from llama_stack/distributions/meta-reference-gpu/build.yaml
rename to src/llama_stack/distributions/meta-reference-gpu/build.yaml
diff --git a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
new file mode 100644
index 000000000..af71d8388
--- /dev/null
+++ b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
@@ -0,0 +1,119 @@
+---
+orphan: true
+---
+# Meta Reference GPU Distribution
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
+
+{{ providers_table }}
+
+Note that you need access to NVIDIA GPUs to run this distribution. It is not compatible with CPU-only machines or machines with AMD GPUs.
+
+{% if run_config_env_vars %}
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+
+## Prerequisite: Downloading Models
+
+Please check that you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) for downloading the models using the Hugging Face CLI.
+
+## Running the Distribution
+
+You can do this via venv or Docker, which has a pre-built image.
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
+
+```bash
+LLAMA_STACK_PORT=8321
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+If you are using the Llama Stack Safety / Shield APIs, use:
+
+```bash
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used.
The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use. + +{% if run_configs %} +Available run configurations for this distribution: +{% for config in run_configs %} +- `{{ config }}` +{% endfor %} +{% endif %} + +### Via venv + +Make sure you have the Llama Stack CLI available. + +```bash +llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ +llama stack run distributions/{{ name }}/run.yaml \ + --port 8321 +``` + +If you are using Llama Stack Safety / Shield APIs, use: + +```bash +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ +SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ +llama stack run distributions/{{ name }}/run-with-safety.yaml \ + --port 8321 +``` diff --git a/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py similarity index 99% rename from llama_stack/distributions/meta-reference-gpu/meta_reference.py rename to src/llama_stack/distributions/meta-reference-gpu/meta_reference.py index 4e4ddef33..a515794d5 100644 --- a/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -6,7 +6,6 @@ from pathlib import Path -from llama_stack.apis.models import ModelType from llama_stack.core.datatypes import ( BuildProvider, ModelInput, @@ -22,6 +21,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig +from llama_stack_api import ModelType def get_distribution_template() -> DistributionTemplate: diff --git a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml new file mode 100644 index 000000000..63fc3b1d2 --- /dev/null +++ b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -0,0 +1,155 @@ +version: 2 +image_name: meta-reference-gpu +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: meta-reference-inference + provider_type: inline::meta-reference + config: + model: ${env.INFERENCE_MODEL} + checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null} + quantization: + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + - provider_id: meta-reference-safety + provider_type: inline::meta-reference + config: + model: ${env.SAFETY_MODEL} + checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null} + quantization: + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: 
responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: meta-reference-inference + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: meta-reference-safety + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: + - shield_id: ${env.SAFETY_MODEL} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/src/llama_stack/distributions/meta-reference-gpu/run.yaml b/src/llama_stack/distributions/meta-reference-gpu/run.yaml new file mode 100644 index 000000000..ba8235398 --- /dev/null +++ b/src/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -0,0 +1,140 @@ +version: 2 +image_name: meta-reference-gpu +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: meta-reference-inference + provider_type: inline::meta-reference + config: + model: ${env.INFERENCE_MODEL} + checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null} + quantization: + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: 
inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: meta-reference-inference + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/distributions/nvidia/__init__.py b/src/llama_stack/distributions/nvidia/__init__.py similarity index 100% rename from llama_stack/distributions/nvidia/__init__.py rename to src/llama_stack/distributions/nvidia/__init__.py diff --git a/llama_stack/distributions/nvidia/build.yaml b/src/llama_stack/distributions/nvidia/build.yaml similarity index 100% rename from llama_stack/distributions/nvidia/build.yaml rename to src/llama_stack/distributions/nvidia/build.yaml diff --git a/src/llama_stack/distributions/nvidia/doc_template.md b/src/llama_stack/distributions/nvidia/doc_template.md new file mode 100644 index 000000000..054a1e3ec --- /dev/null +++ b/src/llama_stack/distributions/nvidia/doc_template.md @@ -0,0 +1,170 @@ +--- +orphan: true +--- +# NVIDIA Distribution + +The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. 
+
+{{ providers_table }}
+
+{% if run_config_env_vars %}
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+{% if default_models %}
+### Models
+
+The following models are available by default:
+
+{% for model in default_models %}
+- `{{ model.model_id }} {{ model.doc_string }}`
+{% endfor %}
+{% endif %}
+
+
+## Prerequisites
+### NVIDIA API Keys
+
+Make sure you have access to an NVIDIA API key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable.
+
+### Deploy NeMo Microservices Platform
+The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform.
+
+## Supported Services
+Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints.
+
+### Inference: NVIDIA NIM
+NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs:
+ 1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (requires an API key)
+ 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure.
+
+The deployed platform includes the NIM Proxy microservice, which is the service that provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment.
+
+### Datasetio API: NeMo Data Store
+The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with the Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint.
+
+See the [NVIDIA Datasetio docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage.
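+
+For illustration, a minimal sketch of talking to the Data Store with `HfApi` (this assumes `huggingface_hub` is installed and a Data Store is reachable at `NVIDIA_DATASETS_URL`; the repo and file names are hypothetical, and the `default/` namespace mirrors the `NVIDIA_DATASET_NAMESPACE` default — treat the exact repo naming and token handling as assumptions of your deployment):
+
+```python
+import os
+
+from huggingface_hub import HfApi
+
+# Point the Hub client at the NeMo Data Store instead of huggingface.co.
+api = HfApi(endpoint=os.environ["NVIDIA_DATASETS_URL"], token=os.environ.get("HF_TOKEN"))
+
+# Create a dataset repo and upload a local file (names are illustrative).
+api.create_repo(repo_id="default/sample-dataset", repo_type="dataset", exist_ok=True)
+api.upload_file(
+    path_or_fileobj="./train.jsonl",
+    path_in_repo="train.jsonl",
+    repo_id="default/sample-dataset",
+    repo_type="dataset",
+)
+```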
+
+### Eval API: NeMo Evaluator
+The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the [NVIDIA Eval docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage.
+
+### Post-Training API: NeMo Customizer
+The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the [NVIDIA Post-Training docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage.
+
+### Safety API: NeMo Guardrails
+The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the [NVIDIA Safety docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/safety/nvidia/README.md) for supported features and example usage.
+
+## Deploying models
+In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`.
+
+Note: For improved inference speeds, use a NIM with the `fast_outlines` guided decoding backend (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart.
+```sh
+# URL to NeMo NIM Proxy service
+export NEMO_URL="http://nemo.test"
+
+curl --location "$NEMO_URL/v1/deployment/model-deployments" \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "name": "llama-3.2-1b-instruct",
+    "namespace": "meta",
+    "config": {
+      "model": "meta/llama-3.2-1b-instruct",
+      "nim_deployment": {
+        "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct",
+        "image_tag": "1.8.3",
+        "pvc_size": "25Gi",
+        "gpu": 1,
+        "additional_envs": {
+          "NIM_GUIDED_DECODING_BACKEND": "fast_outlines"
+        }
+      }
+    }
+  }'
+```
+This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference.
+
+You can also remove a deployed NIM to free up GPU resources, if needed.
+```sh
+export NEMO_URL="http://nemo.test"
+
+curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.2-1b-instruct"
+```
+
+## Running Llama Stack with NVIDIA
+
+You can do this via Docker, which has a pre-built image, or via venv (building from source).
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
+
+```bash
+LLAMA_STACK_PORT=8321
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used.
The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use. + +{% if run_configs %} +Available run configurations for this distribution: +{% for config in run_configs %} +- `{{ config }}` +{% endfor %} +{% endif %} + +### Via venv + +If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment. + +```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct +llama stack list-deps nvidia | xargs -L1 uv pip install +NVIDIA_API_KEY=$NVIDIA_API_KEY \ +INFERENCE_MODEL=$INFERENCE_MODEL \ +llama stack run ./run.yaml \ + --port 8321 +``` + +## Example Notebooks +For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, you can reference the example notebooks in [docs/notebooks/nvidia](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks/nvidia). diff --git a/llama_stack/distributions/nvidia/nvidia.py b/src/llama_stack/distributions/nvidia/nvidia.py similarity index 100% rename from llama_stack/distributions/nvidia/nvidia.py rename to src/llama_stack/distributions/nvidia/nvidia.py diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml new file mode 100644 index 000000000..7d95565e5 --- /dev/null +++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -0,0 +1,137 @@ +version: 2 +image_name: nvidia +apis: +- agents +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: nvidia + provider_type: remote::nvidia + config: + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} + api_key: ${env.NVIDIA_API_KEY:=} + - provider_id: nvidia + provider_type: remote::nvidia + config: + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + safety: + - provider_id: nvidia + provider_type: remote::nvidia + config: + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: nvidia + provider_type: remote::nvidia + config: + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} + post_training: + - provider_id: nvidia + provider_type: remote::nvidia + config: + api_key: ${env.NVIDIA_API_KEY:=} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} + datasetio: + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + - provider_id: nvidia + provider_type: remote::nvidia + config: + api_key: ${env.NVIDIA_API_KEY:=} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: 
${env.NVIDIA_DATASETS_URL:=http://nemo.test} + scoring: + - provider_id: basic + provider_type: inline::basic + tool_runtime: + - provider_id: rag-runtime + provider_type: inline::rag-runtime + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: nvidia + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: nvidia + model_type: llm + shields: + - shield_id: ${env.SAFETY_MODEL} + provider_id: nvidia + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/run.yaml new file mode 100644 index 000000000..8c80b8303 --- /dev/null +++ b/src/llama_stack/distributions/nvidia/run.yaml @@ -0,0 +1,116 @@ +version: 2 +image_name: nvidia +apis: +- agents +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: nvidia + provider_type: remote::nvidia + config: + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} + api_key: ${env.NVIDIA_API_KEY:=} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + safety: + - provider_id: nvidia + provider_type: remote::nvidia + config: + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: nvidia + provider_type: remote::nvidia + config: + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} + post_training: + - provider_id: nvidia + provider_type: remote::nvidia + config: + api_key: ${env.NVIDIA_API_KEY:=} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} + datasetio: + - provider_id: nvidia + provider_type: remote::nvidia + config: + api_key: ${env.NVIDIA_API_KEY:=} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} + scoring: + - provider_id: basic + provider_type: inline::basic + tool_runtime: + - provider_id: rag-runtime + provider_type: inline::rag-runtime + files: + - provider_id: 
meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/src/llama_stack/distributions/oci/__init__.py b/src/llama_stack/distributions/oci/__init__.py new file mode 100644 index 000000000..68c0efe44 --- /dev/null +++ b/src/llama_stack/distributions/oci/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .oci import get_distribution_template # noqa: F401 diff --git a/src/llama_stack/distributions/oci/build.yaml b/src/llama_stack/distributions/oci/build.yaml new file mode 100644 index 000000000..7e082e1f6 --- /dev/null +++ b/src/llama_stack/distributions/oci/build.yaml @@ -0,0 +1,35 @@ +version: 2 +distribution_spec: + description: Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM + inference with scalable cloud services + providers: + inference: + - provider_type: remote::oci + vector_io: + - provider_type: inline::faiss + - provider_type: remote::chromadb + - provider_type: remote::pgvector + safety: + - provider_type: inline::llama-guard + agents: + - provider_type: inline::meta-reference + eval: + - provider_type: inline::meta-reference + datasetio: + - provider_type: remote::huggingface + - provider_type: inline::localfs + scoring: + - provider_type: inline::basic + - provider_type: inline::llm-as-judge + - provider_type: inline::braintrust + tool_runtime: + - provider_type: remote::brave-search + - provider_type: remote::tavily-search + - provider_type: inline::rag-runtime + - provider_type: remote::model-context-protocol + files: + - provider_type: inline::localfs +image_type: venv +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/oci/doc_template.md b/src/llama_stack/distributions/oci/doc_template.md new file mode 100644 index 000000000..320530ccd --- /dev/null +++ b/src/llama_stack/distributions/oci/doc_template.md @@ -0,0 +1,140 @@ +--- +orphan: true +--- +# OCI Distribution + +The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. 
+
+{{ providers_table }}
+
+{% if run_config_env_vars %}
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+{% if default_models %}
+### Models
+
+The following models are available by default:
+
+{% for model in default_models %}
+- `{{ model.model_id }} {{ model.doc_string }}`
+{% endfor %}
+{% endif %}
+
+## Prerequisites
+### Oracle Cloud Infrastructure Setup
+
+Before using the OCI Generative AI distribution, ensure you have:
+
+1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/)
+2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy
+3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models
+4. **Authentication**: Configure authentication using either:
+   - **Instance Principal** (recommended for cloud-hosted deployments)
+   - **API Key** (for on-premises or development environments)
+
+### Authentication Methods
+
+#### Instance Principal Authentication (Recommended)
+Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments.
+
+Requirements:
+- Instance must be running in an Oracle Cloud Infrastructure compartment
+- Instance must have appropriate IAM policies to access Generative AI services
+
+#### API Key Authentication
+For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to create an API signing key and configuration file.
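+
+A typical `~/.oci/config` then looks like the following sketch (all OCIDs, fingerprints, and paths below are placeholders):
+
+```
+[DEFAULT]
+user=ocid1.user.oc1..<your-user-ocid>
+fingerprint=<your-api-key-fingerprint>
+key_file=~/.oci/oci_api_key.pem
+tenancy=ocid1.tenancy.oc1..<your-tenancy-ocid>
+region=us-chicago-1
+```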
+
+### Required IAM Policies
+
+Ensure your OCI user or instance has the following policy statements (replace `<group-name>` and `<compartment-name>` with your own values):
+
+```
+Allow group <group-name> to use generative-ai-inference-endpoints in compartment <compartment-name>
+Allow group <group-name> to manage generative-ai-inference-endpoints in compartment <compartment-name>
+```
+
+## Supported Services
+
+### Inference: OCI Generative AI
+Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
+
+- **Chat Completions**: Conversational AI with context awareness
+- **Text Generation**: Complete prompts and generate text content
+
+#### Available Models
+OCI Generative AI offers models from Meta, Cohere, OpenAI, Grok, and more.
+
+### Safety: Llama Guard
+For content safety and moderation, this distribution uses Meta's LlamaGuard model through the OCI Generative AI service to provide:
+- Content filtering and moderation
+- Policy compliance checking
+- Harmful content detection
+
+### Vector Storage: Multiple Options
+The distribution supports several vector storage providers:
+- **FAISS**: Local in-memory vector search
+- **ChromaDB**: Distributed vector database
+- **PGVector**: PostgreSQL with vector extensions
+
+### Additional Services
+- **Dataset I/O**: Local filesystem and Hugging Face integration
+- **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
+- **Evaluation**: Meta reference evaluation framework
+
+## Running Llama Stack with OCI
+
+You can run the OCI distribution via Docker or a local virtual environment.
+
+### Via venv
+
+If you've set up your local development environment, you can run the distribution using your local virtual environment.
+
+```bash
+OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
+```
+
+### Configuration Examples
+
+#### Using Instance Principal (Recommended for Production)
+```bash
+export OCI_AUTH_TYPE=instance_principal
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..
+```
+
+#### Using API Key Authentication (Development)
+```bash
+export OCI_AUTH_TYPE=config_file
+export OCI_CONFIG_FILE_PATH=~/.oci/config
+export OCI_CLI_PROFILE=DEFAULT
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
+```
+
+## Regional Endpoints
+
+OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
+
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Authentication Errors**: Verify your OCI credentials and IAM policies
+2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
+3. **Permission Denied**: Check compartment permissions and Generative AI service access
+4. **Region Unavailable**: Verify the specified region supports Generative AI services
+
+### Getting Help
+
+For additional support:
+- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
+- [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues)
\ No newline at end of file
diff --git a/src/llama_stack/distributions/oci/oci.py b/src/llama_stack/distributions/oci/oci.py
new file mode 100644
index 000000000..1f21840f1
--- /dev/null
+++ b/src/llama_stack/distributions/oci/oci.py
@@ -0,0 +1,108 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
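+
+# This module assembles the OCI distribution template: it declares the
+# build-time providers for each API, overrides the run-time inference,
+# vector_io, and files providers, and lists the OCI_* environment variables
+# that the generated run.yaml reads (see run_config_env_vars below).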
+ +from pathlib import Path + +from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput +from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig +from llama_stack.providers.remote.inference.oci.config import OCIConfig + + +def get_distribution_template(name: str = "oci") -> DistributionTemplate: + providers = { + "inference": [BuildProvider(provider_type="remote::oci")], + "vector_io": [ + BuildProvider(provider_type="inline::faiss"), + BuildProvider(provider_type="remote::chromadb"), + BuildProvider(provider_type="remote::pgvector"), + ], + "safety": [BuildProvider(provider_type="inline::llama-guard")], + "agents": [BuildProvider(provider_type="inline::meta-reference")], + "eval": [BuildProvider(provider_type="inline::meta-reference")], + "datasetio": [ + BuildProvider(provider_type="remote::huggingface"), + BuildProvider(provider_type="inline::localfs"), + ], + "scoring": [ + BuildProvider(provider_type="inline::basic"), + BuildProvider(provider_type="inline::llm-as-judge"), + BuildProvider(provider_type="inline::braintrust"), + ], + "tool_runtime": [ + BuildProvider(provider_type="remote::brave-search"), + BuildProvider(provider_type="remote::tavily-search"), + BuildProvider(provider_type="inline::rag-runtime"), + BuildProvider(provider_type="remote::model-context-protocol"), + ], + "files": [BuildProvider(provider_type="inline::localfs")], + } + + inference_provider = Provider( + provider_id="oci", + provider_type="remote::oci", + config=OCIConfig.sample_run_config(), + ) + + vector_io_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) + + files_provider = Provider( + provider_id="meta-reference-files", + provider_type="inline::localfs", + config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ] + + return DistributionTemplate( + name=name, + distro_type="remote_hosted", + description="Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM inference with scalable cloud services", + container_image=None, + template_path=Path(__file__).parent / "doc_template.md", + providers=providers, + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": [inference_provider], + "vector_io": [vector_io_provider], + "files": [files_provider], + }, + default_tool_groups=default_tool_groups, + ), + }, + run_config_env_vars={ + "OCI_AUTH_TYPE": ( + "instance_principal", + "OCI authentication type (instance_principal or config_file)", + ), + "OCI_REGION": ( + "", + "OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1)", + ), + "OCI_COMPARTMENT_OCID": ( + "", + "OCI compartment ID for the Generative AI service", + ), + "OCI_CONFIG_FILE_PATH": ( + "~/.oci/config", + "OCI config file path (required if OCI_AUTH_TYPE is config_file)", + ), + "OCI_CLI_PROFILE": ( + "DEFAULT", + "OCI CLI profile name to use from config file", + ), + }, + ) diff --git a/src/llama_stack/distributions/oci/run.yaml b/src/llama_stack/distributions/oci/run.yaml new file mode 100644 index 000000000..ff0c818be --- /dev/null +++ b/src/llama_stack/distributions/oci/run.yaml @@ -0,0 +1,134 @@ 
+version: 2 +image_name: oci +apis: +- agents +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: oci + provider_type: remote::oci + config: + oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal} + oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config} + oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT} + oci_region: ${env.OCI_REGION:=us-ashburn-1} + oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/oci/files} + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search +server: + port: 8321 diff --git a/llama_stack/distributions/open-benchmark/__init__.py b/src/llama_stack/distributions/open-benchmark/__init__.py similarity index 100% rename from llama_stack/distributions/open-benchmark/__init__.py rename to src/llama_stack/distributions/open-benchmark/__init__.py diff --git a/llama_stack/distributions/open-benchmark/build.yaml 
b/src/llama_stack/distributions/open-benchmark/build.yaml similarity index 100% rename from llama_stack/distributions/open-benchmark/build.yaml rename to src/llama_stack/distributions/open-benchmark/build.yaml diff --git a/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py similarity index 98% rename from llama_stack/distributions/open-benchmark/open_benchmark.py rename to src/llama_stack/distributions/open-benchmark/open_benchmark.py index 2b7760894..1f4dbf2c2 100644 --- a/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -5,8 +5,6 @@ # the root directory of this source tree. -from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models import ModelType from llama_stack.core.datatypes import ( BenchmarkInput, BuildProvider, @@ -34,6 +32,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import ( PGVectorVectorIOConfig, ) from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry +from llama_stack_api import DatasetPurpose, ModelType, URIDataSource def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/run.yaml new file mode 100644 index 000000000..43aa45b51 --- /dev/null +++ b/src/llama_stack/distributions/open-benchmark/run.yaml @@ -0,0 +1,253 @@ +version: 2 +image_name: open-benchmark +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: groq + provider_type: remote::groq + config: + base_url: https://api.groq.com/openai/v1 + api_key: ${env.GROQ_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + base_url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + vector_io: + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - 
provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - metadata: {} + model_id: gpt-4o + provider_id: openai + provider_model_id: gpt-4o + model_type: llm + - metadata: {} + model_id: claude-3-5-sonnet-latest + provider_id: anthropic + provider_model_id: claude-3-5-sonnet-latest + model_type: llm + - metadata: {} + model_id: gemini/gemini-1.5-flash + provider_id: gemini + provider_model_id: gemini/gemini-1.5-flash + model_type: llm + - metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm + - metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm + shields: + - shield_id: meta-llama/Llama-Guard-3-8B + vector_dbs: [] + datasets: + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/simpleqa?split=train + metadata: {} + dataset_id: simpleqa + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all + metadata: {} + dataset_id: mmlu_cot + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main + metadata: {} + dataset_id: gpqa_cot + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/math_500?split=test + metadata: {} + dataset_id: math_500 + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/IfEval?split=train + metadata: {} + dataset_id: ifeval + - purpose: eval/messages-answer + source: + type: uri + uri: huggingface://datasets/llamastack/docvqa?split=val + metadata: {} + dataset_id: docvqa + scoring_fns: [] + benchmarks: + - dataset_id: simpleqa + scoring_functions: + - 
llm-as-judge::405b-simpleqa + metadata: {} + benchmark_id: meta-reference-simpleqa + - dataset_id: mmlu_cot + scoring_functions: + - basic::regex_parser_multiple_choice_answer + metadata: {} + benchmark_id: meta-reference-mmlu-cot + - dataset_id: gpqa_cot + scoring_functions: + - basic::regex_parser_multiple_choice_answer + metadata: {} + benchmark_id: meta-reference-gpqa-cot + - dataset_id: math_500 + scoring_functions: + - basic::regex_parser_math_response + metadata: {} + benchmark_id: meta-reference-math-500 + - dataset_id: ifeval + scoring_functions: + - basic::ifeval + metadata: {} + benchmark_id: meta-reference-ifeval + - dataset_id: docvqa + scoring_functions: + - basic::docvqa + metadata: {} + benchmark_id: meta-reference-docvqa + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/run.yaml new file mode 100644 index 000000000..c9316f923 --- /dev/null +++ b/src/llama_stack/distributions/postgres-demo/run.yaml @@ -0,0 +1,116 @@ +version: 2 +image_name: postgres-demo +apis: +- agents +- inference +- safety +- tool_runtime +- vector_io +providers: + inference: + - provider_id: vllm-inference + provider_type: remote::vllm + config: + base_url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: 
+ - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + shields: + - shield_id: meta-llama/Llama-Guard-3-8B + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/distributions/starter-gpu/__init__.py b/src/llama_stack/distributions/starter-gpu/__init__.py similarity index 100% rename from llama_stack/distributions/starter-gpu/__init__.py rename to src/llama_stack/distributions/starter-gpu/__init__.py diff --git a/src/llama_stack/distributions/starter-gpu/build.yaml b/src/llama_stack/distributions/starter-gpu/build.yaml new file mode 100644 index 000000000..10cbb1389 --- /dev/null +++ b/src/llama_stack/distributions/starter-gpu/build.yaml @@ -0,0 +1,61 @@ +version: 2 +distribution_spec: + description: Quick start template for running Llama Stack with several popular providers. + This distribution is intended for GPU-enabled environments. + providers: + inference: + - provider_type: remote::cerebras + - provider_type: remote::ollama + - provider_type: remote::vllm + - provider_type: remote::tgi + - provider_type: remote::fireworks + - provider_type: remote::together + - provider_type: remote::bedrock + - provider_type: remote::nvidia + - provider_type: remote::openai + - provider_type: remote::anthropic + - provider_type: remote::gemini + - provider_type: remote::vertexai + - provider_type: remote::groq + - provider_type: remote::sambanova + - provider_type: remote::azure + - provider_type: inline::sentence-transformers + vector_io: + - provider_type: inline::faiss + - provider_type: inline::sqlite-vec + - provider_type: inline::milvus + - provider_type: remote::chromadb + - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate + files: + - provider_type: inline::localfs + safety: + - provider_type: inline::llama-guard + - provider_type: inline::code-scanner + agents: + - provider_type: inline::meta-reference + post_training: + - provider_type: inline::huggingface-gpu + eval: + - provider_type: inline::meta-reference + datasetio: + - provider_type: remote::huggingface + - provider_type: inline::localfs + scoring: + - provider_type: inline::basic + - provider_type: inline::llm-as-judge + - provider_type: inline::braintrust + tool_runtime: + - provider_type: remote::brave-search + - provider_type: remote::tavily-search + - provider_type: inline::rag-runtime + - provider_type: remote::model-context-protocol + batches: + - provider_type: inline::reference +image_type: venv +additional_pip_packages: +- aiosqlite +- asyncpg +- psycopg2-binary +- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml new file mode 100644 index 000000000..0662986f1 --- /dev/null +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -0,0 +1,293 @@ +version: 2 +image_name: starter-gpu +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: 
https://api.cerebras.ai/v1 + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + base_url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + base_url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + base_url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + base_url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} + api_key: ${env.NVIDIA_API_KEY:=} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + base_url: https://api.groq.com/openai/v1 + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + base_url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + base_url: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: 
${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: huggingface-gpu + provider_type: inline::huggingface-gpu + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu + dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: + - 
shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml new file mode 100644 index 000000000..9ef5b3f6d --- /dev/null +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -0,0 +1,284 @@ +version: 2 +image_name: starter-gpu +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai/v1 + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + base_url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + base_url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + base_url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + base_url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} + api_key: ${env.NVIDIA_API_KEY:=} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + base_url: https://api.groq.com/openai/v1 + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + base_url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + base_url: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: 
sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: huggingface-gpu + provider_type: inline::huggingface-gpu + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu + dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + 
max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/llama_stack/distributions/starter-gpu/starter_gpu.py b/src/llama_stack/distributions/starter-gpu/starter_gpu.py similarity index 100% rename from llama_stack/distributions/starter-gpu/starter_gpu.py rename to src/llama_stack/distributions/starter-gpu/starter_gpu.py diff --git a/llama_stack/distributions/starter/__init__.py b/src/llama_stack/distributions/starter/__init__.py similarity index 100% rename from llama_stack/distributions/starter/__init__.py rename to src/llama_stack/distributions/starter/__init__.py diff --git a/src/llama_stack/distributions/starter/build.yaml b/src/llama_stack/distributions/starter/build.yaml new file mode 100644 index 000000000..acd51f773 --- /dev/null +++ b/src/llama_stack/distributions/starter/build.yaml @@ -0,0 +1,61 @@ +version: 2 +distribution_spec: + description: Quick start template for running Llama Stack with several popular providers. + This distribution is intended for CPU-only environments. 
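A note on the ${env.VAR...} substitution syntax used throughout these generated configs: ${env.VAR:=default} expands to the variable's value, falling back to default (possibly empty) when it is unset, while ${env.VAR:+value} expands to value only when the variable is set and to the empty string otherwise; this is how optional providers such as ollama, vllm, or qdrant stay dormant until their URL or key is exported. A minimal sketch of those shell-style semantics (the function and regex below are illustrative; the real resolver lives in llama_stack core and is not part of this diff):

import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.(\w+):([+=])([^}]*)\}")

def substitute_env(value: str) -> str:
    """Resolve ${env.VAR:=default} and ${env.VAR:+alternate} references."""
    def _repl(match: re.Match) -> str:
        var, op, arg = match.group(1), match.group(2), match.group(3)
        val = os.environ.get(var)
        if op == "=":
            # := use the env value when set, otherwise the default
            return val if val is not None else arg
        # :+ use the alternate value only when the variable is set
        return arg if val else ""
    return _ENV_PATTERN.sub(_repl, value)

# With OLLAMA_URL unset, "${env.OLLAMA_URL:+ollama}" resolves to "",
# so the ollama provider block is effectively disabled.
assert substitute_env("${env.VLLM_MAX_TOKENS:=4096}") == "4096"

The :+ form is what lets one run.yaml declare every remote provider while only those whose credentials are present actually register.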
+ providers: + inference: + - provider_type: remote::cerebras + - provider_type: remote::ollama + - provider_type: remote::vllm + - provider_type: remote::tgi + - provider_type: remote::fireworks + - provider_type: remote::together + - provider_type: remote::bedrock + - provider_type: remote::nvidia + - provider_type: remote::openai + - provider_type: remote::anthropic + - provider_type: remote::gemini + - provider_type: remote::vertexai + - provider_type: remote::groq + - provider_type: remote::sambanova + - provider_type: remote::azure + - provider_type: inline::sentence-transformers + vector_io: + - provider_type: inline::faiss + - provider_type: inline::sqlite-vec + - provider_type: inline::milvus + - provider_type: remote::chromadb + - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate + files: + - provider_type: inline::localfs + safety: + - provider_type: inline::llama-guard + - provider_type: inline::code-scanner + agents: + - provider_type: inline::meta-reference + post_training: + - provider_type: inline::torchtune-cpu + eval: + - provider_type: inline::meta-reference + datasetio: + - provider_type: remote::huggingface + - provider_type: inline::localfs + scoring: + - provider_type: inline::basic + - provider_type: inline::llm-as-judge + - provider_type: inline::braintrust + tool_runtime: + - provider_type: remote::brave-search + - provider_type: remote::tavily-search + - provider_type: inline::rag-runtime + - provider_type: remote::model-context-protocol + batches: + - provider_type: inline::reference +image_type: venv +additional_pip_packages: +- aiosqlite +- asyncpg +- psycopg2-binary +- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml new file mode 100644 index 000000000..1da4f0da7 --- /dev/null +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -0,0 +1,290 @@ +version: 2 +image_name: starter +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai/v1 + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + base_url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + base_url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + base_url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + base_url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} + api_key: ${env.NVIDIA_API_KEY:=} + - provider_id: openai + 
provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + base_url: https://api.groq.com/openai/v1 + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + base_url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + base_url: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - 
provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml new file mode 100644 index 000000000..3e6cde13a --- /dev/null +++ b/src/llama_stack/distributions/starter/run.yaml @@ -0,0 +1,281 @@ +version: 2 +image_name: starter +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai/v1 + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + base_url: 
${env.OLLAMA_URL:=http://localhost:11434/v1} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + base_url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + base_url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + base_url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + base_url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1} + api_key: ${env.NVIDIA_API_KEY:=} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + base_url: https://api.groq.com/openai/v1 + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + base_url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + base_url: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - 
provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: 
nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py new file mode 100644 index 000000000..32264eebb --- /dev/null +++ b/src/llama_stack/distributions/starter/starter.py @@ -0,0 +1,349 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +from typing import Any + +from llama_stack.core.datatypes import ( + BuildProvider, + Provider, + ProviderSpec, + QualifiedModel, + SafetyConfig, + ShieldInput, + ToolGroupInput, + VectorStoresConfig, +) +from llama_stack.core.storage.kvstore.config import PostgresKVStoreConfig +from llama_stack.core.storage.sqlstore.sqlstore import PostgresSqlStoreConfig +from llama_stack.core.utils.dynamic import instantiate_class_type +from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig +from llama_stack.providers.inline.inference.sentence_transformers import ( + SentenceTransformersInferenceConfig, +) +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig +from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig +from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( + SQLiteVectorIOConfig, +) +from llama_stack.providers.registry.inference import available_providers +from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig +from llama_stack.providers.remote.vector_io.pgvector.config import ( + PGVectorVectorIOConfig, +) +from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig +from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig +from llama_stack_api import RemoteProviderSpec + + +def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]: + """Get configuration for a provider using its adapter's config class.""" + config_class = instantiate_class_type(provider_spec.config_class) + + if hasattr(config_class, "sample_run_config"): + config: dict[str, Any] = config_class.sample_run_config() + return config + return {} + + +ENABLED_INFERENCE_PROVIDERS = [ + "ollama", + "vllm", + "tgi", + "fireworks", + "together", + "gemini", + "vertexai", + "groq", + "sambanova", + "anthropic", + "openai", + "cerebras", + "nvidia", + "bedrock", + "azure", +] + +INFERENCE_PROVIDER_IDS = { + "ollama": "${env.OLLAMA_URL:+ollama}", + "vllm": "${env.VLLM_URL:+vllm}", + "tgi": "${env.TGI_URL:+tgi}", + "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}", + "nvidia": "${env.NVIDIA_API_KEY:+nvidia}", + "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}", + "azure": "${env.AZURE_API_KEY:+azure}", +} + + +def get_remote_inference_providers() -> list[Provider]: + # Filter out inline providers and some others - the starter distro only exposes remote providers + remote_providers = [ + provider + for provider in available_providers() + if isinstance(provider, RemoteProviderSpec) and provider.adapter_type in ENABLED_INFERENCE_PROVIDERS + ] + + inference_providers = [] + for provider_spec in remote_providers: + provider_type = provider_spec.adapter_type + + if provider_type in INFERENCE_PROVIDER_IDS: + provider_id = INFERENCE_PROVIDER_IDS[provider_type] + else: + provider_id = provider_type.replace("-", 
"_").replace("::", "_") + config = _get_config_for_provider(provider_spec) + + inference_providers.append( + Provider( + provider_id=provider_id, + provider_type=f"remote::{provider_type}", + config=config, + ) + ) + return inference_providers + + +def get_distribution_template(name: str = "starter") -> DistributionTemplate: + remote_inference_providers = get_remote_inference_providers() + + providers = { + "inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers] + + [BuildProvider(provider_type="inline::sentence-transformers")], + "vector_io": [ + BuildProvider(provider_type="inline::faiss"), + BuildProvider(provider_type="inline::sqlite-vec"), + BuildProvider(provider_type="inline::milvus"), + BuildProvider(provider_type="remote::chromadb"), + BuildProvider(provider_type="remote::pgvector"), + BuildProvider(provider_type="remote::qdrant"), + BuildProvider(provider_type="remote::weaviate"), + ], + "files": [BuildProvider(provider_type="inline::localfs")], + "safety": [ + BuildProvider(provider_type="inline::llama-guard"), + BuildProvider(provider_type="inline::code-scanner"), + ], + "agents": [BuildProvider(provider_type="inline::meta-reference")], + "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")], + "eval": [BuildProvider(provider_type="inline::meta-reference")], + "datasetio": [ + BuildProvider(provider_type="remote::huggingface"), + BuildProvider(provider_type="inline::localfs"), + ], + "scoring": [ + BuildProvider(provider_type="inline::basic"), + BuildProvider(provider_type="inline::llm-as-judge"), + BuildProvider(provider_type="inline::braintrust"), + ], + "tool_runtime": [ + BuildProvider(provider_type="remote::brave-search"), + BuildProvider(provider_type="remote::tavily-search"), + BuildProvider(provider_type="inline::rag-runtime"), + BuildProvider(provider_type="remote::model-context-protocol"), + ], + "batches": [ + BuildProvider(provider_type="inline::reference"), + ], + } + files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}") + files_provider = Provider( + provider_id="meta-reference-files", + provider_type="inline::localfs", + config=files_config, + ) + embedding_provider = Provider( + provider_id="sentence-transformers", + provider_type="inline::sentence-transformers", + config=SentenceTransformersInferenceConfig.sample_run_config(), + ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::rag", + provider_id="rag-runtime", + ), + ] + default_shields = [ + # if the + ShieldInput( + shield_id="llama-guard", + provider_id="${env.SAFETY_MODEL:+llama-guard}", + provider_shield_id="${env.SAFETY_MODEL:=}", + ), + ShieldInput( + shield_id="code-scanner", + provider_id="${env.CODE_SCANNER_MODEL:+code-scanner}", + provider_shield_id="${env.CODE_SCANNER_MODEL:=}", + ), + ] + postgres_sql_config = PostgresSqlStoreConfig.sample_run_config() + postgres_kv_config = PostgresKVStoreConfig.sample_run_config() + default_overrides = { + "inference": remote_inference_providers + [embedding_provider], + "vector_io": [ + Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="sqlite-vec", + provider_type="inline::sqlite-vec", + config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + 
provider_id="${env.MILVUS_URL:+milvus}", + provider_type="inline::milvus", + config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.CHROMADB_URL:+chromadb}", + provider_type="remote::chromadb", + config=ChromaVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}/", + url="${env.CHROMADB_URL:=}", + ), + ), + Provider( + provider_id="${env.PGVECTOR_DB:+pgvector}", + provider_type="remote::pgvector", + config=PGVectorVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + db="${env.PGVECTOR_DB:=}", + user="${env.PGVECTOR_USER:=}", + password="${env.PGVECTOR_PASSWORD:=}", + ), + ), + Provider( + provider_id="${env.QDRANT_URL:+qdrant}", + provider_type="remote::qdrant", + config=QdrantVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + url="${env.QDRANT_URL:=}", + ), + ), + Provider( + provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", + provider_type="remote::weaviate", + config=WeaviateVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", + ), + ), + ], + "files": [files_provider], + } + + base_run_settings = RunConfigSettings( + provider_overrides=default_overrides, + default_models=[], + default_tool_groups=default_tool_groups, + default_shields=default_shields, + vector_stores_config=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="sentence-transformers", + model_id="nomic-ai/nomic-embed-text-v1.5", + ), + ), + safety_config=SafetyConfig( + default_shield_id="llama-guard", + ), + ) + + postgres_run_settings = base_run_settings.model_copy( + update={ + "storage_backends": { + "kv_default": postgres_kv_config, + "sql_default": postgres_sql_config, + } + }, + deep=True, + ) + + return DistributionTemplate( + name=name, + distro_type="self_hosted", + description="Quick start template for running Llama Stack with several popular providers. 
This distribution is intended for CPU-only environments.", + container_image=None, + template_path=None, + providers=providers, + additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())), + run_configs={ + "run.yaml": base_run_settings, + "run-with-postgres-store.yaml": postgres_run_settings, + }, + run_config_env_vars={ + "LLAMA_STACK_PORT": ( + "8321", + "Port for the Llama Stack distribution server", + ), + "FIREWORKS_API_KEY": ( + "", + "Fireworks API Key", + ), + "OPENAI_API_KEY": ( + "", + "OpenAI API Key", + ), + "GROQ_API_KEY": ( + "", + "Groq API Key", + ), + "ANTHROPIC_API_KEY": ( + "", + "Anthropic API Key", + ), + "GEMINI_API_KEY": ( + "", + "Gemini API Key", + ), + "VERTEX_AI_PROJECT": ( + "", + "Google Cloud Project ID for Vertex AI", + ), + "VERTEX_AI_LOCATION": ( + "us-central1", + "Google Cloud Location for Vertex AI", + ), + "SAMBANOVA_API_KEY": ( + "", + "SambaNova API Key", + ), + "VLLM_URL": ( + "http://localhost:8000/v1", + "vLLM URL", + ), + "VLLM_INFERENCE_MODEL": ( + "", + "Optional vLLM Inference Model to register on startup", + ), + "OLLAMA_URL": ( + "http://localhost:11434", + "Ollama URL", + ), + "AZURE_API_KEY": ( + "", + "Azure API Key", + ), + "AZURE_API_BASE": ( + "", + "Azure API Base", + ), + "AZURE_API_VERSION": ( + "", + "Azure API Version", + ), + "AZURE_API_TYPE": ( + "azure", + "Azure API Type", + ), + }, + ) diff --git a/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py similarity index 95% rename from llama_stack/distributions/template.py rename to src/llama_stack/distributions/template.py index 64f21e626..bab3211e9 100644 --- a/llama_stack/distributions/template.py +++ b/src/llama_stack/distributions/template.py @@ -12,8 +12,6 @@ import rich import yaml from pydantic import BaseModel, Field -from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models import ModelType from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, Api, @@ -24,8 +22,8 @@ from llama_stack.core.datatypes import ( DistributionSpec, ModelInput, Provider, + SafetyConfig, ShieldInput, - TelemetryConfig, ToolGroupInput, VectorStoresConfig, ) @@ -36,13 +34,14 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageBackendType, ) +from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.kvstore.config import get_pip_packages as get_kv_pip_packages +from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.storage.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig -from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages +from llama_stack_api import DatasetPurpose, ModelType def filter_empty_values(obj: Any) -> Any: @@ -188,7 +187,7 @@ class RunConfigSettings(BaseModel): default_datasets: list[DatasetInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None vector_stores_config: VectorStoresConfig | None = None - 
telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) + safety_config: SafetyConfig | None = None storage_backends: dict[str, Any] | None = None storage_stores: dict[str, Any] | None = None @@ -257,6 +256,10 @@ class RunConfigSettings(BaseModel): backend="sql_default", table_name="openai_conversations", ).model_dump(exclude_none=True), + "prompts": KVStoreReference( + backend="kv_default", + namespace="prompts", + ).model_dump(exclude_none=True), } storage_config = dict( @@ -284,12 +287,14 @@ class RunConfigSettings(BaseModel): "server": { "port": 8321, }, - "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None, } if self.vector_stores_config: config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True) + if self.safety_config: + config["safety"] = self.safety_config.model_dump(exclude_none=True) + return config @@ -415,6 +420,7 @@ class DistributionTemplate(BaseModel): providers_table=providers_table, run_config_env_vars=self.run_config_env_vars, default_models=default_models, + run_configs=list(self.run_configs.keys()), ) return "" diff --git a/llama_stack/distributions/watsonx/__init__.py b/src/llama_stack/distributions/watsonx/__init__.py similarity index 100% rename from llama_stack/distributions/watsonx/__init__.py rename to src/llama_stack/distributions/watsonx/__init__.py diff --git a/llama_stack/distributions/watsonx/build.yaml b/src/llama_stack/distributions/watsonx/build.yaml similarity index 100% rename from llama_stack/distributions/watsonx/build.yaml rename to src/llama_stack/distributions/watsonx/build.yaml diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/run.yaml new file mode 100644 index 000000000..55ea34cb6 --- /dev/null +++ b/src/llama_stack/distributions/watsonx/run.yaml @@ -0,0 +1,134 @@ +version: 2 +image_name: watsonx +apis: +- agents +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: watsonx + provider_type: remote::watsonx + config: + base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:=} + project_id: ${env.WATSONX_PROJECT_ID:=} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: 
remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files} + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: [] + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/distributions/watsonx/watsonx.py b/src/llama_stack/distributions/watsonx/watsonx.py similarity index 100% rename from llama_stack/distributions/watsonx/watsonx.py rename to src/llama_stack/distributions/watsonx/watsonx.py diff --git a/llama_stack/env.py b/src/llama_stack/env.py similarity index 100% rename from llama_stack/env.py rename to src/llama_stack/env.py diff --git a/llama_stack/log.py b/src/llama_stack/log.py similarity index 89% rename from llama_stack/log.py rename to src/llama_stack/log.py index dc39f6881..a44a0ac26 100644 --- a/llama_stack/log.py +++ b/src/llama_stack/log.py @@ -9,15 +9,23 @@ import os import re from logging.config import dictConfig # allow-direct-logging +from pydantic import BaseModel, Field from rich.console import Console from rich.errors import MarkupError from rich.logging import RichHandler -from llama_stack.core.datatypes import LoggingConfig - # Default log level DEFAULT_LOG_LEVEL = logging.INFO + +class LoggingConfig(BaseModel): + category_levels: dict[str, str] = Field( + default_factory=dict, + description=""" +Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""", + ) + + # Predefined categories CATEGORIES = [ "core", @@ -29,7 +37,6 @@ CATEGORIES = [ "eval", "tools", "client", - "telemetry", "openai", "openai_responses", "openai_conversations", @@ -137,7 +144,8 @@ class CustomRichHandler(RichHandler): # Set a reasonable default width for console output, especially when redirected to files console_width = int(os.environ.get("LLAMA_STACK_LOG_WIDTH", "120")) # Don't force terminal codes to avoid ANSI escape codes in log files - kwargs["console"] = Console(width=console_width) + # Ensure logs go to stderr, not stdout + kwargs["console"] = Console(width=console_width, stderr=True) super().__init__(*args, **kwargs) def emit(self, record): @@ -177,6 +185,7 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str | log_file (str | None): Path to a log file to additionally pipe the logs into. 
If None, reads from LLAMA_STACK_LOG_FILE environment variable. """ + global _category_levels # Read from environment variables if not explicitly provided if category_levels is None: category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL) @@ -184,6 +193,9 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str | if env_config: category_levels.update(parse_environment_config(env_config)) + # Update the module-level _category_levels so that already-created loggers pick up the new levels + _category_levels.update(category_levels) + if log_file is None: log_file = os.environ.get("LLAMA_STACK_LOG_FILE") log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s" @@ -268,14 +280,18 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str | } dictConfig(logging_config) - # Ensure third-party libraries follow the root log level, but preserve - # already-configured loggers (e.g., uvicorn) and our own llama_stack loggers + # Update log levels for all loggers that were created before setup_logging was called for name, logger in logging.root.manager.loggerDict.items(): if isinstance(logger, logging.Logger): - # Skip infrastructure loggers (uvicorn, fastapi) and our own loggers - if name.startswith(("uvicorn", "fastapi", "llama_stack")): + # Skip infrastructure loggers (uvicorn, fastapi) to preserve their configured levels + if name.startswith(("uvicorn", "fastapi")): continue - logger.setLevel(root_level) + # Update llama_stack loggers if root level was explicitly set (e.g., via all=CRITICAL) + if name.startswith("llama_stack") and "root" in category_levels: + logger.setLevel(root_level) + # Update third-party library loggers + elif not name.startswith("llama_stack"): + logger.setLevel(root_level) def get_logger( diff --git a/llama_stack/core/ui/__init__.py b/src/llama_stack/models/__init__.py similarity index 100% rename from llama_stack/core/ui/__init__.py rename to src/llama_stack/models/__init__.py diff --git a/llama_stack/core/ui/modules/__init__.py b/src/llama_stack/models/llama/__init__.py similarity index 100% rename from llama_stack/core/ui/modules/__init__.py rename to src/llama_stack/models/llama/__init__.py diff --git a/llama_stack/models/llama/checkpoint.py b/src/llama_stack/models/llama/checkpoint.py similarity index 94% rename from llama_stack/models/llama/checkpoint.py rename to src/llama_stack/models/llama/checkpoint.py index c9e0030e3..b00e2ed18 100644 --- a/llama_stack/models/llama/checkpoint.py +++ b/src/llama_stack/models/llama/checkpoint.py @@ -38,18 +38,18 @@ def maybe_reshard_state_dict( mmap: bool = True, ) -> dict[str, torch.Tensor]: if str(map_location) == "cpu": - torch.set_default_tensor_type(torch.BFloat16Tensor) + torch.set_default_dtype(torch.bfloat16) else: - torch.set_default_tensor_type(torch.cuda.BFloat16Tensor) + torch.set_default_dtype(torch.bfloat16) - ckpt_paths = np.array(sorted(ckpt_paths)) + ckpt_paths_array = np.array(sorted(ckpt_paths)) new_mp_size, new_mp_rank = get_model_parallel_world_size(), get_model_parallel_rank() - old_mp_size = len(ckpt_paths) + old_mp_size = len(ckpt_paths_array) old_mp_ranks = map_mp_rank(old_mp_size, new_mp_size, new_mp_rank) - print(f"Loading checkpoint shards:\n{str(ckpt_paths[old_mp_ranks])}") # type: ignore - paths = ckpt_paths[old_mp_ranks] # type: ignore + print(f"Loading checkpoint shards:\n{str(ckpt_paths_array[old_mp_ranks])}") # type: ignore + paths = ckpt_paths_array[old_mp_ranks] # type: ignore state_dicts = [torch.load(str(p), 
map_location=map_location, mmap=mmap) for p in paths] if new_mp_size == old_mp_size: diff --git a/llama_stack/models/llama/datatypes.py b/src/llama_stack/models/llama/datatypes.py similarity index 100% rename from llama_stack/models/llama/datatypes.py rename to src/llama_stack/models/llama/datatypes.py diff --git a/llama_stack/models/llama/hadamard_utils.py b/src/llama_stack/models/llama/hadamard_utils.py similarity index 94% rename from llama_stack/models/llama/hadamard_utils.py rename to src/llama_stack/models/llama/hadamard_utils.py index 87f3829d0..02b569aaf 100644 --- a/llama_stack/models/llama/hadamard_utils.py +++ b/src/llama_stack/models/llama/hadamard_utils.py @@ -79,6 +79,8 @@ def add_hadamard_transform_for_spinquant(model: torch.nn.Module, prefix: str = " for module_name, module in model.named_children(): child_full_name = prefix + "." + module_name if re.search(pattern_last_linear_ffn, child_full_name): + # Module matching this pattern should be nn.Linear with in_features + assert isinstance(module, nn.Linear), f"Expected nn.Linear, got {type(module)}" new_module = nn.Sequential(HadamardModule(group_size=module.in_features), module) del module setattr(model, module_name, new_module) diff --git a/llama_stack/core/ui/page/__init__.py b/src/llama_stack/models/llama/llama3/__init__.py similarity index 100% rename from llama_stack/core/ui/page/__init__.py rename to src/llama_stack/models/llama/llama3/__init__.py diff --git a/llama_stack/models/llama/llama3/args.py b/src/llama_stack/models/llama/llama3/args.py similarity index 100% rename from llama_stack/models/llama/llama3/args.py rename to src/llama_stack/models/llama/llama3/args.py diff --git a/llama_stack/models/llama/llama3/chat_format.py b/src/llama_stack/models/llama/llama3/chat_format.py similarity index 100% rename from llama_stack/models/llama/llama3/chat_format.py rename to src/llama_stack/models/llama/llama3/chat_format.py diff --git a/llama_stack/models/llama/llama3/dog.jpg b/src/llama_stack/models/llama/llama3/dog.jpg similarity index 100% rename from llama_stack/models/llama/llama3/dog.jpg rename to src/llama_stack/models/llama/llama3/dog.jpg diff --git a/llama_stack/models/llama/llama3/generation.py b/src/llama_stack/models/llama/llama3/generation.py similarity index 99% rename from llama_stack/models/llama/llama3/generation.py rename to src/llama_stack/models/llama/llama3/generation.py index fe7be5ea9..9ac215c3b 100644 --- a/llama_stack/models/llama/llama3/generation.py +++ b/src/llama_stack/models/llama/llama3/generation.py @@ -26,8 +26,10 @@ from fairscale.nn.model_parallel.initialize import ( ) from termcolor import cprint +from llama_stack.models.llama.datatypes import ToolPromptFormat + from ..checkpoint import maybe_reshard_state_dict -from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage, ToolPromptFormat +from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage from .args import ModelArgs from .chat_format import ChatFormat, LLMInput from .model import Transformer diff --git a/llama_stack/models/llama/llama3/interface.py b/src/llama_stack/models/llama/llama3/interface.py similarity index 98% rename from llama_stack/models/llama/llama3/interface.py rename to src/llama_stack/models/llama/llama3/interface.py index b63ba4847..89be31a55 100644 --- a/llama_stack/models/llama/llama3/interface.py +++ b/src/llama_stack/models/llama/llama3/interface.py @@ -15,13 +15,10 @@ from pathlib import Path from termcolor import colored +from 
llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall, ToolDefinition, ToolPromptFormat + from ..datatypes import ( - BuiltinTool, RawMessage, - StopReason, - ToolCall, - ToolDefinition, - ToolPromptFormat, ) from . import template_data from .chat_format import ChatFormat diff --git a/llama_stack/models/llama/llama3/model.py b/src/llama_stack/models/llama/llama3/model.py similarity index 100% rename from llama_stack/models/llama/llama3/model.py rename to src/llama_stack/models/llama/llama3/model.py diff --git a/llama_stack/models/llama/llama3/multimodal/__init__.py b/src/llama_stack/models/llama/llama3/multimodal/__init__.py similarity index 100% rename from llama_stack/models/llama/llama3/multimodal/__init__.py rename to src/llama_stack/models/llama/llama3/multimodal/__init__.py diff --git a/llama_stack/models/llama/llama3/multimodal/encoder_utils.py b/src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py similarity index 97% rename from llama_stack/models/llama/llama3/multimodal/encoder_utils.py rename to src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py index 90ced13b2..a87d77cc3 100644 --- a/llama_stack/models/llama/llama3/multimodal/encoder_utils.py +++ b/src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py @@ -141,15 +141,15 @@ def build_encoder_attention_mask( """ Build vision encoder attention mask that omits padding tokens. """ - masks = [] + masks_list: list[torch.Tensor] = [] for arx in ar: mask_i = torch.ones((num_chunks, x.shape[2], 1), dtype=x.dtype) mask_i[: arx[0] * arx[1], :ntok] = 0 mask_i = mask_i.view(num_chunks * x.shape[2], -1) mask_i = mask_i @ mask_i.T * get_negative_inf_value(x.dtype) mask_i = mask_i.unsqueeze(0) - masks.append(mask_i) - masks = torch.stack(masks).to(x.device).expand(-1, n_heads, -1, -1) + masks_list.append(mask_i) + masks = torch.stack(masks_list).to(x.device).expand(-1, n_heads, -1, -1) return masks diff --git a/llama_stack/models/llama/llama3/multimodal/image_transform.py b/src/llama_stack/models/llama/llama3/multimodal/image_transform.py similarity index 97% rename from llama_stack/models/llama/llama3/multimodal/image_transform.py rename to src/llama_stack/models/llama/llama3/multimodal/image_transform.py index 7b20a31fa..de2709c74 100644 --- a/llama_stack/models/llama/llama3/multimodal/image_transform.py +++ b/src/llama_stack/models/llama/llama3/multimodal/image_transform.py @@ -95,7 +95,7 @@ class VariableSizeImageTransform: factors_set.add(n // i) return factors_set - def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) -> torch.Tensor: + def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) -> list[tuple[int, int]]: """ Computes all of the allowed resolutions for a fixed number of chunks and patch_size. Useful when dividing an image into chunks. @@ -198,10 +198,10 @@ class VariableSizeImageTransform: def resize_without_distortion( self, - image: torch.Tensor, + image: Image.Image, target_size: tuple[int, int], max_upscaling_size: int | None, - ) -> torch.Tensor: + ) -> Image.Image: """ Used to resize an image to target_resolution, without distortion. 
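The masks to masks_list rename above, and the possible_resolutions split in the hunk that follows, are the standard fix for a mypy complaint: rebinding one name from list[torch.Tensor] to torch.Tensor defeats static type inference. A small illustration of the failure mode being removed (the function and variable names here are mine, not from this diff):

import torch

def stack_masks_untyped(parts: list[torch.Tensor]) -> torch.Tensor:
    masks = []                      # inferred as list[Tensor]
    for p in parts:
        masks.append(p.unsqueeze(0))
    masks = torch.stack(masks)      # rebinding to Tensor: mypy reports
    return masks                    # "incompatible types in assignment"

def stack_masks_typed(parts: list[torch.Tensor]) -> torch.Tensor:
    masks_list: list[torch.Tensor] = [p.unsqueeze(0) for p in parts]
    return torch.stack(masks_list)  # one name per type; checks cleanly

Keeping one name per type is behavior-neutral at runtime but lets the annotations added in these hunks type-check cleanly.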
@@ -380,12 +380,12 @@ class VariableSizeImageTransform: assert isinstance(image, Image.Image), type(image) w, h = image.size - possible_resolutions = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size) - possible_resolutions = torch.tensor(possible_resolutions) + possible_resolutions_list = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size) + possible_resolutions_tensor = torch.tensor(possible_resolutions_list) best_resolution = self.get_best_fit( image_size=(w, h), - possible_resolutions=possible_resolutions, + possible_resolutions=possible_resolutions_tensor, resize_to_max_canvas=resize_to_max_canvas, ) diff --git a/llama_stack/models/llama/llama3/multimodal/model.py b/src/llama_stack/models/llama/llama3/multimodal/model.py similarity index 100% rename from llama_stack/models/llama/llama3/multimodal/model.py rename to src/llama_stack/models/llama/llama3/multimodal/model.py diff --git a/llama_stack/models/llama/llama3/multimodal/utils.py b/src/llama_stack/models/llama/llama3/multimodal/utils.py similarity index 100% rename from llama_stack/models/llama/llama3/multimodal/utils.py rename to src/llama_stack/models/llama/llama3/multimodal/utils.py diff --git a/llama_stack/models/llama/llama3/pasta.jpeg b/src/llama_stack/models/llama/llama3/pasta.jpeg similarity index 100% rename from llama_stack/models/llama/llama3/pasta.jpeg rename to src/llama_stack/models/llama/llama3/pasta.jpeg diff --git a/llama_stack/models/llama/llama3/prompt_templates/__init__.py b/src/llama_stack/models/llama/llama3/prompt_templates/__init__.py similarity index 100% rename from llama_stack/models/llama/llama3/prompt_templates/__init__.py rename to src/llama_stack/models/llama/llama3/prompt_templates/__init__.py diff --git a/llama_stack/models/llama/llama3/prompt_templates/base.py b/src/llama_stack/models/llama/llama3/prompt_templates/base.py similarity index 100% rename from llama_stack/models/llama/llama3/prompt_templates/base.py rename to src/llama_stack/models/llama/llama3/prompt_templates/base.py diff --git a/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py new file mode 100644 index 000000000..3fbaa103e --- /dev/null +++ b/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py @@ -0,0 +1,319 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# top-level folder for each specific model found within the models/ directory at +# the top-level of this source tree. 
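The new module opening here defines the Llama 3 tool-calling system-prompt generators. A usage sketch, assuming the package is importable from the new src/ layout; the trending_songs tool simply mirrors the data_examples defined further down:

from llama_stack.models.llama.datatypes import ToolDefinition
from llama_stack.models.llama.llama3.prompt_templates.system_prompts import (
    JsonCustomToolGenerator,
)

tool = ToolDefinition(
    tool_name="trending_songs",
    description="Returns the trending songs on a Music site",
    input_schema={
        "type": "object",
        "properties": {"n": {"type": "int", "description": "The number of songs to return"}},
        "required": ["n"],
    },
)
# gen() binds the tool definitions into the Jinja template; render() yields the prompt text.
print(JsonCustomToolGenerator().gen([tool]).render())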
+ +import textwrap +from datetime import datetime +from typing import Any + +from llama_stack.models.llama.datatypes import ( + BuiltinTool, + ToolDefinition, +) + +from .base import PromptTemplate, PromptTemplateGeneratorBase + + +class SystemDefaultGenerator(PromptTemplateGeneratorBase): + def gen(self, *args, **kwargs) -> PromptTemplate: + template_str = textwrap.dedent( + """ + Cutting Knowledge Date: December 2023 + Today Date: {{ today }} + """ + ) + return PromptTemplate( + template_str.lstrip("\n"), + { + "today": datetime.now().strftime("%d %B %Y") # noqa: DTZ005 - we don't care about timezones here since we are displaying the date + }, + ) + + def data_examples(self) -> list[Any]: + return [None] + + +class BuiltinToolGenerator(PromptTemplateGeneratorBase): + def _tool_breakdown(self, tools: list[ToolDefinition]): + builtin_tools, custom_tools = [], [] + for dfn in tools: + if isinstance(dfn.tool_name, BuiltinTool): + builtin_tools.append(dfn) + else: + custom_tools.append(dfn) + + return builtin_tools, custom_tools + + def gen(self, tools: list[ToolDefinition]) -> PromptTemplate: + builtin_tools, custom_tools = self._tool_breakdown(tools) + template_str = textwrap.dedent( + """ + {% if builtin_tools or custom_tools -%} + Environment: ipython + {% endif -%} + {% set builtin_tools = builtin_tools | reject('equalto', 'code_interpreter') | list -%} + {% if builtin_tools -%} + Tools: {{ builtin_tools | join(", ") | trim -}} + {% endif %} + """ + ) + return PromptTemplate( + template_str.lstrip("\n"), + { + "builtin_tools": [t.tool_name.value for t in builtin_tools], + "custom_tools": custom_tools, + }, + ) + + def data_examples(self) -> list[list[ToolDefinition]]: + return [ + # builtin tools + [ + ToolDefinition(tool_name=BuiltinTool.code_interpreter), + ToolDefinition(tool_name=BuiltinTool.brave_search), + ToolDefinition(tool_name=BuiltinTool.wolfram_alpha), + ], + # only code interpreter + [ + ToolDefinition(tool_name=BuiltinTool.code_interpreter), + ], + ] + + +class JsonCustomToolGenerator(PromptTemplateGeneratorBase): + def gen(self, custom_tools: list[ToolDefinition]) -> PromptTemplate: + template_str = textwrap.dedent( + """ + Answer the user's question by making use of the following functions if needed. + If none of the functions can be used, please say so. + Here is a list of functions in JSON format: + {% for t in custom_tools -%} + {# manually setting up JSON because jinja sorts keys in unexpected ways -#} + {%- set tname = t.tool_name -%} + {%- set tdesc = t.description -%} + {%- set tprops = t.input_schema.get('properties', {}) -%} + {%- set required_params = t.input_schema.get('required', []) -%} + { + "type": "function", + "function": { + "name": "{{tname}}", + "description": "{{tdesc}}", + "parameters": { + "type": "object", + "properties": [ + {%- for name, param in tprops.items() %} + { + "{{name}}": { + "type": "object", + "description": "{{param.get('description', '')}}" + } + }{% if not loop.last %},{% endif %} + {%- endfor %} + ], + "required": {{ required_params | tojson }} + } + } + } + {% endfor %} + Return function calls in JSON format. 
+ """ + ) + + return PromptTemplate( + template_str.lstrip("\n"), + {"custom_tools": [t.model_dump() for t in custom_tools]}, + ) + + def data_examples(self) -> list[list[ToolDefinition]]: + return [ + [ + ToolDefinition( + tool_name="trending_songs", + description="Returns the trending songs on a Music site", + input_schema={ + "type": "object", + "properties": { + "n": { + "type": "int", + "description": "The number of songs to return", + }, + "genre": { + "type": "str", + "description": "The genre of the songs to return", + }, + }, + "required": ["n"], + }, + ), + ] + ] + + +class FunctionTagCustomToolGenerator(PromptTemplateGeneratorBase): + def gen(self, custom_tools: list[ToolDefinition]) -> PromptTemplate: + template_str = textwrap.dedent( + """ + You have access to the following functions: + + {% for t in custom_tools %} + {#- manually setting up JSON because jinja sorts keys in unexpected ways -#} + {%- set tname = t.tool_name -%} + {%- set tdesc = t.description -%} + {%- set tprops = t.input_schema.get('properties', {}) -%} + {%- set modified_params = {} -%} + {%- for key, value in tprops.items() -%} + {%- set param_copy = value.copy() -%} + {%- if 'default' in param_copy -%} + {%- set _ = param_copy.pop('default', None) -%} + {%- endif -%} + {%- set _ = modified_params.update({key: param_copy}) -%} + {%- endfor -%} + {%- set tparams = modified_params | tojson -%} + Use the function '{{ tname }}' to '{{ tdesc }}': + {"name": "{{tname}}", "description": "{{tdesc}}", "parameters": {{tparams}}} + + {% endfor -%} + Think very carefully before calling functions. + If you choose to call a function ONLY reply in the following format with no prefix or suffix: + + {"example_name": "example_value"} + + Reminder: + - If looking for real time information use relevant functions before falling back to brave_search + - Function calls MUST follow the specified format, start with + - Required parameters MUST be specified + - Only call one function at a time + - Put the entire function call reply on one line + """ + ) + return PromptTemplate( + template_str.lstrip("\n"), + {"custom_tools": [t.model_dump() for t in custom_tools]}, + ) + + def data_examples(self) -> list[list[ToolDefinition]]: + return [ + [ + ToolDefinition( + tool_name="trending_songs", + description="Returns the trending songs on a Music site", + input_schema={ + "type": "object", + "properties": { + "n": { + "type": "int", + "description": "The number of songs to return", + }, + "genre": { + "type": "str", + "description": "The genre of the songs to return", + }, + }, + "required": ["n"], + }, + ), + ] + ] + + +class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801 + DEFAULT_PROMPT = textwrap.dedent( + """ + You are a helpful assistant. You have access to functions, but you should only use them if they are required. + You are an expert in composing functions. You are given a question and a set of possible functions. + Based on the question, you may or may not need to make one function/tool call to achieve the purpose. + + If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] + If you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format. + For a boolean parameter, be sure to use `True` or `False` (capitalized) for the value. 
+ + + {{ function_description }} + """.strip("\n") + ) + + def gen(self, custom_tools: list[ToolDefinition], system_prompt: str | None = None) -> PromptTemplate: + system_prompt = system_prompt or self.DEFAULT_PROMPT + return PromptTemplate( + system_prompt, + {"function_description": self._gen_function_description(custom_tools)}, + ) + + def _gen_function_description(self, custom_tools: list[ToolDefinition]) -> str: + template_str = textwrap.dedent( + """ + Here is a list of functions in JSON format that you can invoke. + + [ + {% for t in tools -%} + {# manually setting up JSON because jinja sorts keys in unexpected ways -#} + {%- set tname = t.tool_name -%} + {%- set tdesc = t.description -%} + {%- set tprops = (t.input_schema or {}).get('properties', {}) -%} + {%- set required_params = (t.input_schema or {}).get('required', []) -%} + { + "name": "{{tname}}", + "description": "{{tdesc}}", + "parameters": { + "type": "dict", + "required": {{ required_params | tojson }}, + "properties": { + {%- for name, param in tprops.items() %} + "{{name}}": { + "type": "{{param.get('type', 'string')}}", + "description": "{{param.get('description', '')}}"{% if param.get('default') %}, + "default": "{{param.get('default')}}"{% endif %} + }{% if not loop.last %},{% endif %} + {%- endfor %} + } + } + }{% if not loop.last %}, + {% endif -%} + {%- endfor %} + ] + + You can answer general questions or invoke tools when necessary. + In addition to tool calls, you should also augment your responses by using the tool outputs. + + """ + ) + template = PromptTemplate( + template_str.strip("\n"), + {"tools": [t.model_dump() for t in custom_tools]}, + ) + rendered: str = template.render() + return rendered + + def data_examples(self) -> list[list[ToolDefinition]]: + return [ + [ + ToolDefinition( + tool_name="get_weather", + description="Get weather info for places", + input_schema={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city to get the weather for", + }, + "metric": { + "type": "string", + "description": "The metric for weather. 
Options are: celsius, fahrenheit", + "default": "celsius", + }, + }, + "required": ["city"], + }, + ), + ] + ] diff --git a/llama_stack/models/llama/llama3/prompt_templates/tool_response.py b/src/llama_stack/models/llama/llama3/prompt_templates/tool_response.py similarity index 100% rename from llama_stack/models/llama/llama3/prompt_templates/tool_response.py rename to src/llama_stack/models/llama/llama3/prompt_templates/tool_response.py diff --git a/llama_stack/core/ui/page/distribution/__init__.py b/src/llama_stack/models/llama/llama3/quantization/__init__.py similarity index 100% rename from llama_stack/core/ui/page/distribution/__init__.py rename to src/llama_stack/models/llama/llama3/quantization/__init__.py diff --git a/llama_stack/models/llama/llama3/quantization/loader.py b/src/llama_stack/models/llama/llama3/quantization/loader.py similarity index 100% rename from llama_stack/models/llama/llama3/quantization/loader.py rename to src/llama_stack/models/llama/llama3/quantization/loader.py diff --git a/llama_stack/models/llama/llama3/template_data.py b/src/llama_stack/models/llama/llama3/template_data.py similarity index 100% rename from llama_stack/models/llama/llama3/template_data.py rename to src/llama_stack/models/llama/llama3/template_data.py diff --git a/llama_stack/models/llama/llama3/tokenizer.model b/src/llama_stack/models/llama/llama3/tokenizer.model similarity index 100% rename from llama_stack/models/llama/llama3/tokenizer.model rename to src/llama_stack/models/llama/llama3/tokenizer.model diff --git a/llama_stack/models/llama/llama3/tokenizer.py b/src/llama_stack/models/llama/llama3/tokenizer.py similarity index 100% rename from llama_stack/models/llama/llama3/tokenizer.py rename to src/llama_stack/models/llama/llama3/tokenizer.py diff --git a/llama_stack/models/llama/llama3/tool_utils.py b/src/llama_stack/models/llama/llama3/tool_utils.py similarity index 98% rename from llama_stack/models/llama/llama3/tool_utils.py rename to src/llama_stack/models/llama/llama3/tool_utils.py index 8c12fe680..6f919e1fa 100644 --- a/llama_stack/models/llama/llama3/tool_utils.py +++ b/src/llama_stack/models/llama/llama3/tool_utils.py @@ -8,8 +8,9 @@ import json import re from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import BuiltinTool, ToolCall, ToolPromptFormat -from ..datatypes import BuiltinTool, RecursiveType, ToolCall, ToolPromptFormat +from ..datatypes import RecursiveType logger = get_logger(name=__name__, category="models::llama") diff --git a/llama_stack/models/llama/llama3_1/__init__.py b/src/llama_stack/models/llama/llama3_1/__init__.py similarity index 100% rename from llama_stack/models/llama/llama3_1/__init__.py rename to src/llama_stack/models/llama/llama3_1/__init__.py diff --git a/llama_stack/models/llama/llama3_1/prompt_format.md b/src/llama_stack/models/llama/llama3_1/prompt_format.md similarity index 100% rename from llama_stack/models/llama/llama3_1/prompt_format.md rename to src/llama_stack/models/llama/llama3_1/prompt_format.md diff --git a/llama_stack/models/llama/llama3_1/prompts.py b/src/llama_stack/models/llama/llama3_1/prompts.py similarity index 100% rename from llama_stack/models/llama/llama3_1/prompts.py rename to src/llama_stack/models/llama/llama3_1/prompts.py diff --git a/llama_stack/core/ui/page/evaluations/__init__.py b/src/llama_stack/models/llama/llama3_2/__init__.py similarity index 100% rename from llama_stack/core/ui/page/evaluations/__init__.py rename to src/llama_stack/models/llama/llama3_2/__init__.py diff --git 
a/llama_stack/models/llama/llama3_2/prompts_text.py b/src/llama_stack/models/llama/llama3_2/prompts_text.py similarity index 100% rename from llama_stack/models/llama/llama3_2/prompts_text.py rename to src/llama_stack/models/llama/llama3_2/prompts_text.py diff --git a/llama_stack/models/llama/llama3_2/prompts_vision.py b/src/llama_stack/models/llama/llama3_2/prompts_vision.py similarity index 100% rename from llama_stack/models/llama/llama3_2/prompts_vision.py rename to src/llama_stack/models/llama/llama3_2/prompts_vision.py diff --git a/llama_stack/models/llama/llama3_2/text_prompt_format.md b/src/llama_stack/models/llama/llama3_2/text_prompt_format.md similarity index 100% rename from llama_stack/models/llama/llama3_2/text_prompt_format.md rename to src/llama_stack/models/llama/llama3_2/text_prompt_format.md diff --git a/llama_stack/models/llama/llama3_2/vision_prompt_format.md b/src/llama_stack/models/llama/llama3_2/vision_prompt_format.md similarity index 100% rename from llama_stack/models/llama/llama3_2/vision_prompt_format.md rename to src/llama_stack/models/llama/llama3_2/vision_prompt_format.md diff --git a/llama_stack/core/ui/page/playground/__init__.py b/src/llama_stack/models/llama/llama3_3/__init__.py similarity index 100% rename from llama_stack/core/ui/page/playground/__init__.py rename to src/llama_stack/models/llama/llama3_3/__init__.py diff --git a/llama_stack/models/llama/llama3_3/prompts.py b/src/llama_stack/models/llama/llama3_3/prompts.py similarity index 100% rename from llama_stack/models/llama/llama3_3/prompts.py rename to src/llama_stack/models/llama/llama3_3/prompts.py diff --git a/llama_stack/core/utils/__init__.py b/src/llama_stack/models/llama/llama4/__init__.py similarity index 100% rename from llama_stack/core/utils/__init__.py rename to src/llama_stack/models/llama/llama4/__init__.py diff --git a/llama_stack/models/llama/llama4/args.py b/src/llama_stack/models/llama/llama4/args.py similarity index 100% rename from llama_stack/models/llama/llama4/args.py rename to src/llama_stack/models/llama/llama4/args.py diff --git a/llama_stack/models/llama/llama4/chat_format.py b/src/llama_stack/models/llama/llama4/chat_format.py similarity index 100% rename from llama_stack/models/llama/llama4/chat_format.py rename to src/llama_stack/models/llama/llama4/chat_format.py diff --git a/llama_stack/models/llama/llama4/datatypes.py b/src/llama_stack/models/llama/llama4/datatypes.py similarity index 100% rename from llama_stack/models/llama/llama4/datatypes.py rename to src/llama_stack/models/llama/llama4/datatypes.py diff --git a/llama_stack/models/llama/llama4/ffn.py b/src/llama_stack/models/llama/llama4/ffn.py similarity index 100% rename from llama_stack/models/llama/llama4/ffn.py rename to src/llama_stack/models/llama/llama4/ffn.py diff --git a/llama_stack/models/llama/llama4/generation.py b/src/llama_stack/models/llama/llama4/generation.py similarity index 100% rename from llama_stack/models/llama/llama4/generation.py rename to src/llama_stack/models/llama/llama4/generation.py diff --git a/llama_stack/models/llama/llama4/model.py b/src/llama_stack/models/llama/llama4/model.py similarity index 100% rename from llama_stack/models/llama/llama4/model.py rename to src/llama_stack/models/llama/llama4/model.py diff --git a/llama_stack/models/llama/llama4/moe.py b/src/llama_stack/models/llama/llama4/moe.py similarity index 100% rename from llama_stack/models/llama/llama4/moe.py rename to src/llama_stack/models/llama/llama4/moe.py diff --git 
a/llama_stack/models/llama/llama4/preprocess.py b/src/llama_stack/models/llama/llama4/preprocess.py similarity index 100% rename from llama_stack/models/llama/llama4/preprocess.py rename to src/llama_stack/models/llama/llama4/preprocess.py diff --git a/llama_stack/models/llama/llama4/prompt_format.md b/src/llama_stack/models/llama/llama4/prompt_format.md similarity index 100% rename from llama_stack/models/llama/llama4/prompt_format.md rename to src/llama_stack/models/llama/llama4/prompt_format.md diff --git a/llama_stack/distributions/__init__.py b/src/llama_stack/models/llama/llama4/prompt_templates/__init__.py similarity index 100% rename from llama_stack/distributions/__init__.py rename to src/llama_stack/models/llama/llama4/prompt_templates/__init__.py diff --git a/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py b/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py new file mode 100644 index 000000000..feded9f8c --- /dev/null +++ b/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py @@ -0,0 +1,136 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# top-level folder for each specific model found within the models/ directory at +# the top-level of this source tree. + +import textwrap + +from llama_stack.models.llama.datatypes import ToolDefinition +from llama_stack.models.llama.llama3.prompt_templates.base import ( + PromptTemplate, + PromptTemplateGeneratorBase, +) + + +class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801 + DEFAULT_PROMPT = textwrap.dedent( + """ + You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines: + + 1. FUNCTION CALLS: + - ONLY use functions that are EXPLICITLY listed in the function list below + - If NO functions are listed (empty function list []), respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" + - If a function is not in the list, respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" + - If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s) + - Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)] + Examples: + CORRECT: [get_weather(location="Vancouver"), calculate_route(start="Boston", end="New York")] <- Only if get_weather and calculate_route are in function list + INCORRECT: get_weather(location="New York") + INCORRECT: Let me check the weather: [get_weather(location="New York")] + INCORRECT: [get_events(location="Singapore")] <- If function not in list + + 2. RESPONSE RULES: + - For pure function requests matching a listed function: ONLY output the function call(s) + - For knowledge questions: ONLY output text + - For missing parameters: ONLY request the specific missing parameters + - For unavailable services (not in function list): output ONLY with internal knowledge or "I don't have access to [Unavailable service] information". Do NOT execute a function call. 
+ - If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations + - NEVER combine text and function calls in the same response + - NEVER suggest alternative functions when the requested service is unavailable + - NEVER create or invent new functions not listed below + + 3. STRICT BOUNDARIES: + - ONLY use functions from the list below - no exceptions + - NEVER use a function as an alternative to unavailable information + - NEVER call functions not present in the function list + - NEVER add explanatory text to function calls + - NEVER respond with empty brackets + - Use proper Python/JSON syntax for function calls + - Check the function list carefully before responding + + 4. TOOL RESPONSE HANDLING: + - When receiving tool responses: provide concise, natural language responses + - Don't repeat tool response verbatim + - Don't add supplementary information + + {{ function_description }} + """.strip("\n") + ) + + def gen(self, custom_tools: list[ToolDefinition], system_prompt: str | None = None) -> PromptTemplate: + system_prompt = system_prompt or self.DEFAULT_PROMPT + return PromptTemplate( + system_prompt, + {"function_description": self._gen_function_description(custom_tools)}, + ) + + def _gen_function_description(self, custom_tools: list[ToolDefinition]) -> str: + template_str = textwrap.dedent( + """ + Here is a list of functions in JSON format that you can invoke: + [ + {% for t in tools -%} + {# manually setting up JSON because jinja sorts keys in unexpected ways -#} + {%- set tname = t.tool_name -%} + {%- set tdesc = t.description -%} + {%- set tprops = t.input_schema.get('properties', {}) -%} + {%- set required_params = t.input_schema.get('required', []) -%} + { + "name": "{{tname}}", + "description": "{{tdesc}}", + "parameters": { + "type": "dict", + "required": {{ required_params | tojson }}, + "properties": { + {%- for name, param in tprops.items() %} + "{{name}}": { + "type": "{{param.get('type', 'string')}}", + "description": "{{param.get('description', '')}}"{% if param.get('default') %}, + "default": "{{param.get('default')}}"{% endif %} + }{% if not loop.last %},{% endif %} + {%- endfor %} + } + } + }{% if not loop.last %}, + {% endif -%} + {%- endfor %} + ] + """ + ) + return PromptTemplate( + template_str.strip("\n"), + {"tools": [t.model_dump() for t in custom_tools]}, + ).render() + + def data_examples(self) -> list[list[ToolDefinition]]: + return [ + [ + ToolDefinition( + tool_name="get_weather", + description="Get weather info for places", + input_schema={ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The name of the city to get the weather for", + }, + "metric": { + "type": "string", + "description": "The metric for weather. 
Options are: celsius, fahrenheit", + "default": "celsius", + }, + }, + "required": ["city"], + }, + ), + ] + ] diff --git a/llama_stack/models/llama/llama4/prompts.py b/src/llama_stack/models/llama/llama4/prompts.py similarity index 100% rename from llama_stack/models/llama/llama4/prompts.py rename to src/llama_stack/models/llama/llama4/prompts.py diff --git a/llama_stack/models/__init__.py b/src/llama_stack/models/llama/llama4/quantization/__init__.py similarity index 100% rename from llama_stack/models/__init__.py rename to src/llama_stack/models/llama/llama4/quantization/__init__.py diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/src/llama_stack/models/llama/llama4/quantization/loader.py similarity index 100% rename from llama_stack/models/llama/llama4/quantization/loader.py rename to src/llama_stack/models/llama/llama4/quantization/loader.py diff --git a/llama_stack/models/llama/llama4/tokenizer.model b/src/llama_stack/models/llama/llama4/tokenizer.model similarity index 100% rename from llama_stack/models/llama/llama4/tokenizer.model rename to src/llama_stack/models/llama/llama4/tokenizer.model diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/src/llama_stack/models/llama/llama4/tokenizer.py similarity index 100% rename from llama_stack/models/llama/llama4/tokenizer.py rename to src/llama_stack/models/llama/llama4/tokenizer.py diff --git a/llama_stack/models/llama/__init__.py b/src/llama_stack/models/llama/llama4/vision/__init__.py similarity index 100% rename from llama_stack/models/llama/__init__.py rename to src/llama_stack/models/llama/llama4/vision/__init__.py diff --git a/llama_stack/models/llama/llama4/vision/embedding.py b/src/llama_stack/models/llama/llama4/vision/embedding.py similarity index 100% rename from llama_stack/models/llama/llama4/vision/embedding.py rename to src/llama_stack/models/llama/llama4/vision/embedding.py diff --git a/llama_stack/models/llama/llama4/vision/encoder.py b/src/llama_stack/models/llama/llama4/vision/encoder.py similarity index 100% rename from llama_stack/models/llama/llama4/vision/encoder.py rename to src/llama_stack/models/llama/llama4/vision/encoder.py diff --git a/llama_stack/models/llama/prompt_format.py b/src/llama_stack/models/llama/prompt_format.py similarity index 100% rename from llama_stack/models/llama/prompt_format.py rename to src/llama_stack/models/llama/prompt_format.py diff --git a/llama_stack/models/llama/quantize_impls.py b/src/llama_stack/models/llama/quantize_impls.py similarity index 100% rename from llama_stack/models/llama/quantize_impls.py rename to src/llama_stack/models/llama/quantize_impls.py diff --git a/llama_stack/models/llama/resources/dog.jpg b/src/llama_stack/models/llama/resources/dog.jpg similarity index 100% rename from llama_stack/models/llama/resources/dog.jpg rename to src/llama_stack/models/llama/resources/dog.jpg diff --git a/llama_stack/models/llama/resources/pasta.jpeg b/src/llama_stack/models/llama/resources/pasta.jpeg similarity index 100% rename from llama_stack/models/llama/resources/pasta.jpeg rename to src/llama_stack/models/llama/resources/pasta.jpeg diff --git a/llama_stack/models/llama/resources/small_dog.jpg b/src/llama_stack/models/llama/resources/small_dog.jpg similarity index 100% rename from llama_stack/models/llama/resources/small_dog.jpg rename to src/llama_stack/models/llama/resources/small_dog.jpg diff --git a/llama_stack/models/llama/sku_list.py b/src/llama_stack/models/llama/sku_list.py similarity index 100% rename from 
llama_stack/models/llama/sku_list.py rename to src/llama_stack/models/llama/sku_list.py diff --git a/llama_stack/models/llama/sku_types.py b/src/llama_stack/models/llama/sku_types.py similarity index 100% rename from llama_stack/models/llama/sku_types.py rename to src/llama_stack/models/llama/sku_types.py diff --git a/llama_stack/models/llama/tokenizer_utils.py b/src/llama_stack/models/llama/tokenizer_utils.py similarity index 100% rename from llama_stack/models/llama/tokenizer_utils.py rename to src/llama_stack/models/llama/tokenizer_utils.py diff --git a/llama_stack/models/llama/llama3/__init__.py b/src/llama_stack/providers/__init__.py similarity index 100% rename from llama_stack/models/llama/llama3/__init__.py rename to src/llama_stack/providers/__init__.py diff --git a/llama_stack/models/llama/llama3/quantization/__init__.py b/src/llama_stack/providers/inline/__init__.py similarity index 100% rename from llama_stack/models/llama/llama3/quantization/__init__.py rename to src/llama_stack/providers/inline/__init__.py diff --git a/llama_stack/models/llama/llama3_2/__init__.py b/src/llama_stack/providers/inline/agents/__init__.py similarity index 100% rename from llama_stack/models/llama/llama3_2/__init__.py rename to src/llama_stack/providers/inline/agents/__init__.py diff --git a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py new file mode 100644 index 000000000..c9c7d348a --- /dev/null +++ b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.core.datatypes import AccessRule, Api + +from .config import MetaReferenceAgentsImplConfig + + +async def get_provider_impl( + config: MetaReferenceAgentsImplConfig, + deps: dict[Api, Any], + policy: list[AccessRule], +): + from .agents import MetaReferenceAgentsImpl + + impl = MetaReferenceAgentsImpl( + config, + deps[Api.inference], + deps[Api.vector_io], + deps.get(Api.safety), + deps[Api.tool_runtime], + deps[Api.tool_groups], + deps[Api.conversations], + deps[Api.prompts], + deps[Api.files], + policy, + ) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py new file mode 100644 index 000000000..39cc22be7 --- /dev/null +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -0,0 +1,162 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
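The get_provider_impl factory added above pulls each dependency out of a deps mapping keyed by Api. A rough sketch of how a caller might assemble that mapping; the build_agents_provider helper and the impls registry are hypothetical placeholders (in practice the stack's resolver performs this wiring):

from llama_stack.core.datatypes import Api
from llama_stack.providers.inline.agents.meta_reference import get_provider_impl

async def build_agents_provider(config, impls: dict[str, object]):
    # `impls` is a hypothetical registry of already-constructed API implementations.
    # Api.safety is read with deps.get() in the factory and may be absent;
    # every other dependency is required.
    deps = {
        Api.inference: impls["inference"],
        Api.vector_io: impls["vector_io"],
        Api.safety: impls.get("safety"),
        Api.tool_runtime: impls["tool_runtime"],
        Api.tool_groups: impls["tool_groups"],
        Api.conversations: impls["conversations"],
        Api.prompts: impls["prompts"],
        Api.files: impls["files"],
    }
    return await get_provider_impl(config, deps, policy=[])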
+ + +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.kvstore import InmemoryKVStoreImpl, kvstore_impl +from llama_stack.log import get_logger +from llama_stack.providers.utils.responses.responses_store import ResponsesStore +from llama_stack_api import ( + Agents, + Conversations, + Files, + Inference, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + OpenAIDeleteResponseObject, + OpenAIResponseInput, + OpenAIResponseInputTool, + OpenAIResponseObject, + OpenAIResponsePrompt, + OpenAIResponseText, + Order, + Prompts, + ResponseGuardrail, + Safety, + ToolGroups, + ToolRuntime, + VectorIO, +) + +from .config import MetaReferenceAgentsImplConfig +from .responses.openai_responses import OpenAIResponsesImpl + +logger = get_logger(name=__name__, category="agents::meta_reference") + + +class MetaReferenceAgentsImpl(Agents): + def __init__( + self, + config: MetaReferenceAgentsImplConfig, + inference_api: Inference, + vector_io_api: VectorIO, + safety_api: Safety | None, + tool_runtime_api: ToolRuntime, + tool_groups_api: ToolGroups, + conversations_api: Conversations, + prompts_api: Prompts, + files_api: Files, + policy: list[AccessRule], + ): + self.config = config + self.inference_api = inference_api + self.vector_io_api = vector_io_api + self.safety_api = safety_api + self.tool_runtime_api = tool_runtime_api + self.tool_groups_api = tool_groups_api + self.conversations_api = conversations_api + self.prompts_api = prompts_api + self.files_api = files_api + self.in_memory_store = InmemoryKVStoreImpl() + self.openai_responses_impl: OpenAIResponsesImpl | None = None + self.policy = policy + + async def initialize(self) -> None: + self.persistence_store = await kvstore_impl(self.config.persistence.agent_state) + self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy) + await self.responses_store.initialize() + self.openai_responses_impl = OpenAIResponsesImpl( + inference_api=self.inference_api, + tool_groups_api=self.tool_groups_api, + tool_runtime_api=self.tool_runtime_api, + responses_store=self.responses_store, + vector_io_api=self.vector_io_api, + safety_api=self.safety_api, + conversations_api=self.conversations_api, + prompts_api=self.prompts_api, + files_api=self.files_api, + ) + + async def shutdown(self) -> None: + pass + + # OpenAI responses + async def get_openai_response( + self, + response_id: str, + ) -> OpenAIResponseObject: + assert self.openai_responses_impl is not None, "OpenAI responses not initialized" + return await self.openai_responses_impl.get_openai_response(response_id) + + async def create_openai_response( + self, + input: str | list[OpenAIResponseInput], + model: str, + prompt: OpenAIResponsePrompt | None = None, + instructions: str | None = None, + parallel_tool_calls: bool | None = True, + previous_response_id: str | None = None, + conversation: str | None = None, + store: bool | None = True, + stream: bool | None = False, + temperature: float | None = None, + text: OpenAIResponseText | None = None, + tools: list[OpenAIResponseInputTool] | None = None, + include: list[str] | None = None, + max_infer_iters: int | None = 10, + guardrails: list[ResponseGuardrail] | None = None, + max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, + ) -> OpenAIResponseObject: + assert self.openai_responses_impl is not None, "OpenAI responses not initialized" + result = await self.openai_responses_impl.create_openai_response( + input, + model, + prompt, + instructions, + previous_response_id, 
+ conversation, + store, + stream, + temperature, + text, + tools, + include, + max_infer_iters, + guardrails, + parallel_tool_calls, + max_tool_calls, + metadata, + ) + return result # type: ignore[no-any-return] + + async def list_openai_responses( + self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseObject: + assert self.openai_responses_impl is not None, "OpenAI responses not initialized" + return await self.openai_responses_impl.list_openai_responses(after, limit, model, order) + + async def list_openai_response_input_items( + self, + response_id: str, + after: str | None = None, + before: str | None = None, + include: list[str] | None = None, + limit: int | None = 20, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseInputItem: + assert self.openai_responses_impl is not None, "OpenAI responses not initialized" + return await self.openai_responses_impl.list_openai_response_input_items( + response_id, after, before, include, limit, order + ) + + async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: + assert self.openai_responses_impl is not None, "OpenAI responses not initialized" + return await self.openai_responses_impl.delete_openai_response(response_id) diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/src/llama_stack/providers/inline/agents/meta_reference/config.py similarity index 100% rename from llama_stack/providers/inline/agents/meta_reference/config.py rename to src/llama_stack/providers/inline/agents/meta_reference/config.py diff --git a/llama_stack/models/llama/llama3_3/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py similarity index 100% rename from llama_stack/models/llama/llama3_3/__init__.py rename to src/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py new file mode 100644 index 000000000..9cf30908c --- /dev/null +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -0,0 +1,562 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
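One mechanism worth calling out in the OpenAIResponsesImpl defined below: _prepend_prompt resolves {{ variable }} placeholders in a stored prompt with a regex substitution, and media variables are replaced by a textual marker. A self-contained sketch of just that substitution; the substitution values here are hypothetical:

import re

text_substitutions = {"city": "Tokyo", "product_photo": "[Image: product_photo]"}
prompt_text = "Weather in {{ city }}, next to {{product_photo}}; {{unknown}} stays as-is."

def replace_variable(match: re.Match[str]) -> str:
    var_name = match.group(1).strip()
    # Unknown variables fall back to the original "{{name}}" text (match.group(0)).
    return str(text_substitutions.get(var_name, match.group(0)))

pattern = r"\{\{\s*(\w+)\s*\}\}"
print(re.sub(pattern, replace_variable, prompt_text))
# Weather in Tokyo, next to [Image: product_photo]; {{unknown}} stays as-is.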
+ +import re +import time +import uuid +from collections.abc import AsyncIterator + +from pydantic import BaseModel, TypeAdapter + +from llama_stack.log import get_logger +from llama_stack.providers.utils.responses.responses_store import ( + ResponsesStore, + _OpenAIResponseObjectWithInputAndMessages, +) +from llama_stack_api import ( + ConversationItem, + Conversations, + Files, + Inference, + InvalidConversationIdError, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + OpenAIChatCompletionContentPartParam, + OpenAIDeleteResponseObject, + OpenAIMessageParam, + OpenAIResponseInput, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponsePrompt, + OpenAIResponseText, + OpenAIResponseTextFormat, + OpenAISystemMessageParam, + OpenAIUserMessageParam, + Order, + Prompts, + ResponseGuardrailSpec, + Safety, + ToolGroups, + ToolRuntime, + VectorIO, +) + +from .streaming import StreamingResponseOrchestrator +from .tool_executor import ToolExecutor +from .types import ChatCompletionContext, ToolContext +from .utils import ( + convert_response_content_to_chat_content, + convert_response_input_to_chat_messages, + convert_response_text_to_chat_response_format, + extract_guardrail_ids, +) + +logger = get_logger(name=__name__, category="openai_responses") + + +class OpenAIResponsePreviousResponseWithInputItems(BaseModel): + input_items: ListOpenAIResponseInputItem + response: OpenAIResponseObject + + +class OpenAIResponsesImpl: + def __init__( + self, + inference_api: Inference, + tool_groups_api: ToolGroups, + tool_runtime_api: ToolRuntime, + responses_store: ResponsesStore, + vector_io_api: VectorIO, # VectorIO + safety_api: Safety | None, + conversations_api: Conversations, + prompts_api: Prompts, + files_api: Files, + ): + self.inference_api = inference_api + self.tool_groups_api = tool_groups_api + self.tool_runtime_api = tool_runtime_api + self.responses_store = responses_store + self.vector_io_api = vector_io_api + self.safety_api = safety_api + self.conversations_api = conversations_api + self.tool_executor = ToolExecutor( + tool_groups_api=tool_groups_api, + tool_runtime_api=tool_runtime_api, + vector_io_api=vector_io_api, + ) + self.prompts_api = prompts_api + self.files_api = files_api + + async def _prepend_previous_response( + self, + input: str | list[OpenAIResponseInput], + previous_response: _OpenAIResponseObjectWithInputAndMessages, + ): + # Convert Sequence to list for mutation + new_input_items = list(previous_response.input) + new_input_items.extend(previous_response.output) + + if isinstance(input, str): + new_input_items.append(OpenAIResponseMessage(content=input, role="user")) + else: + new_input_items.extend(input) + + return new_input_items + + async def _process_input_with_previous_response( + self, + input: str | list[OpenAIResponseInput], + tools: list[OpenAIResponseInputTool] | None, + previous_response_id: str | None, + conversation: str | None, + ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]: + """Process input with optional previous response context. 
+ + Returns: + tuple: (all_input for storage, messages for chat completion, tool context) + """ + tool_context = ToolContext(tools) + if previous_response_id: + previous_response: _OpenAIResponseObjectWithInputAndMessages = ( + await self.responses_store.get_response_object(previous_response_id) + ) + all_input = await self._prepend_previous_response(input, previous_response) + + if previous_response.messages: + # Use stored messages directly and convert only new input + message_adapter = TypeAdapter(list[OpenAIMessageParam]) + messages = message_adapter.validate_python(previous_response.messages) + new_messages = await convert_response_input_to_chat_messages( + input, previous_messages=messages, files_api=self.files_api + ) + messages.extend(new_messages) + else: + # Backward compatibility: reconstruct from inputs + messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api) + + tool_context.recover_tools_from_previous_response(previous_response) + elif conversation is not None: + conversation_items = await self.conversations_api.list_items(conversation, order="asc") + + # Use stored messages as source of truth (like previous_response.messages) + stored_messages = await self.responses_store.get_conversation_messages(conversation) + + all_input = input + if not conversation_items.data: + # First turn - just convert the new input + messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api) + else: + if not stored_messages: + all_input = conversation_items.data + if isinstance(input, str): + all_input.append( + OpenAIResponseMessage( + role="user", content=[OpenAIResponseInputMessageContentText(text=input)] + ) + ) + else: + all_input.extend(input) + else: + all_input = input + + messages = stored_messages or [] + new_messages = await convert_response_input_to_chat_messages( + all_input, previous_messages=messages, files_api=self.files_api + ) + messages.extend(new_messages) + else: + all_input = input + messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api) + + return all_input, messages, tool_context + + async def _prepend_prompt( + self, + messages: list[OpenAIMessageParam], + openai_response_prompt: OpenAIResponsePrompt | None, + ) -> None: + """Prepend prompt template to messages, resolving text/image/file variables. 
+ + :param messages: List of OpenAIMessageParam objects + :param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables + :returns: None; messages is modified in place + """ + if not openai_response_prompt or not openai_response_prompt.id: + return + + prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None + cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version) + + if not cur_prompt or not cur_prompt.prompt: + return + + cur_prompt_text = cur_prompt.prompt + cur_prompt_variables = cur_prompt.variables + + if not openai_response_prompt.variables: + messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text)) + return + + # Validate that all provided variables exist in the prompt + for name in openai_response_prompt.variables.keys(): + if name not in cur_prompt_variables: + raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}") + + # Separate text and media variables + text_substitutions = {} + media_content_parts: list[OpenAIChatCompletionContentPartParam] = [] + + for name, value in openai_response_prompt.variables.items(): + # Text variable found + if isinstance(value, OpenAIResponseInputMessageContentText): + text_substitutions[name] = value.text + + # Media variable found + elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile): + converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api) + if isinstance(converted_parts, list): + media_content_parts.extend(converted_parts) + + # Eg: {{product_photo}} becomes "[Image: product_photo]" + # This gives the model textual context about what media exists in the prompt + var_type = value.type.replace("input_", "").replace("_", " ").title() + text_substitutions[name] = f"[{var_type}: {name}]" + + def replace_variable(match: re.Match[str]) -> str: + var_name = match.group(1).strip() + return str(text_substitutions.get(var_name, match.group(0))) + + pattern = r"\{\{\s*(\w+)\s*\}\}" + processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text) + + # Insert system message with resolved text + messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text)) + + # If we have media, create a new user message, which allows the model to ingest images and files + if media_content_parts: + messages.append(OpenAIUserMessageParam(content=media_content_parts)) + + async def get_openai_response( + self, + response_id: str, + ) -> OpenAIResponseObject: + response_with_input = await self.responses_store.get_response_object(response_id) + return response_with_input.to_response_object() + + async def list_openai_responses( + self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseObject: + return await self.responses_store.list_responses(after, limit, model, order) + + async def list_openai_response_input_items( + self, + response_id: str, + after: str | None = None, + before: str | None = None, + include: list[str] | None = None, + limit: int | None = 20, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseInputItem: + """List input items for a given OpenAI response. + + :param response_id: The ID of the response to retrieve input items for. + :param after: An item ID to list items after, used for pagination. + :param before: An item ID to list items before, used for pagination. 
+ :param include: Additional fields to include in the response. + :param limit: A limit on the number of objects to be returned. + :param order: The order to return the input items in. + :returns: A ListOpenAIResponseInputItem. + """ + return await self.responses_store.list_response_input_items(response_id, after, before, include, limit, order) + + async def _store_response( + self, + response: OpenAIResponseObject, + input: str | list[OpenAIResponseInput], + messages: list[OpenAIMessageParam], + ) -> None: + new_input_id = f"msg_{uuid.uuid4()}" + # Type input_items_data as the full OpenAIResponseInput union to avoid list invariance issues + input_items_data: list[OpenAIResponseInput] = [] + + if isinstance(input, str): + # synthesize a message from the input string + input_content = OpenAIResponseInputMessageContentText(text=input) + input_content_item = OpenAIResponseMessage( + role="user", + content=[input_content], + id=new_input_id, + ) + input_items_data = [input_content_item] + else: + # we already have a list of messages + for input_item in input: + if isinstance(input_item, OpenAIResponseMessage): + # These may or may not already have an id, so dump to dict, check for id, and add if missing + input_item_dict = input_item.model_dump() + if "id" not in input_item_dict: + input_item_dict["id"] = new_input_id + input_items_data.append(OpenAIResponseMessage(**input_item_dict)) + else: + input_items_data.append(input_item) + + await self.responses_store.store_response_object( + response_object=response, + input=input_items_data, + messages=messages, + ) + + async def create_openai_response( + self, + input: str | list[OpenAIResponseInput], + model: str, + prompt: OpenAIResponsePrompt | None = None, + instructions: str | None = None, + previous_response_id: str | None = None, + conversation: str | None = None, + store: bool | None = True, + stream: bool | None = False, + temperature: float | None = None, + text: OpenAIResponseText | None = None, + tools: list[OpenAIResponseInputTool] | None = None, + include: list[str] | None = None, + max_infer_iters: int | None = 10, + guardrails: list[str | ResponseGuardrailSpec] | None = None, + parallel_tool_calls: bool | None = None, + max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, + ): + stream = bool(stream) + text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text + + # Validate MCP tools: ensure Authorization header is not passed via headers dict + if tools: + from llama_stack_api.openai_responses import OpenAIResponseInputToolMCP + + for tool in tools: + if isinstance(tool, OpenAIResponseInputToolMCP) and tool.headers: + for key in tool.headers.keys(): + if key.lower() == "authorization": + raise ValueError( + "Authorization header cannot be passed via 'headers'. " + "Please use the 'authorization' parameter instead." + ) + + guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else [] + + # Validate that Safety API is available if guardrails are requested + if guardrail_ids and self.safety_api is None: + raise ValueError( + "Cannot process guardrails: Safety API is not configured.\n\n" + "To use guardrails, ensure the Safety API is configured in your stack, or remove " + "the 'guardrails' parameter from your request." + ) + + if conversation is not None: + if previous_response_id is not None: + raise ValueError( + "Mutually exclusive parameters: 'previous_response_id' and 'conversation'. Ensure you are only providing one of these parameters."
+ ) + + if not conversation.startswith("conv_"): + raise InvalidConversationIdError(conversation) + + if max_tool_calls is not None and max_tool_calls < 1: + raise ValueError(f"Invalid {max_tool_calls=}; should be >= 1") + + stream_gen = self._create_streaming_response( + input=input, + conversation=conversation, + model=model, + prompt=prompt, + instructions=instructions, + previous_response_id=previous_response_id, + store=store, + temperature=temperature, + text=text, + tools=tools, + max_infer_iters=max_infer_iters, + guardrail_ids=guardrail_ids, + parallel_tool_calls=parallel_tool_calls, + max_tool_calls=max_tool_calls, + metadata=metadata, + ) + + if stream: + return stream_gen + else: + final_response = None + final_event_type = None + failed_response = None + + async for stream_chunk in stream_gen: + match stream_chunk.type: + case "response.completed" | "response.incomplete": + if final_response is not None: + raise ValueError( + "The response stream produced multiple terminal responses! " + f"Earlier response from {final_event_type}" + ) + final_response = stream_chunk.response + final_event_type = stream_chunk.type + case "response.failed": + failed_response = stream_chunk.response + case _: + pass # Other event types don't have .response + + if failed_response is not None: + error_message = ( + failed_response.error.message + if failed_response and failed_response.error + else "Response stream failed without error details" + ) + raise RuntimeError(f"OpenAI response failed: {error_message}") + + if final_response is None: + raise ValueError("The response stream never reached a terminal state") + return final_response + + async def _create_streaming_response( + self, + input: str | list[OpenAIResponseInput], + model: str, + instructions: str | None = None, + previous_response_id: str | None = None, + conversation: str | None = None, + prompt: OpenAIResponsePrompt | None = None, + store: bool | None = True, + temperature: float | None = None, + text: OpenAIResponseText | None = None, + tools: list[OpenAIResponseInputTool] | None = None, + max_infer_iters: int | None = 10, + guardrail_ids: list[str] | None = None, + parallel_tool_calls: bool | None = True, + max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, + ) -> AsyncIterator[OpenAIResponseObjectStream]: + # These should never be None when called from create_openai_response (which sets defaults) + # but we assert here to help mypy understand the types + assert text is not None, "text must not be None" + assert max_infer_iters is not None, "max_infer_iters must not be None" + + # Input preprocessing + all_input, messages, tool_context = await self._process_input_with_previous_response( + input, tools, previous_response_id, conversation + ) + + if instructions: + messages.insert(0, OpenAISystemMessageParam(content=instructions)) + + # Prepend reusable prompt (if provided) + await self._prepend_prompt(messages, prompt) + + # Structured outputs + response_format = await convert_response_text_to_chat_response_format(text) + + ctx = ChatCompletionContext( + model=model, + messages=messages, + response_tools=tools, + temperature=temperature, + response_format=response_format, + tool_context=tool_context, + inputs=all_input, + ) + + # Create orchestrator and delegate streaming logic + response_id = f"resp_{uuid.uuid4()}" + created_at = int(time.time()) + + orchestrator = StreamingResponseOrchestrator( + inference_api=self.inference_api, + ctx=ctx, + response_id=response_id, + created_at=created_at, + 
prompt=prompt, + text=text, + max_infer_iters=max_infer_iters, + parallel_tool_calls=parallel_tool_calls, + tool_executor=self.tool_executor, + safety_api=self.safety_api, + guardrail_ids=guardrail_ids, + instructions=instructions, + max_tool_calls=max_tool_calls, + metadata=metadata, + ) + + # Stream the response + final_response = None + failed_response = None + + # Type as ConversationItem to avoid list invariance issues + output_items: list[ConversationItem] = [] + async for stream_chunk in orchestrator.create_response(): + match stream_chunk.type: + case "response.completed" | "response.incomplete": + final_response = stream_chunk.response + case "response.failed": + failed_response = stream_chunk.response + case "response.output_item.done": + item = stream_chunk.item + output_items.append(item) + case _: + pass # Other event types + + # Store and sync before yielding terminal events + # This ensures the storage/syncing happens even if the consumer breaks after receiving the event + if ( + stream_chunk.type in {"response.completed", "response.incomplete"} + and final_response + and failed_response is None + ): + messages_to_store = list( + filter(lambda x: not isinstance(x, OpenAISystemMessageParam), orchestrator.final_messages) + ) + if store: + # TODO: we really should work off of output_items instead of "final_messages" + await self._store_response( + response=final_response, + input=all_input, + messages=messages_to_store, + ) + + if conversation: + await self._sync_response_to_conversation(conversation, input, output_items) + await self.responses_store.store_conversation_messages(conversation, messages_to_store) + + yield stream_chunk + + async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: + return await self.responses_store.delete_response_object(response_id) + + async def _sync_response_to_conversation( + self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem] + ) -> None: + """Sync content and response messages to the conversation.""" + # Type as ConversationItem union to avoid list invariance issues + conversation_items: list[ConversationItem] = [] + + if isinstance(input, str): + conversation_items.append( + OpenAIResponseMessage(role="user", content=[OpenAIResponseInputMessageContentText(text=input)]) + ) + elif isinstance(input, list): + conversation_items.extend(input) + + conversation_items.extend(output_items) + + adapter = TypeAdapter(list[ConversationItem]) + validated_items = adapter.validate_python(conversation_items) + await self.conversations_api.add_items(conversation_id, validated_items) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py similarity index 90% rename from llama_stack/providers/inline/agents/meta_reference/responses/streaming.py rename to src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index e80ffcdd1..c778d65e7 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -8,10 +8,23 @@ import uuid from collections.abc import AsyncIterator from typing import Any -from llama_stack.apis.agents.openai_responses import ( +from opentelemetry import trace + +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str +from llama_stack_api 
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
similarity index 90%
rename from llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index e80ffcdd1..c778d65e7 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -8,10 +8,23 @@ import uuid
 from collections.abc import AsyncIterator
 from typing import Any

-from llama_stack.apis.agents.openai_responses import (
+from opentelemetry import trace
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from llama_stack_api import (
     AllowedToolsFilter,
     ApprovalFilter,
+    Inference,
     MCPListToolsTool,
+    ModelNotFoundError,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionToolCall,
+    OpenAIChoice,
+    OpenAIMessageParam,
     OpenAIResponseContentPartOutputText,
     OpenAIResponseContentPartReasoningText,
     OpenAIResponseContentPartRefusal,
@@ -49,25 +62,15 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageMCPListTools,
     OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIResponsePrompt,
     OpenAIResponseText,
     OpenAIResponseUsage,
     OpenAIResponseUsageInputTokensDetails,
     OpenAIResponseUsageOutputTokensDetails,
+    OpenAIToolMessageParam,
+    Safety,
     WebSearchToolTypes,
 )
-from llama_stack.apis.inference import (
-    Inference,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAIChatCompletionToolCall,
-    OpenAIChoice,
-    OpenAIMessageParam,
-)
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.telemetry import tracing

 from .types import ChatCompletionContext, ChatCompletionResult
 from .utils import (
@@ -77,6 +80,7 @@ from .utils import (
 )

 logger = get_logger(name=__name__, category="agents::meta_reference")
+tracer = trace.get_tracer(__name__)


 def convert_tooldef_to_chat_tool(tool_def):
@@ -110,9 +114,13 @@ class StreamingResponseOrchestrator:
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
-        instructions: str,
-        safety_api,
+        instructions: str | None,
+        safety_api: Safety | None,
         guardrail_ids: list[str] | None = None,
+        prompt: OpenAIResponsePrompt | None = None,
+        parallel_tool_calls: bool | None = None,
+        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
     ):
         self.inference_api = inference_api
         self.ctx = ctx
@@ -123,9 +131,19 @@ class StreamingResponseOrchestrator:
         self.tool_executor = tool_executor
         self.safety_api = safety_api
         self.guardrail_ids = guardrail_ids or []
+        self.prompt = prompt
+        # System message that is inserted into the model's context
+        self.instructions = instructions
+        # Whether to allow more than one function tool call generated per turn.
+        self.parallel_tool_calls = parallel_tool_calls
+        # Max number of total calls to built-in tools that can be processed in a response
+        self.max_tool_calls = max_tool_calls
+        self.metadata = metadata
         self.sequence_number = 0
         # Store MCP tool mapping that gets built during tool processing
-        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ctx.tool_context.previous_tools or {}
+        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
+            ctx.tool_context.previous_tools if ctx.tool_context else {}
+        )
         # Track final messages after all tool executions
         self.final_messages: list[OpenAIMessageParam] = []
         # mapping for annotations
@@ -134,8 +152,8 @@
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
-        # system message that is inserted into the model's context
-        self.instructions = instructions
+        # Track total calls made to built-in tools
+        self.accumulated_builtin_tool_calls = 0

     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""
@@ -148,6 +166,7 @@
             model=self.ctx.model,
             status="completed",
             output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
+            metadata=self.metadata,
         )

         return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
@@ -180,6 +199,10 @@
             error=error,
             usage=self.accumulated_usage,
             instructions=self.instructions,
+            prompt=self.prompt,
+            parallel_tool_calls=self.parallel_tool_calls,
+            max_tool_calls=self.max_tool_calls,
+            metadata=self.metadata,
         )

     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
@@ -217,13 +240,16 @@
         while True:
             # Text is the default response format for chat completion so don't need to pass it
             # (some providers don't support non-empty response_format when tools are present)
-            response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
+            response_format = (
+                None if getattr(self.ctx.response_format, "type", None) == "text" else self.ctx.response_format
+            )
             logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
             params = OpenAIChatCompletionRequestWithExtraBody(
                 model=self.ctx.model,
                 messages=messages,
-                tools=self.ctx.chat_tools,
+                # Pydantic models are dict-compatible but mypy treats them as distinct types
+                tools=self.ctx.chat_tools,  # type: ignore[arg-type]
                 stream=True,
                 temperature=self.ctx.temperature,
                 response_format=response_format,
@@ -266,7 +292,12 @@

             # Handle choices with no tool calls
             for choice in current_response.choices:
-                if not (choice.message.tool_calls and self.ctx.response_tools):
+                has_tool_calls = (
+                    isinstance(choice.message, OpenAIAssistantMessageParam)
+                    and choice.message.tool_calls
+                    and self.ctx.response_tools
+                )
+                if not has_tool_calls:
                     output_messages.append(
                         await convert_chat_choice_to_response_message(
                             choice,
@@ -305,6 +336,8 @@
             if last_completion_result and last_completion_result.finish_reason == "length":
                 final_status = "incomplete"

+        except ModelNotFoundError:
+            raise
         except Exception as exc:  # noqa: BLE001
             self.final_messages = messages.copy()
             self.sequence_number += 1
@@ -716,7 +749,10 @@
                         )

                     # Accumulate arguments for final response (only for subsequent chunks)
-                    if not is_new_tool_call:
+                    if not is_new_tool_call and response_tool_call is not None:
+                        # Both should have functions since we're inside the tool_call.function check above
+                        assert response_tool_call.function is not None
+                        assert tool_call.function is not None
                         response_tool_call.function.arguments = (
                             response_tool_call.function.arguments or ""
                         ) + tool_call.function.arguments
@@ -741,10 +777,13 @@
         for tool_call_index in sorted(chat_response_tool_calls.keys()):
             tool_call = chat_response_tool_calls[tool_call_index]
             # Ensure that arguments, if sent back to the inference provider, are not None
-            tool_call.function.arguments = tool_call.function.arguments or "{}"
+            if tool_call.function:
+                tool_call.function.arguments = tool_call.function.arguments or "{}"
             tool_call_item_id = tool_call_item_ids[tool_call_index]
-            final_arguments = tool_call.function.arguments
-            tool_call_name = chat_response_tool_calls[tool_call_index].function.name
+            final_arguments: str = tool_call.function.arguments or "{}" if tool_call.function else "{}"
+            func = chat_response_tool_calls[tool_call_index].function
+
+            tool_call_name = func.name if func else ""

             # Check if this is an MCP tool call
             is_mcp_tool = tool_call_name and tool_call_name in self.mcp_tool_to_server
@@ -874,6 +913,17 @@
         """Coordinate execution of both function and non-function tool calls."""
         # Execute non-function tool calls
         for tool_call in non_function_tool_calls:
+            # if total calls made to built-in and mcp tools exceed max_tool_calls
+            # then create a tool response message indicating the call was skipped
+            if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls:
+                logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.")
+                skipped_call_message = OpenAIToolMessageParam(
+                    content=f"Tool call skipped: maximum tool calls limit ({self.max_tool_calls}) reached.",
+                    tool_call_id=tool_call.id,
+                )
+                next_turn_messages.append(skipped_call_message)
+                continue
+
             # Find the item_id for this tool call
             matching_item_id = None
             for index, item_id in completion_result_data.tool_call_item_ids.items():
@@ -888,12 +938,11 @@
             self.sequence_number += 1

             if tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server:
-                item = OpenAIResponseOutputMessageMCPCall(
+                item: OpenAIResponseOutput = OpenAIResponseOutputMessageMCPCall(
                     arguments="",
                     name=tool_call.function.name,
                     id=matching_item_id,
                     server_label=self.mcp_tool_to_server[tool_call.function.name].server_label,
-                    status="in_progress",
                 )
             elif tool_call.function.name == "web_search":
                 item = OpenAIResponseOutputMessageWebSearchToolCall(
@@ -955,6 +1004,9 @@
             if tool_response_message:
                 next_turn_messages.append(tool_response_message)

+            # Track number of calls made to built-in and mcp tools
+            self.accumulated_builtin_tool_calls += 1
+
         # Execute function tool calls (client-side)
         for tool_call in function_tool_calls:
             # Find the item_id for this tool call from our tracking dictionary
@@ -992,9 +1044,9 @@
         """Process all tools and emit appropriate streaming events."""
         from openai.types.chat import ChatCompletionToolParam

-        from llama_stack.apis.tools import ToolDef
         from llama_stack.models.llama.datatypes import ToolDefinition
         from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
+        from llama_stack_api import ToolDef

         def make_openai_tool(tool_name: str, tool: ToolDef) -> ChatCompletionToolParam:
             tool_def = ToolDefinition(
@@ -1002,7 +1054,7 @@
                 description=tool.description,
                 input_schema=tool.input_schema,
             )
-            return convert_tooldef_to_openai_tool(tool_def)
+            return convert_tooldef_to_openai_tool(tool_def)  # type: ignore[return-value]  # Returns dict but ChatCompletionToolParam expects TypedDict

         # Initialize chat_tools if not already set
         if self.ctx.chat_tools is None:
@@ -1010,7 +1062,7 @@

         for input_tool in tools:
             if input_tool.type == "function":
-                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
+                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))  # type: ignore[typeddict-item,arg-type]  # Dict compatible with FunctionDefinition
             elif input_tool.type in WebSearchToolTypes:
                 tool_name = "web_search"
                 # Need to access tool_groups_api from tool_executor
@@ -1049,8 +1101,8 @@
             if isinstance(mcp_tool.allowed_tools, list):
                 always_allowed = mcp_tool.allowed_tools
             elif isinstance(mcp_tool.allowed_tools, AllowedToolsFilter):
-                always_allowed = mcp_tool.allowed_tools.always
-                never_allowed = mcp_tool.allowed_tools.never
+                # AllowedToolsFilter only has tool_names field (not allowed/disallowed)
+                always_allowed = mcp_tool.allowed_tools.tool_names

         # Call list_mcp_tools
         tool_defs = None
@@ -1060,10 +1112,14 @@
             "server_url": mcp_tool.server_url,
             "mcp_list_tools_id": list_id,
         }
-        async with tracing.span("list_mcp_tools", attributes):
+
+        # TODO: follow semantic conventions for Open Telemetry tool spans
+        # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+        with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
             tool_defs = await list_mcp_tools(
                 endpoint=mcp_tool.server_url,
-                headers=mcp_tool.headers or {},
+                headers=mcp_tool.headers,
+                authorization=mcp_tool.authorization,
             )

         # Create the MCP list tools message
@@ -1082,7 +1138,7 @@
             openai_tool = convert_tooldef_to_chat_tool(t)
             if self.ctx.chat_tools is None:
                 self.ctx.chat_tools = []
-            self.ctx.chat_tools.append(openai_tool)
+            self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type]  # Returns dict but ChatCompletionToolParam expects TypedDict

             # Add to MCP tool mapping
             if t.name in self.mcp_tool_to_server:
@@ -1114,13 +1170,17 @@
         self, output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # Handle all mcp tool lists from previous response that are still valid:
-        for tool in self.ctx.tool_context.previous_tool_listings:
-            async for evt in self._reuse_mcp_list_tools(tool, output_messages):
-                yield evt
-        # Process all remaining tools (including MCP tools) and emit streaming events
-        if self.ctx.tool_context.tools_to_process:
-            async for stream_event in self._process_new_tools(self.ctx.tool_context.tools_to_process, output_messages):
-                yield stream_event
+        # tool_context can be None when no tools are provided in the response request
+        if self.ctx.tool_context:
+            for tool in self.ctx.tool_context.previous_tool_listings:
+                async for evt in self._reuse_mcp_list_tools(tool, output_messages):
+                    yield evt
+            # Process all remaining tools (including MCP tools) and emit streaming events
+            if self.ctx.tool_context.tools_to_process:
+                async for stream_event in self._process_new_tools(
+                    self.ctx.tool_context.tools_to_process, output_messages
+                ):
+                    yield stream_event

     def _approval_required(self, tool_name: str) -> bool:
         if tool_name not in self.mcp_tool_to_server:
@@ -1131,9 +1191,9 @@
         if mcp_server.require_approval == "never":
             return False
         if isinstance(mcp_server, ApprovalFilter):
-            if tool_name in mcp_server.always:
+            if mcp_server.always and tool_name in mcp_server.always:
                 return True
-            if tool_name in mcp_server.never:
+            if mcp_server.never and tool_name in mcp_server.never:
                 return False
         return True

@@ -1214,7 +1274,7 @@
             openai_tool = convert_tooldef_to_openai_tool(tool_def)
             if self.ctx.chat_tools is None:
                 self.ctx.chat_tools = []
-            self.ctx.chat_tools.append(openai_tool)
+            self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type]  # Returns dict but ChatCompletionToolParam expects TypedDict

         mcp_list_message = OpenAIResponseOutputMessageMCPListTools(
             id=f"mcp_list_{uuid.uuid4()}",
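The `max_tool_calls` accounting introduced above budgets only built-in and MCP calls, answering over-budget calls with a synthetic "skipped" tool message instead of executing them. A toy sketch of the same guard, with illustrative names:

```python
# Toy sketch of the budget guard used in _coordinate_tool_execution above:
# built-in/MCP calls past the cap get a synthetic "skipped" response.
def run_tool_calls(tool_calls: list[str], max_tool_calls: int | None) -> list[str]:
    executed = 0
    transcript = []
    for name in tool_calls:
        if max_tool_calls is not None and executed >= max_tool_calls:
            transcript.append(f"{name}: skipped (limit {max_tool_calls} reached)")
            continue
        transcript.append(f"{name}: executed")
        executed += 1
    return transcript


print(run_tool_calls(["web_search", "web_search", "knowledge_search"], max_tool_calls=2))
# ['web_search: executed', 'web_search: executed', 'knowledge_search: skipped (limit 2 reached)']
```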
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
new file mode 100644
index 000000000..d27a0f8ad
--- /dev/null
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -0,0 +1,489 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+import json
+from collections.abc import AsyncIterator
+from typing import Any
+
+from opentelemetry import trace
+
+from llama_stack.log import get_logger
+from llama_stack_api import (
+    ImageContentItem,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionToolCall,
+    OpenAIImageURL,
+    OpenAIResponseInputToolFileSearch,
+    OpenAIResponseInputToolMCP,
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress,
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching,
+    OpenAIResponseObjectStreamResponseMcpCallCompleted,
+    OpenAIResponseObjectStreamResponseMcpCallFailed,
+    OpenAIResponseObjectStreamResponseMcpCallInProgress,
+    OpenAIResponseObjectStreamResponseWebSearchCallCompleted,
+    OpenAIResponseObjectStreamResponseWebSearchCallInProgress,
+    OpenAIResponseObjectStreamResponseWebSearchCallSearching,
+    OpenAIResponseOutputMessageFileSearchToolCall,
+    OpenAIResponseOutputMessageFileSearchToolCallResults,
+    OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIToolMessageParam,
+    TextContentItem,
+    ToolGroups,
+    ToolInvocationResult,
+    ToolRuntime,
+    VectorIO,
+)
+
+from .types import ChatCompletionContext, ToolExecutionResult
+
+logger = get_logger(name=__name__, category="agents::meta_reference")
+tracer = trace.get_tracer(__name__)
+
+
+class ToolExecutor:
+    def __init__(
+        self,
+        tool_groups_api: ToolGroups,
+        tool_runtime_api: ToolRuntime,
+        vector_io_api: VectorIO,
+    ):
+        self.tool_groups_api = tool_groups_api
+        self.tool_runtime_api = tool_runtime_api
+        self.vector_io_api = vector_io_api
+
+    async def execute_tool_call(
+        self,
+        tool_call: OpenAIChatCompletionToolCall,
+        ctx: ChatCompletionContext,
+        sequence_number: int,
+        output_index: int,
+        item_id: str,
+        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
+    ) -> AsyncIterator[ToolExecutionResult]:
+        tool_call_id = tool_call.id
+        function = tool_call.function
+        tool_kwargs = json.loads(function.arguments) if function and function.arguments else {}
+
+        if not function or not tool_call_id or not function.name:
+            yield ToolExecutionResult(sequence_number=sequence_number)
+            return
+
+        # Emit progress events for tool execution start
+        async for event_result in self._emit_progress_events(
+            function.name, ctx, sequence_number, output_index, item_id, mcp_tool_to_server
+        ):
+            sequence_number = event_result.sequence_number
+            yield event_result
+
+        # Execute the actual tool call
+        error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server)
+
+        # Emit completion events for tool execution
+        has_error = bool(
+            error_exc
+            or (
+                result
+                and (
+                    ((error_code := getattr(result, "error_code", None)) and error_code > 0)
+                    or getattr(result, "error_message", None)
+                )
+            )
+        )
+        async for event_result in self._emit_completion_events(
+            function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server
+        ):
+            sequence_number = event_result.sequence_number
+            yield event_result
+
+        # Build result messages from tool execution
+        output_message, input_message = await self._build_result_messages(
+            function, tool_call_id, item_id, tool_kwargs, ctx, error_exc, result, has_error, mcp_tool_to_server
+        )
+
+        # Yield the final result
+        yield ToolExecutionResult(
+            sequence_number=sequence_number,
+            final_output_message=output_message,
+            final_input_message=input_message,
+            citation_files=(
+                metadata.get("citation_files") if result and (metadata := getattr(result, "metadata", None)) else None
+            ),
+        )
+
+    async def _execute_knowledge_search_via_vector_store(
+        self,
+        query: str,
+        response_file_search_tool: OpenAIResponseInputToolFileSearch,
+    ) -> ToolInvocationResult:
+        """Execute knowledge search using vector_stores.search API with filters support."""
+        search_results = []
+
+        # Create search tasks for all vector stores
+        async def search_single_store(vector_store_id):
+            try:
+                search_response = await self.vector_io_api.openai_search_vector_store(
+                    vector_store_id=vector_store_id,
+                    query=query,
+                    filters=response_file_search_tool.filters,
+                    max_num_results=response_file_search_tool.max_num_results,
+                    ranking_options=response_file_search_tool.ranking_options,
+                    rewrite_query=False,
+                )
+                return search_response.data
+            except Exception as e:
+                logger.warning(f"Failed to search vector store {vector_store_id}: {e}")
+                return []
+
+        # Run all searches in parallel using gather
+        search_tasks = [search_single_store(vid) for vid in response_file_search_tool.vector_store_ids]
+        all_results = await asyncio.gather(*search_tasks)
+
+        # Flatten results
+        for results in all_results:
+            search_results.extend(results)
+
+        content_items = []
+        content_items.append(
+            TextContentItem(
+                text=f"knowledge_search tool found {len(search_results)} chunks:\nBEGIN of knowledge_search tool results.\n"
+            )
+        )
+
+        unique_files = set()
+        for i, result_item in enumerate(search_results):
+            chunk_text = result_item.content[0].text if result_item.content else ""
+            # Get file_id from attributes if result_item.file_id is empty
+            file_id = result_item.file_id or (
+                result_item.attributes.get("document_id") if result_item.attributes else None
+            )
+            metadata_text = f"document_id: {file_id}, score: {result_item.score}"
+            if result_item.attributes:
+                metadata_text += f", attributes: {result_item.attributes}"
+            text_content = f"[{i + 1}] {metadata_text} (cite as <|{file_id}|>)\n{chunk_text}\n"
+            content_items.append(TextContentItem(text=text_content))
+            unique_files.add(file_id)
+
+        content_items.append(TextContentItem(text="END of knowledge_search tool results.\n"))
+
+        citation_instruction = ""
+        if unique_files:
+            citation_instruction = (
+                " Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). "
+                "Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)."
+            )
+
+        content_items.append(
+            TextContentItem(
+                text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{citation_instruction}\n',
+            )
+        )
+
+        # handling missing attributes for old versions
+        citation_files = {}
+        for result in search_results:
+            file_id = result.file_id
+            if not file_id and result.attributes:
+                file_id = result.attributes.get("document_id")
+
+            filename = result.filename
+            if not filename and result.attributes:
+                filename = result.attributes.get("filename")
+            if not filename:
+                filename = "unknown"
+
+            citation_files[file_id] = filename
+
+        # Cast to proper InterleavedContent type (list invariance)
+        return ToolInvocationResult(
+            content=content_items,  # type: ignore[arg-type]
+            metadata={
+                "document_ids": [r.file_id for r in search_results],
+                "chunks": [r.content[0].text if r.content else "" for r in search_results],
+                "scores": [r.score for r in search_results],
+                "citation_files": citation_files,
+            },
+        )
+
+    async def _emit_progress_events(
+        self,
+        function_name: str,
+        ctx: ChatCompletionContext,
+        sequence_number: int,
+        output_index: int,
+        item_id: str,
+        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
+    ) -> AsyncIterator[ToolExecutionResult]:
+        """Emit progress events for tool execution start."""
+        # Emit in_progress event based on tool type (only for tools with specific streaming events)
+        if mcp_tool_to_server and function_name in mcp_tool_to_server:
+            sequence_number += 1
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseMcpCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
+                sequence_number=sequence_number,
+            )
+        elif function_name == "web_search":
+            sequence_number += 1
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
+                sequence_number=sequence_number,
+            )
+        elif function_name == "knowledge_search":
+            sequence_number += 1
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
+                sequence_number=sequence_number,
+            )
+
+        # For web search, emit searching event
+        if function_name == "web_search":
+            sequence_number += 1
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallSearching(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
+                sequence_number=sequence_number,
+            )
+
+        # For file search, emit searching event
+        if function_name == "knowledge_search":
+            sequence_number += 1
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallSearching(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
+                sequence_number=sequence_number,
+            )
+
+    async def _execute_tool(
+        self,
+        function_name: str,
+        tool_kwargs: dict,
+        ctx: ChatCompletionContext,
+        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
+    ) -> tuple[Exception | None, Any]:
+        """Execute the tool and return error exception and result."""
+        error_exc = None
+        result = None
+
+        try:
+            if mcp_tool_to_server and function_name in mcp_tool_to_server:
+                from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool
+
+                mcp_tool = mcp_tool_to_server[function_name]
+                attributes = {
+                    "server_label": mcp_tool.server_label,
+                    "server_url": mcp_tool.server_url,
+                    "tool_name": function_name,
+                }
+                # TODO: follow semantic conventions for Open Telemetry tool spans
+                # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+                with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
+                    result = await invoke_mcp_tool(
+                        endpoint=mcp_tool.server_url,
+                        tool_name=function_name,
+                        kwargs=tool_kwargs,
+                        headers=mcp_tool.headers,
+                        authorization=mcp_tool.authorization,
+                    )
+            elif function_name == "knowledge_search":
+                response_file_search_tool = (
+                    next(
+                        (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
+                        None,
+                    )
+                    if ctx.response_tools
+                    else None
+                )
+                if response_file_search_tool:
+                    # Use vector_stores.search API instead of knowledge_search tool
+                    # to support filters and ranking_options
+                    query = tool_kwargs.get("query", "")
+                    with tracer.start_as_current_span("knowledge_search"):
+                        result = await self._execute_knowledge_search_via_vector_store(
+                            query=query,
+                            response_file_search_tool=response_file_search_tool,
+                        )
+            else:
+                attributes = {
+                    "tool_name": function_name,
+                }
+                # TODO: follow semantic conventions for Open Telemetry tool spans
+                # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
+                with tracer.start_as_current_span("invoke_tool", attributes=attributes):
+                    result = await self.tool_runtime_api.invoke_tool(
+                        tool_name=function_name,
+                        kwargs=tool_kwargs,
+                    )
+        except Exception as e:
+            error_exc = e
+
+        return error_exc, result
+
+    async def _emit_completion_events(
+        self,
+        function_name: str,
+        ctx: ChatCompletionContext,
+        sequence_number: int,
+        output_index: int,
+        item_id: str,
+        has_error: bool,
+        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
+    ) -> AsyncIterator[ToolExecutionResult]:
+        """Emit completion or failure events for tool execution."""
+        if mcp_tool_to_server and function_name in mcp_tool_to_server:
+            sequence_number += 1
+            if has_error:
+                mcp_failed_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
+                    sequence_number=sequence_number,
+                )
+                yield ToolExecutionResult(stream_event=mcp_failed_event, sequence_number=sequence_number)
+            else:
+                mcp_completed_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
+                    sequence_number=sequence_number,
+                )
+                yield ToolExecutionResult(stream_event=mcp_completed_event, sequence_number=sequence_number)
+        elif function_name == "web_search":
+            sequence_number += 1
+            web_completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
+                item_id=item_id,
+                output_index=output_index,
+                sequence_number=sequence_number,
+            )
+            yield ToolExecutionResult(stream_event=web_completion_event, sequence_number=sequence_number)
+        elif function_name == "knowledge_search":
+            sequence_number += 1
+            file_completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
+                item_id=item_id,
+                output_index=output_index,
+                sequence_number=sequence_number,
+            )
+            yield ToolExecutionResult(stream_event=file_completion_event, sequence_number=sequence_number)
+
+    async def _build_result_messages(
+        self,
+        function,
+        tool_call_id: str,
+        item_id: str,
+        tool_kwargs: dict,
+        ctx: ChatCompletionContext,
+        error_exc: Exception | None,
+        result: Any,
+        has_error: bool,
+        mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
+    ) -> tuple[Any, Any]:
+        """Build output and input messages from tool execution results."""
+        from llama_stack.providers.utils.inference.prompt_adapter import (
+            interleaved_content_as_str,
+        )
+
+        # Build output message
+        message: Any
+        if mcp_tool_to_server and function.name in mcp_tool_to_server:
+            from llama_stack_api import (
+                OpenAIResponseOutputMessageMCPCall,
+            )
+
+            message = OpenAIResponseOutputMessageMCPCall(
+                id=item_id,
+                arguments=function.arguments,
+                name=function.name,
+                server_label=mcp_tool_to_server[function.name].server_label,
+            )
+            if error_exc:
+                message.error = str(error_exc)
+            elif (result and (error_code := getattr(result, "error_code", None)) and error_code > 0) or (
+                result and getattr(result, "error_message", None)
+            ):
+                ec = getattr(result, "error_code", "unknown")
+                em = getattr(result, "error_message", "")
+                message.error = f"Error (code {ec}): {em}"
+            elif result and (content := getattr(result, "content", None)):
+                message.output = interleaved_content_as_str(content)
+        else:
+            if function.name == "web_search":
+                message = OpenAIResponseOutputMessageWebSearchToolCall(
+                    id=item_id,
+                    status="completed",
+                )
+                if has_error:
+                    message.status = "failed"
+            elif function.name == "knowledge_search":
+                message = OpenAIResponseOutputMessageFileSearchToolCall(
+                    id=item_id,
+                    queries=[tool_kwargs.get("query", "")],
+                    status="completed",
+                )
+                if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
+                    message.results = []
+                    for i, doc_id in enumerate(metadata["document_ids"]):
+                        text = metadata["chunks"][i] if "chunks" in metadata else None
+                        score = metadata["scores"][i] if "scores" in metadata else None
+                        message.results.append(
+                            OpenAIResponseOutputMessageFileSearchToolCallResults(
+                                file_id=doc_id,
+                                filename=doc_id,
+                                text=text if text is not None else "",
+                                score=score if score is not None else 0.0,
+                                attributes={},
+                            )
+                        )
+                if has_error:
+                    message.status = "failed"
+            else:
+                raise ValueError(f"Unknown tool {function.name} called")
+
+        # Build input message
+        input_message: OpenAIToolMessageParam | None = None
+        if result and (result_content := getattr(result, "content", None)):
+            # all the mypy contortions here are still unsatisfactory with random Any typing
+            if isinstance(result_content, str):
+                msg_content: str | list[Any] = result_content
+            elif isinstance(result_content, list):
+                content_list: list[Any] = []
+                for item in result_content:
+                    part: Any
+                    if isinstance(item, TextContentItem):
+                        part = OpenAIChatCompletionContentPartTextParam(text=item.text)
+                    elif isinstance(item, ImageContentItem):
+                        if item.image.data:
+                            url_value = f"data:image;base64,{item.image.data}"
+                        else:
+                            url_value = str(item.image.url) if item.image.url else ""
+                        part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url_value))
+                    else:
+                        raise ValueError(f"Unknown result content type: {type(item)}")
+                    content_list.append(part)
+                msg_content = content_list
+            else:
+                raise ValueError(f"Unknown result content type: {type(result_content)}")
+            # OpenAIToolMessageParam accepts str | list[TextParam] but we may have images
+            # This is runtime-safe as the API accepts it, but mypy complains
+            input_message = OpenAIToolMessageParam(content=msg_content, tool_call_id=tool_call_id)  # type: ignore[arg-type]
+        else:
+            text = str(error_exc) if error_exc else "Tool execution failed"
+            input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
+
+        return message, input_message
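`_execute_knowledge_search_via_vector_store` above fans out one search task per vector store and treats a failing store as an empty result. A self-contained sketch of that gather-and-flatten pattern (the `search` coroutine here is a stand-in, not the real API):

```python
import asyncio


# Minimal sketch of the per-store fan-out used above: one task per vector
# store, errors degrade to an empty result list, and hits are flattened.
async def search(store_id: str, query: str) -> list[str]:
    if store_id == "vs_broken":
        raise RuntimeError("store unavailable")
    return [f"{store_id}: hit for {query!r}"]


async def search_all(store_ids: list[str], query: str) -> list[str]:
    async def one(store_id: str) -> list[str]:
        try:
            return await search(store_id, query)
        except Exception:
            return []  # a failing store should not sink the whole search

    results = await asyncio.gather(*(one(s) for s in store_ids))
    return [hit for chunk in results for hit in chunk]


print(asyncio.run(search_all(["vs_1", "vs_broken", "vs_2"], "llama")))
```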
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
similarity index 86%
rename from llama_stack/providers/inline/agents/meta_reference/responses/types.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 829badf38..f6efcee22 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -5,11 +5,15 @@
 # the root directory of this source tree.

 from dataclasses import dataclass
+from typing import cast

 from openai.types.chat import ChatCompletionToolParam
 from pydantic import BaseModel

-from llama_stack.apis.agents.openai_responses import (
+from llama_stack_api import (
+    OpenAIChatCompletionToolCall,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     OpenAIResponseInput,
     OpenAIResponseInputTool,
     OpenAIResponseInputToolFileSearch,
@@ -25,7 +29,6 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseTool,
     OpenAIResponseToolMCP,
 )
-from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam


 class ToolExecutionResult(BaseModel):
@@ -100,17 +103,19 @@ class ToolContext(BaseModel):
             if isinstance(tool, OpenAIResponseToolMCP):
                 previous_tools_by_label[tool.server_label] = tool
         # collect tool definitions which are the same in current and previous requests:
-        tools_to_process = []
+        tools_to_process: list[OpenAIResponseInputTool] = []
         matched: dict[str, OpenAIResponseInputToolMCP] = {}
-        for tool in self.current_tools:
+        # Mypy confuses OpenAIResponseInputTool (Input union) with OpenAIResponseTool (output union)
+        # which differ only in MCP type (InputToolMCP vs ToolMCP). Code is correct.
+        for tool in cast(list[OpenAIResponseInputTool], self.current_tools):  # type: ignore[assignment]
             if isinstance(tool, OpenAIResponseInputToolMCP) and tool.server_label in previous_tools_by_label:
                 previous_tool = previous_tools_by_label[tool.server_label]
                 if previous_tool.allowed_tools == tool.allowed_tools:
                     matched[tool.server_label] = tool
                 else:
-                    tools_to_process.append(tool)
+                    tools_to_process.append(tool)  # type: ignore[arg-type]
             else:
-                tools_to_process.append(tool)
+                tools_to_process.append(tool)  # type: ignore[arg-type]
         # tools that are not the same or were not previously defined need to be processed:
         self.tools_to_process = tools_to_process
         # for all matched definitions, get the mcp_list_tools objects from the previous output:
@@ -119,9 +124,11 @@ class ToolContext(BaseModel):
         ]
         # reconstruct the tool to server mappings that can be reused:
         for listing in self.previous_tool_listings:
+            # listing is OpenAIResponseOutputMessageMCPListTools which has tools: list[MCPListToolsTool]
             definition = matched[listing.server_label]
-            for tool in listing.tools:
-                self.previous_tools[tool.name] = definition
+            for mcp_tool in listing.tools:
+                # mcp_tool is MCPListToolsTool which has a name: str field
+                self.previous_tools[mcp_tool.name] = definition

     def available_tools(self) -> list[OpenAIResponseTool]:
         if not self.current_tools:
@@ -139,6 +146,8 @@ class ToolContext(BaseModel):
                     server_label=tool.server_label,
                     allowed_tools=tool.allowed_tools,
                 )
+            # Exhaustive check - all tool types should be handled above
+            raise AssertionError(f"Unexpected tool type: {type(tool)}")

         return [convert_tool(tool) for tool in self.current_tools]
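`ToolContext` above reuses a previous MCP tool listing only when the server definition (label plus `allowed_tools`) is unchanged; otherwise the server is re-listed. A stripped-down sketch of that matching rule, with simplified field sets rather than the actual models:

```python
# Toy sketch: reuse a cached listing only when the server definition
# (label + allowed_tools) is unchanged; otherwise re-list its tools.
from dataclasses import dataclass


@dataclass(frozen=True)
class McpServer:
    server_label: str
    allowed_tools: tuple[str, ...] | None


def plan_listings(current: list[McpServer], previous: dict[str, McpServer]) -> tuple[list[str], list[str]]:
    reuse, relist = [], []
    for server in current:
        prior = previous.get(server.server_label)
        if prior and prior.allowed_tools == server.allowed_tools:
            reuse.append(server.server_label)
        else:
            relist.append(server.server_label)
    return reuse, relist


prev = {"docs": McpServer("docs", ("search",))}
print(plan_listings([McpServer("docs", ("search",)), McpServer("code", None)], prev))
# (['docs'], ['code'])
```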
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
new file mode 100644
index 000000000..7bbf6bd30
--- /dev/null
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
@@ -0,0 +1,501 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+import base64
+import mimetypes
+import re
+import uuid
+from collections.abc import Sequence
+
+from llama_stack_api import (
+    Files,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionToolCall,
+    OpenAIChatCompletionToolCallFunction,
+    OpenAIChoice,
+    OpenAIDeveloperMessageParam,
+    OpenAIFile,
+    OpenAIFileFile,
+    OpenAIImageURL,
+    OpenAIJSONSchema,
+    OpenAIMessageParam,
+    OpenAIResponseAnnotationFileCitation,
+    OpenAIResponseFormatJSONObject,
+    OpenAIResponseFormatJSONSchema,
+    OpenAIResponseFormatParam,
+    OpenAIResponseFormatText,
+    OpenAIResponseInput,
+    OpenAIResponseInputFunctionToolCallOutput,
+    OpenAIResponseInputMessageContent,
+    OpenAIResponseInputMessageContentFile,
+    OpenAIResponseInputMessageContentImage,
+    OpenAIResponseInputMessageContentText,
+    OpenAIResponseInputTool,
+    OpenAIResponseMCPApprovalRequest,
+    OpenAIResponseMCPApprovalResponse,
+    OpenAIResponseMessage,
+    OpenAIResponseOutputMessageContent,
+    OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFileSearchToolCall,
+    OpenAIResponseOutputMessageFunctionToolCall,
+    OpenAIResponseOutputMessageMCPCall,
+    OpenAIResponseOutputMessageMCPListTools,
+    OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIResponseText,
+    OpenAISystemMessageParam,
+    OpenAIToolMessageParam,
+    OpenAIUserMessageParam,
+    ResponseGuardrailSpec,
+    Safety,
+)
+
+
+async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes:
+    """
+    Extract raw bytes from file using the Files API.
+
+    :param file_id: The file identifier (e.g., "file-abc123")
+    :param files_api: Files API instance
+    :returns: Raw file content as bytes
+    :raises: ValueError if file cannot be retrieved
+    """
+    try:
+        response = await files_api.openai_retrieve_file_content(file_id)
+        return bytes(response.body)
+    except Exception as e:
+        raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e
+
+
+def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
+    """
+    Converts raw binary bytes into a safe ASCII text representation for URLs.
+
+    :param raw_bytes: the actual bytes that represent the file content
+    :returns: base64-encoded ASCII string
+    """
+    return base64.b64encode(raw_bytes).decode("utf-8")
+
+
+def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
+    """
+    Construct a data URL with the base64-encoded content inline.
+
+    :param ascii_text: base64-encoded ASCII content
+    :param mime_type: MIME type of the file
+    :returns: data URL string (e.g. data:text/html;base64,PGgxPkhlbGxvLCBXb3JsZCE8L2gxPg==)
+    """
+    if not mime_type:
+        mime_type = "application/octet-stream"
+
+    return f"data:{mime_type};base64,{ascii_text}"
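A quick check of what the helpers above compose to, inlining the base64 step (the payload is illustrative):

```python
import base64

# What construct_data_url produces for a tiny HTML payload.
payload = b"<h1>Hello, World!</h1>"
ascii_text = base64.b64encode(payload).decode("utf-8")  # "PGgxPkhlbGxvLCBXb3JsZCE8L2gxPg=="
print(f"data:text/html;base64,{ascii_text}")
# data:text/html;base64,PGgxPkhlbGxvLCBXb3JsZCE8L2gxPg==
```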
+
+
+async def convert_chat_choice_to_response_message(
+    choice: OpenAIChoice,
+    citation_files: dict[str, str] | None = None,
+    *,
+    message_id: str | None = None,
+) -> OpenAIResponseMessage:
+    """Convert an OpenAI Chat Completion choice into an OpenAI Response output message."""
+    output_content = ""
+    if isinstance(choice.message.content, str):
+        output_content = choice.message.content
+    elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
+        output_content = choice.message.content.text
+    else:
+        raise ValueError(
+            f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
+        )
+
+    annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {})
+
+    return OpenAIResponseMessage(
+        id=message_id or f"msg_{uuid.uuid4()}",
+        content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=list(annotations))],
+        status="completed",
+        role="assistant",
+    )
+
+
+async def convert_response_content_to_chat_content(
+    content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
+    files_api: Files | None,
+) -> str | list[OpenAIChatCompletionContentPartParam]:
+    """
+    Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
+
+    The content schemas of each API look similar, but are not exactly the same.
+
+    :param content: The content to convert
+    :param files_api: Files API for resolving file_id to raw file content (required if content contains files/images)
+    """
+    if isinstance(content, str):
+        return content
+
+    # Type with union to avoid list invariance issues
+    converted_parts: list[OpenAIChatCompletionContentPartParam] = []
+    for content_part in content:
+        if isinstance(content_part, OpenAIResponseInputMessageContentText):
+            converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
+        elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
+            converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
+        elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
+            detail = content_part.detail
+            image_mime_type = None
+            if content_part.image_url:
+                image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
+                converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
+            elif content_part.file_id:
+                if files_api is None:
+                    raise ValueError("file_ids are not supported by this implementation of the Stack")
+                image_file_response = await files_api.openai_retrieve_file(content_part.file_id)
+                if image_file_response.filename:
+                    image_mime_type, _ = mimetypes.guess_type(image_file_response.filename)
+                raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api)
+                ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes)
+                image_data_url = construct_data_url(ascii_text, image_mime_type)
+                image_url = OpenAIImageURL(url=image_data_url, detail=detail)
+                converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
+            else:
+                raise ValueError(
+                    f"Image content must have either 'image_url' or 'file_id'. "
+                    f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
+                )
+        elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
+            resolved_file_data = None
+            file_data = content_part.file_data
+            file_id = content_part.file_id
+            file_url = content_part.file_url
+            filename = content_part.filename
+            file_mime_type = None
+            if not any([file_data, file_id, file_url]):
+                raise ValueError(
+                    f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. "
+                    f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}"
+                )
+            if file_id:
+                if files_api is None:
+                    raise ValueError("file_ids are not supported by this implementation of the Stack")
+
+                file_response = await files_api.openai_retrieve_file(file_id)
+                if not filename:
+                    filename = file_response.filename
+                file_mime_type, _ = mimetypes.guess_type(file_response.filename)
+                raw_file_bytes = await extract_bytes_from_file(file_id, files_api)
+                ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes)
+                resolved_file_data = construct_data_url(ascii_text, file_mime_type)
+            elif file_data:
+                if file_data.startswith("data:"):
+                    resolved_file_data = file_data
+                else:
+                    # Raw base64 data, wrap in data URL format
+                    if filename:
+                        file_mime_type, _ = mimetypes.guess_type(filename)
+                    resolved_file_data = construct_data_url(file_data, file_mime_type)
+            elif file_url:
+                resolved_file_data = file_url
+            converted_parts.append(
+                OpenAIFile(
+                    file=OpenAIFileFile(
+                        file_data=resolved_file_data,
+                        filename=filename,
+                    )
+                )
+            )
+        elif isinstance(content_part, str):
+            converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
+        else:
+            raise ValueError(
+                f"Llama Stack OpenAI Responses does not yet support content type '{type(content_part)}' in this context"
+            )
+    return converted_parts
+
+
+async def convert_response_input_to_chat_messages(
+    input: str | list[OpenAIResponseInput],
+    previous_messages: list[OpenAIMessageParam] | None = None,
+    files_api: Files | None = None,
+) -> list[OpenAIMessageParam]:
+    """
+    Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
+
+    :param input: The input to convert
+    :param previous_messages: Optional previous messages to check for function_call references
+    :param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
+    """
+    messages: list[OpenAIMessageParam] = []
+    if isinstance(input, list):
+        # extract all OpenAIResponseInputFunctionToolCallOutput items
+        # so their corresponding OpenAIToolMessageParam instances can
+        # be added immediately following the corresponding
+        # OpenAIAssistantMessageParam
+        tool_call_results = {}
+        for input_item in input:
+            if isinstance(input_item, OpenAIResponseInputFunctionToolCallOutput):
+                tool_call_results[input_item.call_id] = OpenAIToolMessageParam(
+                    content=input_item.output,
+                    tool_call_id=input_item.call_id,
+                )
+
+        for input_item in input:
+            if isinstance(input_item, OpenAIResponseInputFunctionToolCallOutput):
+                # skip as these have been extracted and inserted in order
+                pass
+            elif isinstance(input_item, OpenAIResponseOutputMessageFunctionToolCall):
+                tool_call = OpenAIChatCompletionToolCall(
+                    index=0,
+                    id=input_item.call_id,
+                    function=OpenAIChatCompletionToolCallFunction(
+                        name=input_item.name,
+                        arguments=input_item.arguments,
+                    ),
+                )
+                messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
+                if input_item.call_id in tool_call_results:
+                    messages.append(tool_call_results[input_item.call_id])
+                    del tool_call_results[input_item.call_id]
+            elif isinstance(input_item, OpenAIResponseOutputMessageMCPCall):
+                tool_call = OpenAIChatCompletionToolCall(
+                    index=0,
+                    id=input_item.id,
+                    function=OpenAIChatCompletionToolCallFunction(
+                        name=input_item.name,
+                        arguments=input_item.arguments,
+                    ),
+                )
+                messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
+                # Output can be None, use empty string as fallback
+                output_content = input_item.output if input_item.output is not None else ""
+                messages.append(
+                    OpenAIToolMessageParam(
+                        content=output_content,
+                        tool_call_id=input_item.id,
+                    )
+                )
+            elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
+                # the tool list will be handled separately
+                pass
+            elif isinstance(
+                input_item,
+                OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall,
+            ):
+                # these tool calls are tracked internally but not converted to chat messages
+                pass
+            elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
+                input_item, OpenAIResponseMCPApprovalResponse
+            ):
+                # these are handled by the responses impl itself and not passed through to chat completions
+                pass
+            elif isinstance(input_item, OpenAIResponseMessage):
+                # Narrow type to OpenAIResponseMessage which has content and role attributes
+                content = await convert_response_content_to_chat_content(input_item.content, files_api)
+                message_type = await get_message_type_by_role(input_item.role)
+                if message_type is None:
+                    raise ValueError(
+                        f"Llama Stack OpenAI Responses does not yet support message role '{input_item.role}' in this context"
+                    )
+                # Skip user messages that duplicate the last user message in previous_messages
+                # This handles cases where input includes context for function_call_outputs
+                if previous_messages and input_item.role == "user":
+                    last_user_msg = None
+                    for msg in reversed(previous_messages):
+                        if isinstance(msg, OpenAIUserMessageParam):
+                            last_user_msg = msg
+                            break
+                    if last_user_msg:
+                        last_user_content = getattr(last_user_msg, "content", None)
+                        if last_user_content == content:
+                            continue  # Skip duplicate user message
+                # Dynamic message type call - different message types have different content expectations
+                messages.append(message_type(content=content))  # type: ignore[call-arg,arg-type]
+        if len(tool_call_results):
+            # Check if unpaired function_call_outputs reference function_calls from previous messages
+            if previous_messages:
+                previous_call_ids = _extract_tool_call_ids(previous_messages)
+                for call_id in list(tool_call_results.keys()):
+                    if call_id in previous_call_ids:
+                        # Valid: this output references a call from previous messages
+                        # Add the tool message
+                        messages.append(tool_call_results[call_id])
+                        del tool_call_results[call_id]
+
+            # If still have unpaired outputs, error
+            if len(tool_call_results):
+                raise ValueError(
+                    f"Received function_call_output(s) with call_id(s) {tool_call_results.keys()}, but no corresponding function_call"
+                )
+    else:
+        messages.append(OpenAIUserMessageParam(content=input))
+    return messages
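The two-pass logic above first indexes `function_call_output` items by `call_id`, then emits each assistant `function_call` followed immediately by its tool result. A stripped-down sketch using plain dicts in place of the Pydantic message types:

```python
# Stripped-down two-pass pairing: index outputs by call_id first, then emit
# each call followed immediately by its matching output.
def pair_calls(items: list[dict]) -> list[dict]:
    outputs = {i["call_id"]: i for i in items if i["type"] == "function_call_output"}
    messages = []
    for item in items:
        if item["type"] == "function_call":
            messages.append({"role": "assistant", "tool_calls": [item["call_id"]]})
            if item["call_id"] in outputs:
                messages.append({"role": "tool", "tool_call_id": item["call_id"]})
    return messages


items = [
    {"type": "function_call", "call_id": "call_1"},
    {"type": "function_call_output", "call_id": "call_1"},
]
print(pair_calls(items))
# [{'role': 'assistant', 'tool_calls': ['call_1']}, {'role': 'tool', 'tool_call_id': 'call_1'}]
```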
+
+
+def _extract_tool_call_ids(messages: list[OpenAIMessageParam]) -> set[str]:
+    """Extract all tool_call IDs from messages."""
+    call_ids = set()
+    for msg in messages:
+        if isinstance(msg, OpenAIAssistantMessageParam):
+            tool_calls = getattr(msg, "tool_calls", None)
+            if tool_calls:
+                for tool_call in tool_calls:
+                    # tool_call is a Pydantic model, use attribute access
+                    call_ids.add(tool_call.id)
+    return call_ids
+
+
+async def convert_response_text_to_chat_response_format(
+    text: OpenAIResponseText,
+) -> OpenAIResponseFormatParam:
+    """
+    Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format.
+    """
+    if not text.format or text.format["type"] == "text":
+        return OpenAIResponseFormatText(type="text")
+    if text.format["type"] == "json_object":
+        return OpenAIResponseFormatJSONObject()
+    if text.format["type"] == "json_schema":
+        # Assert name exists for json_schema format
+        assert text.format.get("name"), "json_schema format requires a name"
+        schema_name: str = text.format["name"]  # type: ignore[assignment]
+        return OpenAIResponseFormatJSONSchema(
+            json_schema=OpenAIJSONSchema(name=schema_name, schema=text.format["schema"])
+        )
+    raise ValueError(f"Unsupported text format: {text.format}")
+
+
+async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None:
+    """Get the appropriate OpenAI message parameter type for a given role."""
+    role_to_type = {
+        "user": OpenAIUserMessageParam,
+        "system": OpenAISystemMessageParam,
+        "assistant": OpenAIAssistantMessageParam,
+        "developer": OpenAIDeveloperMessageParam,
+    }
+    return role_to_type.get(role)  # type: ignore[return-value]  # Pydantic models use ModelMetaclass
+
+
+def _extract_citations_from_text(
+    text: str, citation_files: dict[str, str]
+) -> tuple[list[OpenAIResponseAnnotationFileCitation], str]:
+    """Extract citation markers from text and create annotations
+
+    Args:
+        text: The text containing citation markers like <|file-Cn3MSNn72ENTiiq11Qda4A|>
+        citation_files: Dictionary mapping file_id to filename
+
+    Returns:
+        Tuple of (annotations_list, clean_text_without_markers)
+    """
+    file_id_regex = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")
+
+    annotations = []
+    parts = []
+    total_len = 0
+    last_end = 0
+
+    for m in file_id_regex.finditer(text):
+        # segment before the marker
+        prefix = text[last_end : m.start()]
+
+        # drop one space if it exists (since marker is at sentence end)
+        if prefix.endswith(" "):
+            prefix = prefix[:-1]
+
+        parts.append(prefix)
+        total_len += len(prefix)
+
+        fid = m.group(1)
+        if fid in citation_files:
+            annotations.append(
+                OpenAIResponseAnnotationFileCitation(
+                    file_id=fid,
+                    filename=citation_files[fid],
+                    index=total_len,  # index points to punctuation
+                )
+            )
+
+        last_end = m.end()
+
+    parts.append(text[last_end:])
+    cleaned_text = "".join(parts)
+    return annotations, cleaned_text
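The citation regex above can be exercised in isolation; a small demo of the `<|file-...|>` marker format it matches and strips (the file ID is made up):

```python
import re

# The named group matches the file ID inside a <|file-...|> citation marker.
file_id_regex = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")

text = "The sky is blue <|file-abc123|>."
for m in file_id_regex.finditer(text):
    print(m.group("file_id"))  # file-abc123
print(file_id_regex.sub("", text).replace(" .", "."))  # The sky is blue.
```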
+
+
+def is_function_tool_call(
+    tool_call: OpenAIChatCompletionToolCall,
+    tools: list[OpenAIResponseInputTool],
+) -> bool:
+    if not tool_call.function:
+        return False
+    for t in tools:
+        if t.type == "function" and t.name == tool_call.function.name:
+            return True
+    return False
+
+
+async def run_guardrails(safety_api: Safety | None, messages: str, guardrail_ids: list[str]) -> str | None:
+    """Run guardrails against messages and return violation message if blocked."""
+    if not messages:
+        return None
+
+    # If safety API is not available, skip guardrails
+    if safety_api is None:
+        return None
+
+    # Look up shields to get their provider_resource_id (actual model ID)
+    model_ids = []
+    # TODO: list_shields not in Safety interface but available at runtime via API routing
+    shields_list = await safety_api.routing_table.list_shields()  # type: ignore[attr-defined]
+
+    for guardrail_id in guardrail_ids:
+        matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id]
+        if matching_shields:
+            model_id = matching_shields[0].provider_resource_id
+            model_ids.append(model_id)
+        else:
+            raise ValueError(f"No shield found with identifier '{guardrail_id}'")
+
+    guardrail_tasks = [safety_api.run_moderation(messages, model=model_id) for model_id in model_ids]
+    responses = await asyncio.gather(*guardrail_tasks)
+
+    for response in responses:
+        for result in response.results:
+            if result.flagged:
+                message = result.user_message or "Content blocked by safety guardrails"
+                flagged_categories = (
+                    [cat for cat, flagged in result.categories.items() if flagged] if result.categories else []
+                )
+                violation_type = result.metadata.get("violation_type", []) if result.metadata else []
+
+                if flagged_categories:
+                    message += f" (flagged for: {', '.join(flagged_categories)})"
+                if violation_type:
+                    message += f" (violation type: {', '.join(violation_type)})"
+
+                return message
+
+    # No violations found
+    return None
+
+
+def extract_guardrail_ids(guardrails: list | None) -> list[str]:
+    """Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects."""
+    if not guardrails:
+        return []
+
+    guardrail_ids = []
+    for guardrail in guardrails:
+        if isinstance(guardrail, str):
+            guardrail_ids.append(guardrail)
+        elif isinstance(guardrail, ResponseGuardrailSpec):
+            guardrail_ids.append(guardrail.type)
+        else:
+            raise ValueError(f"Unknown guardrail format: {guardrail}, expected str or ResponseGuardrailSpec")
+
+    return guardrail_ids
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
new file mode 100644
index 000000000..123a2e283
--- /dev/null
+++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -0,0 +1,47 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+
+from llama_stack.log import get_logger
+from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
+
+log = get_logger(name=__name__, category="agents::meta_reference")
+
+
+class SafetyException(Exception):  # noqa: N818
+    def __init__(self, violation: SafetyViolation):
+        self.violation = violation
+        super().__init__(violation.user_message)
+
+
+class ShieldRunnerMixin:
+    def __init__(
+        self,
+        safety_api: Safety,
+        input_shields: list[str] | None = None,
+        output_shields: list[str] | None = None,
+    ):
+        self.safety_api = safety_api
+        self.input_shields = input_shields
+        self.output_shields = output_shields
+
+    async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
+        responses = await asyncio.gather(
+            *[
+                self.safety_api.run_shield(shield_id=identifier, messages=messages, params={})
+                for identifier in identifiers
+            ]
+        )
+        for identifier, response in zip(identifiers, responses, strict=False):
+            if not response.violation:
+                continue
+
+            violation = response.violation
+            if violation.violation_level == ViolationLevel.ERROR:
+                raise SafetyException(violation)
+            elif violation.violation_level == ViolationLevel.WARN:
+                log.warning(f"[Warn]{identifier} raised a warning")
diff --git a/llama_stack/models/llama/llama4/__init__.py b/src/llama_stack/providers/inline/batches/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/__init__.py
rename to src/llama_stack/providers/inline/batches/__init__.py
diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
new file mode 100644
index 000000000..b48c82864
--- /dev/null
+++ b/src/llama_stack/providers/inline/batches/reference/__init__.py
@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from llama_stack.core.datatypes import AccessRule, Api
+from llama_stack.core.storage.kvstore import kvstore_impl
+from llama_stack_api import Files, Inference, Models
+
+from .batches import ReferenceBatchesImpl
+from .config import ReferenceBatchesImplConfig
+
+__all__ = ["ReferenceBatchesImpl", "ReferenceBatchesImplConfig"]
+
+
+async def get_provider_impl(config: ReferenceBatchesImplConfig, deps: dict[Api, Any], policy: list[AccessRule]):
+    kvstore = await kvstore_impl(config.kvstore)
+    inference_api: Inference | None = deps.get(Api.inference)
+    files_api: Files | None = deps.get(Api.files)
+    models_api: Models | None = deps.get(Api.models)
+
+    if inference_api is None:
+        raise ValueError("Inference API is required but not provided in dependencies")
+    if files_api is None:
+        raise ValueError("Files API is required but not provided in dependencies")
+    if models_api is None:
+        raise ValueError("Models API is required but not provided in dependencies")
+
+    impl = ReferenceBatchesImpl(config, inference_api, files_api, models_api, kvstore)
+    await impl.initialize()
+    return impl
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import asyncio +import hashlib +import itertools +import json +import time +import uuid +from io import BytesIO +from typing import Any, Literal + +from openai.types.batch import BatchError, Errors +from pydantic import BaseModel + +from llama_stack.core.storage.kvstore import KVStore +from llama_stack.log import get_logger +from llama_stack_api import ( + Batches, + BatchObject, + ConflictError, + Files, + Inference, + ListBatchesResponse, + Models, + OpenAIAssistantMessageParam, + OpenAIChatCompletionRequestWithExtraBody, + OpenAICompletionRequestWithExtraBody, + OpenAIDeveloperMessageParam, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIFilePurpose, + OpenAIMessageParam, + OpenAISystemMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, + ResourceNotFoundError, +) + +from .config import ReferenceBatchesImplConfig + +BATCH_PREFIX = "batch:" + +logger = get_logger(__name__) + + +class AsyncBytesIO: + """ + Async-compatible BytesIO wrapper to allow async file-like operations. + + We use this when uploading files to the Files API, as it expects an + async file-like object. + """ + + def __init__(self, data: bytes): + self._buffer = BytesIO(data) + + async def read(self, n=-1): + return self._buffer.read(n) + + async def seek(self, pos, whence=0): + return self._buffer.seek(pos, whence) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._buffer.close() + + def __getattr__(self, name): + return getattr(self._buffer, name) + + +class BatchRequest(BaseModel): + line_num: int + custom_id: str + method: str + url: str + body: dict[str, Any] + + +def convert_to_openai_message_param(msg: dict[str, Any]) -> OpenAIMessageParam: + """Convert a message dictionary to OpenAIMessageParam based on role.""" + role = msg.get("role") + + if role == "user": + return OpenAIUserMessageParam(**msg) + elif role == "system": + return OpenAISystemMessageParam(**msg) + elif role == "assistant": + return OpenAIAssistantMessageParam(**msg) + elif role == "tool": + return OpenAIToolMessageParam(**msg) + elif role == "developer": + return OpenAIDeveloperMessageParam(**msg) + else: + raise ValueError(f"Unknown message role: {role}") + + +class ReferenceBatchesImpl(Batches): + """Reference implementation of the Batches API. + + This implementation processes batch files by making individual requests + to the inference API and generates output files with results. 
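+
+    Batches are persisted in the kvstore under the "batch:" prefix. When an
+    idempotency_key is supplied, the batch id is derived from a SHA-256 hash of
+    the key, so retries with identical parameters return the existing batch.
+    Concurrency is bounded in two places: a semaphore caps concurrently
+    processed batches (config.max_concurrent_batches), and requests within a
+    batch run in chunks of config.max_concurrent_requests_per_batch.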
+ """ + + def __init__( + self, + config: ReferenceBatchesImplConfig, + inference_api: Inference, + files_api: Files, + models_api: Models, + kvstore: KVStore, + ) -> None: + self.config = config + self.kvstore = kvstore + self.inference_api = inference_api + self.files_api = files_api + self.models_api = models_api + self._processing_tasks: dict[str, asyncio.Task] = {} + self._batch_semaphore = asyncio.Semaphore(config.max_concurrent_batches) + self._update_batch_lock = asyncio.Lock() + + # this is to allow tests to disable background processing + self.process_batches = True + + async def initialize(self) -> None: + # TODO: start background processing of existing tasks + pass + + async def shutdown(self) -> None: + """Shutdown the batches provider.""" + if self._processing_tasks: + # don't cancel tasks - just let them stop naturally on shutdown + # cancelling would mark batches as "cancelled" in the database + logger.info(f"Shutdown initiated with {len(self._processing_tasks)} active batch processing tasks") + + # TODO (SECURITY): this currently works w/ configured api keys, not with x-llamastack-provider-data or with user policy restrictions + async def create_batch( + self, + input_file_id: str, + endpoint: str, + completion_window: Literal["24h"], + metadata: dict[str, str] | None = None, + idempotency_key: str | None = None, + ) -> BatchObject: + """ + Create a new batch for processing multiple API requests. + + This implementation provides optional idempotency: when an idempotency key + (idempotency_key) is provided, a deterministic ID is generated based on the input + parameters. If a batch with the same parameters already exists, it will be + returned instead of creating a duplicate. Without an idempotency key, + each request creates a new batch with a unique ID. + + Args: + input_file_id: The ID of an uploaded file containing requests for the batch. + endpoint: The endpoint to be used for all requests in the batch. + completion_window: The time window within which the batch should be processed. + metadata: Optional metadata for the batch. + idempotency_key: Optional idempotency key for enabling idempotent behavior. + + Returns: + The created or existing batch object. + """ + + # Error handling by levels - + # 0. Input param handling, results in 40x errors before processing, e.g. + # - Wrong completion_window + # - Invalid metadata types + # - Unknown endpoint + # -> no batch created + # 1. Errors preventing processing, result in BatchErrors aggregated in process_batch, e.g. + # - input_file_id missing + # - invalid json in file + # - missing custom_id, method, url, body + # - invalid model + # - streaming + # -> batch created, validation sends to failed status + # 2. Processing errors, result in error_file_id entries, e.g. + # - Any error returned from inference endpoint + # -> batch created, goes to completed status + + # TODO: set expiration time for garbage collection + + if endpoint not in ["/v1/chat/completions", "/v1/completions", "/v1/embeddings"]: + raise ValueError( + f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions, /v1/completions, /v1/embeddings. Code: invalid_value. Param: endpoint", + ) + + if completion_window != "24h": + raise ValueError( + f"Invalid completion_window: {completion_window}. Supported values are: 24h. Code: invalid_value. 
Param: completion_window", + ) + + batch_id = f"batch_{uuid.uuid4().hex[:16]}" + + # For idempotent requests, use the idempotency key for the batch ID + # This ensures the same key always maps to the same batch ID, + # allowing us to detect parameter conflicts + if idempotency_key is not None: + hash_input = idempotency_key.encode("utf-8") + hash_digest = hashlib.sha256(hash_input).hexdigest()[:24] + batch_id = f"batch_{hash_digest}" + + try: + existing_batch = await self.retrieve_batch(batch_id) + + if ( + existing_batch.input_file_id != input_file_id + or existing_batch.endpoint != endpoint + or existing_batch.completion_window != completion_window + or existing_batch.metadata != metadata + ): + raise ConflictError( + f"Idempotency key '{idempotency_key}' was previously used with different parameters. " + "Either use a new idempotency key or ensure all parameters match the original request." + ) + + logger.info(f"Returning existing batch with ID: {batch_id}") + return existing_batch + except ResourceNotFoundError: + # Batch doesn't exist, continue with creation + pass + + current_time = int(time.time()) + + batch = BatchObject( + id=batch_id, + object="batch", + endpoint=endpoint, + input_file_id=input_file_id, + completion_window=completion_window, + status="validating", + created_at=current_time, + metadata=metadata, + ) + + await self.kvstore.set(f"batch:{batch_id}", batch.to_json()) + logger.info(f"Created new batch with ID: {batch_id}") + + if self.process_batches: + task = asyncio.create_task(self._process_batch(batch_id)) + self._processing_tasks[batch_id] = task + + return batch + + async def cancel_batch(self, batch_id: str) -> BatchObject: + """Cancel a batch that is in progress.""" + batch = await self.retrieve_batch(batch_id) + + if batch.status in ["cancelled", "cancelling"]: + return batch + + if batch.status in ["completed", "failed", "expired"]: + raise ConflictError(f"Cannot cancel batch '{batch_id}' with status '{batch.status}'") + + await self._update_batch(batch_id, status="cancelling", cancelling_at=int(time.time())) + + if batch_id in self._processing_tasks: + self._processing_tasks[batch_id].cancel() + # note: task removal and status="cancelled" handled in finally block of _process_batch + + return await self.retrieve_batch(batch_id) + + async def list_batches( + self, + after: str | None = None, + limit: int = 20, + ) -> ListBatchesResponse: + """ + List all batches, eventually only for the current user. + + With no notion of user, we return all batches. 
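+
+        Batches are returned sorted by created_at descending; `after` is a
+        cursor batch id and `limit` caps the page size, with has_more signaling
+        that further pages exist.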
+ """ + batch_values = await self.kvstore.values_in_range("batch:", "batch:\xff") + + batches = [] + for batch_data in batch_values: + if batch_data: + batches.append(BatchObject.model_validate_json(batch_data)) + + batches.sort(key=lambda b: b.created_at, reverse=True) + + start_idx = 0 + if after: + for i, batch in enumerate(batches): + if batch.id == after: + start_idx = i + 1 + break + + page_batches = batches[start_idx : start_idx + limit] + has_more = (start_idx + limit) < len(batches) + + first_id = page_batches[0].id if page_batches else None + last_id = page_batches[-1].id if page_batches else None + + return ListBatchesResponse( + data=page_batches, + first_id=first_id, + last_id=last_id, + has_more=has_more, + ) + + async def retrieve_batch(self, batch_id: str) -> BatchObject: + """Retrieve information about a specific batch.""" + batch_data = await self.kvstore.get(f"batch:{batch_id}") + if not batch_data: + raise ResourceNotFoundError(batch_id, "Batch", "batches.list()") + + return BatchObject.model_validate_json(batch_data) + + async def _update_batch(self, batch_id: str, **updates) -> None: + """Update batch fields in kvstore.""" + async with self._update_batch_lock: + try: + batch = await self.retrieve_batch(batch_id) + + # batch processing is async. once cancelling, only allow "cancelled" status updates + if batch.status == "cancelling" and updates.get("status") != "cancelled": + logger.info( + f"Skipping status update for cancelled batch {batch_id}: attempted {updates.get('status')}" + ) + return + + if "errors" in updates: + updates["errors"] = updates["errors"].model_dump() + + batch_dict = batch.model_dump() + batch_dict.update(updates) + + await self.kvstore.set(f"batch:{batch_id}", json.dumps(batch_dict)) + except Exception as e: + logger.error(f"Failed to update batch {batch_id}: {e}") + + async def _validate_input(self, batch: BatchObject) -> tuple[list[BatchError], list[BatchRequest]]: + """ + Read & validate input, return errors and valid input. 
+
+        Validation of
+          - input_file_id existence
+          - valid json
+          - custom_id, method, url, body present and valid
+          - no streaming
+        """
+        requests: list[BatchRequest] = []
+        errors: list[BatchError] = []
+        try:
+            await self.files_api.openai_retrieve_file(batch.input_file_id)
+        except Exception:
+            errors.append(
+                BatchError(
+                    code="invalid_request",
+                    line=None,
+                    message=f"Cannot find file {batch.input_file_id}.",
+                    param="input_file_id",
+                )
+            )
+            return errors, requests
+
+        # TODO(SECURITY): do something about large files
+        file_content_response = await self.files_api.openai_retrieve_file_content(batch.input_file_id)
+        # Handle both bytes and memoryview types - convert to bytes unconditionally
+        # (bytes(x) returns x if already bytes, creates new bytes from memoryview otherwise)
+        body_bytes = bytes(file_content_response.body)
+        file_content = body_bytes.decode("utf-8")
+        for line_num, line in enumerate(file_content.strip().split("\n"), 1):
+            if line.strip():  # skip empty lines
+                try:
+                    request = json.loads(line)
+
+                    if not isinstance(request, dict):
+                        errors.append(
+                            BatchError(
+                                code="invalid_request",
+                                line=line_num,
+                                message="Each line must be a JSON dictionary object",
+                            )
+                        )
+                        continue
+
+                    valid = True
+
+                    for param, expected_type, type_string in [
+                        ("custom_id", str, "string"),
+                        ("method", str, "string"),
+                        ("url", str, "string"),
+                        ("body", dict, "JSON dictionary object"),
+                    ]:
+                        if param not in request:
+                            errors.append(
+                                BatchError(
+                                    code="missing_required_parameter",
+                                    line=line_num,
+                                    message=f"Missing required parameter: {param}",
+                                    param=param,
+                                )
+                            )
+                            valid = False
+                        elif not isinstance(request[param], expected_type):
+                            param_name = "URL" if param == "url" else param.capitalize()
+                            errors.append(
+                                BatchError(
+                                    code="invalid_request",
+                                    line=line_num,
+                                    message=f"{param_name} must be a {type_string}",
+                                    param=param,
+                                )
+                            )
+                            valid = False
+
+                    if (url := request.get("url")) and isinstance(url, str) and url != batch.endpoint:
+                        errors.append(
+                            BatchError(
+                                code="invalid_url",
+                                line=line_num,
+                                message="URL provided for this request does not match the batch endpoint",
+                                param="url",
+                            )
+                        )
+                        valid = False
+
+                    if (request_body := request.get("body")) and isinstance(request_body, dict):
+                        if request_body.get("stream", False):
+                            errors.append(
+                                BatchError(
+                                    code="streaming_unsupported",
+                                    line=line_num,
+                                    message="Streaming is not supported in batch processing",
+                                    param="body.stream",
+                                )
+                            )
+                            valid = False
+
+                        if batch.endpoint == "/v1/chat/completions":
+                            required_params: list[tuple[str, Any, str]] = [
+                                ("model", str, "a string"),
+                                # messages is specific to /v1/chat/completions
+                                # we could skip validating messages here and let inference fail. however,
+                                # that would be a very expensive way to find out messages is wrong.
+                                ("messages", list, "an array"),  # TODO: allow messages to be a string?
+                            ]
+                        elif batch.endpoint == "/v1/completions":
+                            required_params = [
+                                ("model", str, "a string"),
+                                ("prompt", str, "a string"),  # TODO: allow prompt to be a list of strings??
+ ] + else: # /v1/embeddings + required_params = [ + ("model", str, "a string"), + ("input", (str, list), "a string or array of strings"), + ] + + for param, expected_type, type_string in required_params: + if param not in request_body: + errors.append( + BatchError( + code="invalid_request", + line=line_num, + message=f"{param.capitalize()} parameter is required", + param=f"body.{param}", + ) + ) + valid = False + elif not isinstance(request_body[param], expected_type): + errors.append( + BatchError( + code="invalid_request", + line=line_num, + message=f"{param.capitalize()} must be {type_string}", + param=f"body.{param}", + ) + ) + valid = False + + if "model" in request_body and isinstance(request_body["model"], str): + try: + await self.models_api.get_model(request_body["model"]) + except Exception: + errors.append( + BatchError( + code="model_not_found", + line=line_num, + message=f"Model '{request_body['model']}' does not exist or is not supported", + param="body.model", + ) + ) + valid = False + + if valid: + assert isinstance(url, str), "URL must be a string" # for mypy + assert isinstance(request_body, dict), "Body must be a dictionary" # for mypy + requests.append( + BatchRequest( + line_num=line_num, + url=url, + method=request["method"], + custom_id=request["custom_id"], + body=request_body, + ), + ) + except json.JSONDecodeError: + errors.append( + BatchError( + code="invalid_json_line", + line=line_num, + message="This line is not parseable as valid JSON.", + ) + ) + + return errors, requests + + async def _process_batch(self, batch_id: str) -> None: + """Background task to process a batch of requests.""" + try: + logger.info(f"Starting batch processing for {batch_id}") + async with self._batch_semaphore: # semaphore to limit concurrency + logger.info(f"Acquired semaphore for batch {batch_id}") + await self._process_batch_impl(batch_id) + except asyncio.CancelledError: + logger.info(f"Batch processing cancelled for {batch_id}") + await self._update_batch(batch_id, status="cancelled", cancelled_at=int(time.time())) + except Exception as e: + logger.error(f"Batch processing failed for {batch_id}: {e}") + await self._update_batch( + batch_id, + status="failed", + failed_at=int(time.time()), + errors=Errors(data=[BatchError(code="internal_error", message=str(e))]), + ) + finally: + self._processing_tasks.pop(batch_id, None) + + async def _process_batch_impl(self, batch_id: str) -> None: + """Implementation of batch processing logic.""" + errors: list[BatchError] = [] + batch = await self.retrieve_batch(batch_id) + + errors, requests = await self._validate_input(batch) + if errors: + await self._update_batch(batch_id, status="failed", failed_at=int(time.time()), errors=Errors(data=errors)) + logger.info(f"Batch validation failed for {batch_id} with {len(errors)} errors") + return + + logger.info(f"Processing {len(requests)} requests for batch {batch_id}") + + total_requests = len(requests) + await self._update_batch( + batch_id, + status="in_progress", + request_counts={"total": total_requests, "completed": 0, "failed": 0}, + ) + + error_results = [] + success_results = [] + completed_count = 0 + failed_count = 0 + + for chunk in itertools.batched(requests, self.config.max_concurrent_requests_per_batch): + # we use a TaskGroup to ensure all process-single-request tasks are canceled when process-batch is cancelled + async with asyncio.TaskGroup() as tg: + chunk_tasks = [tg.create_task(self._process_single_request(batch_id, request)) for request in chunk] + + chunk_results = await 
asyncio.gather(*chunk_tasks, return_exceptions=True) + + for result in chunk_results: + if isinstance(result, dict) and result.get("error") is not None: # error response from inference + failed_count += 1 + error_results.append(result) + elif isinstance(result, dict) and result.get("response") is not None: # successful inference + completed_count += 1 + success_results.append(result) + else: # unexpected result + failed_count += 1 + errors.append(BatchError(code="internal_error", message=f"Unexpected result: {result}")) + + await self._update_batch( + batch_id, + request_counts={"total": total_requests, "completed": completed_count, "failed": failed_count}, + ) + + if errors: + await self._update_batch( + batch_id, status="failed", failed_at=int(time.time()), errors=Errors(data=errors) + ) + return + + try: + output_file_id = await self._create_output_file(batch_id, success_results, "success") + await self._update_batch(batch_id, output_file_id=output_file_id) + + error_file_id = await self._create_output_file(batch_id, error_results, "error") + await self._update_batch(batch_id, error_file_id=error_file_id) + + await self._update_batch(batch_id, status="completed", completed_at=int(time.time())) + + logger.info( + f"Batch processing completed for {batch_id}: {completed_count} completed, {failed_count} failed" + ) + except Exception as e: + # note: errors is empty at this point, so we don't lose anything by ignoring it + await self._update_batch( + batch_id, + status="failed", + failed_at=int(time.time()), + errors=Errors(data=[BatchError(code="output_failed", message=str(e))]), + ) + + async def _process_single_request(self, batch_id: str, request: BatchRequest) -> dict: + """Process a single request from the batch.""" + request_id = f"batch_req_{batch_id}_{request.line_num}" + + try: + # TODO(SECURITY): review body for security issues + if request.url == "/v1/chat/completions": + request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]] + chat_params = OpenAIChatCompletionRequestWithExtraBody(**request.body) + chat_response = await self.inference_api.openai_chat_completion(chat_params) + + # this is for mypy, we don't allow streaming so we'll get the right type + assert hasattr(chat_response, "model_dump_json"), "Chat response must have model_dump_json method" + return { + "id": request_id, + "custom_id": request.custom_id, + "response": { + "status_code": 200, + "request_id": request_id, # TODO: should this be different? 
+ "body": chat_response.model_dump_json(), + }, + } + elif request.url == "/v1/completions": + completion_params = OpenAICompletionRequestWithExtraBody(**request.body) + completion_response = await self.inference_api.openai_completion(completion_params) + + # this is for mypy, we don't allow streaming so we'll get the right type + assert hasattr(completion_response, "model_dump_json"), ( + "Completion response must have model_dump_json method" + ) + return { + "id": request_id, + "custom_id": request.custom_id, + "response": { + "status_code": 200, + "request_id": request_id, + "body": completion_response.model_dump_json(), + }, + } + else: # /v1/embeddings + embeddings_response = await self.inference_api.openai_embeddings( + OpenAIEmbeddingsRequestWithExtraBody(**request.body) + ) + assert hasattr(embeddings_response, "model_dump_json"), ( + "Embeddings response must have model_dump_json method" + ) + return { + "id": request_id, + "custom_id": request.custom_id, + "response": { + "status_code": 200, + "request_id": request_id, # TODO: should this be different? + "body": embeddings_response.model_dump_json(), + }, + } + except Exception as e: + logger.info(f"Error processing request {request.custom_id} in batch {batch_id}: {e}") + return { + "id": request_id, + "custom_id": request.custom_id, + "error": {"type": "request_failed", "message": str(e)}, + } + + async def _create_output_file(self, batch_id: str, results: list[dict], file_type: str) -> str: + """ + Create an output file with batch results. + + This function filters results based on the specified file_type + and uploads the file to the Files API. + """ + output_lines = [json.dumps(result) for result in results] + + with AsyncBytesIO("\n".join(output_lines).encode("utf-8")) as file_buffer: + file_buffer.filename = f"{batch_id}_{file_type}.jsonl" + uploaded_file = await self.files_api.openai_upload_file(file=file_buffer, purpose=OpenAIFilePurpose.BATCH) + return uploaded_file.id diff --git a/llama_stack/providers/inline/batches/reference/config.py b/src/llama_stack/providers/inline/batches/reference/config.py similarity index 100% rename from llama_stack/providers/inline/batches/reference/config.py rename to src/llama_stack/providers/inline/batches/reference/config.py diff --git a/llama_stack/models/llama/llama4/prompt_templates/__init__.py b/src/llama_stack/providers/inline/datasetio/__init__.py similarity index 100% rename from llama_stack/models/llama/llama4/prompt_templates/__init__.py rename to src/llama_stack/providers/inline/datasetio/__init__.py diff --git a/llama_stack/providers/inline/datasetio/localfs/__init__.py b/src/llama_stack/providers/inline/datasetio/localfs/__init__.py similarity index 100% rename from llama_stack/providers/inline/datasetio/localfs/__init__.py rename to src/llama_stack/providers/inline/datasetio/localfs/__init__.py diff --git a/llama_stack/providers/inline/datasetio/localfs/config.py b/src/llama_stack/providers/inline/datasetio/localfs/config.py similarity index 100% rename from llama_stack/providers/inline/datasetio/localfs/config.py rename to src/llama_stack/providers/inline/datasetio/localfs/config.py diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py new file mode 100644 index 000000000..85c7cff3e --- /dev/null +++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from typing import Any + +from llama_stack.core.storage.kvstore import kvstore_impl +from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri +from llama_stack.providers.utils.pagination import paginate_records +from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse + +from .config import LocalFSDatasetIOConfig + +DATASETS_PREFIX = "localfs_datasets:" + + +class PandasDataframeDataset: + def __init__(self, dataset_def: Dataset, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.dataset_def = dataset_def + self.df = None + + def __len__(self) -> int: + assert self.df is not None, "Dataset not loaded. Please call .load() first" + return len(self.df) + + def __getitem__(self, idx): + assert self.df is not None, "Dataset not loaded. Please call .load() first" + if isinstance(idx, slice): + return self.df.iloc[idx].to_dict(orient="records") + else: + return self.df.iloc[idx].to_dict() + + async def load(self) -> None: + if self.df is not None: + return + + if self.dataset_def.source.type == "uri": + self.df = await get_dataframe_from_uri(self.dataset_def.source.uri) + elif self.dataset_def.source.type == "rows": + import pandas + + self.df = pandas.DataFrame(self.dataset_def.source.rows) + else: + raise ValueError(f"Unsupported dataset source type: {self.dataset_def.source.type}") + + if self.df is None: + raise ValueError(f"Failed to load dataset from {self.dataset_def.url}") + + +class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): + def __init__(self, config: LocalFSDatasetIOConfig) -> None: + self.config = config + # local registry for keeping track of datasets within the provider + self.dataset_infos = {} + self.kvstore = None + + async def initialize(self) -> None: + self.kvstore = await kvstore_impl(self.config.kvstore) + # Load existing datasets from kvstore + start_key = DATASETS_PREFIX + end_key = f"{DATASETS_PREFIX}\xff" + stored_datasets = await self.kvstore.values_in_range(start_key, end_key) + + for dataset in stored_datasets: + dataset = Dataset.model_validate_json(dataset) + self.dataset_infos[dataset.identifier] = dataset + + async def shutdown(self) -> None: ... 
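+
+    # Dataset definitions are persisted to the kvstore (under DATASETS_PREFIX)
+    # and mirrored in the in-memory dataset_infos cache; the register/unregister
+    # methods below keep the two in sync.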
+ + async def register_dataset( + self, + dataset_def: Dataset, + ) -> None: + # Store in kvstore + key = f"{DATASETS_PREFIX}{dataset_def.identifier}" + await self.kvstore.set( + key=key, + value=dataset_def.model_dump_json(), + ) + self.dataset_infos[dataset_def.identifier] = dataset_def + + async def unregister_dataset(self, dataset_id: str) -> None: + key = f"{DATASETS_PREFIX}{dataset_id}" + await self.kvstore.delete(key=key) + del self.dataset_infos[dataset_id] + + async def iterrows( + self, + dataset_id: str, + start_index: int | None = None, + limit: int | None = None, + ) -> PaginatedResponse: + dataset_def = self.dataset_infos[dataset_id] + dataset_impl = PandasDataframeDataset(dataset_def) + await dataset_impl.load() + + records = dataset_impl.df.to_dict("records") + return paginate_records(records, start_index, limit) + + async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: + import pandas + + dataset_def = self.dataset_infos[dataset_id] + dataset_impl = PandasDataframeDataset(dataset_def) + await dataset_impl.load() + + new_rows_df = pandas.DataFrame(rows) + dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True) diff --git a/llama_stack/models/llama/llama4/quantization/__init__.py b/src/llama_stack/providers/inline/eval/__init__.py similarity index 100% rename from llama_stack/models/llama/llama4/quantization/__init__.py rename to src/llama_stack/providers/inline/eval/__init__.py diff --git a/llama_stack/providers/inline/eval/meta_reference/__init__.py b/src/llama_stack/providers/inline/eval/meta_reference/__init__.py similarity index 100% rename from llama_stack/providers/inline/eval/meta_reference/__init__.py rename to src/llama_stack/providers/inline/eval/meta_reference/__init__.py diff --git a/llama_stack/providers/inline/eval/meta_reference/config.py b/src/llama_stack/providers/inline/eval/meta_reference/config.py similarity index 100% rename from llama_stack/providers/inline/eval/meta_reference/config.py rename to src/llama_stack/providers/inline/eval/meta_reference/config.py diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py new file mode 100644 index 000000000..0f0cb84d6 --- /dev/null +++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -0,0 +1,214 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
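+
+# Flow overview (summarizing the implementation below): run_eval() loads rows
+# from the DatasetIO API, _run_model_generation() produces completions via the
+# Inference API, and evaluate_rows() scores the generations with the Scoring
+# API, returning a synchronous, already-completed Job.
+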
+import json +from typing import Any + +from tqdm import tqdm + +from llama_stack.core.storage.kvstore import kvstore_impl +from llama_stack.providers.utils.common.data_schema_validator import ColumnName +from llama_stack_api import ( + Agents, + Benchmark, + BenchmarkConfig, + BenchmarksProtocolPrivate, + DatasetIO, + Datasets, + Eval, + EvaluateResponse, + Inference, + Job, + JobStatus, + OpenAIChatCompletionRequestWithExtraBody, + OpenAICompletionRequestWithExtraBody, + OpenAISystemMessageParam, + OpenAIUserMessageParam, + Scoring, +) + +from .config import MetaReferenceEvalConfig + +EVAL_TASKS_PREFIX = "benchmarks:" + + +class MetaReferenceEvalImpl( + Eval, + BenchmarksProtocolPrivate, +): + def __init__( + self, + config: MetaReferenceEvalConfig, + datasetio_api: DatasetIO, + datasets_api: Datasets, + scoring_api: Scoring, + inference_api: Inference, + agents_api: Agents, + ) -> None: + self.config = config + self.datasetio_api = datasetio_api + self.datasets_api = datasets_api + self.scoring_api = scoring_api + self.inference_api = inference_api + self.agents_api = agents_api + + # TODO: assume sync job, will need jobs API for async scheduling + self.jobs = {} + + self.benchmarks = {} + + async def initialize(self) -> None: + self.kvstore = await kvstore_impl(self.config.kvstore) + # Load existing benchmarks from kvstore + start_key = EVAL_TASKS_PREFIX + end_key = f"{EVAL_TASKS_PREFIX}\xff" + stored_benchmarks = await self.kvstore.values_in_range(start_key, end_key) + + for benchmark in stored_benchmarks: + benchmark = Benchmark.model_validate_json(benchmark) + self.benchmarks[benchmark.identifier] = benchmark + + async def shutdown(self) -> None: ... + + async def register_benchmark(self, task_def: Benchmark) -> None: + # Store in kvstore + key = f"{EVAL_TASKS_PREFIX}{task_def.identifier}" + await self.kvstore.set( + key=key, + value=task_def.model_dump_json(), + ) + self.benchmarks[task_def.identifier] = task_def + + async def unregister_benchmark(self, benchmark_id: str) -> None: + if benchmark_id in self.benchmarks: + del self.benchmarks[benchmark_id] + + key = f"{EVAL_TASKS_PREFIX}{benchmark_id}" + await self.kvstore.delete(key) + + async def run_eval( + self, + benchmark_id: str, + benchmark_config: BenchmarkConfig, + ) -> Job: + task_def = self.benchmarks[benchmark_id] + dataset_id = task_def.dataset_id + scoring_functions = task_def.scoring_functions + + # TODO (xiyan): validate dataset schema + # dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) + + all_rows = await self.datasetio_api.iterrows( + dataset_id=dataset_id, + limit=(-1 if benchmark_config.num_examples is None else benchmark_config.num_examples), + ) + res = await self.evaluate_rows( + benchmark_id=benchmark_id, + input_rows=all_rows.data, + scoring_functions=scoring_functions, + benchmark_config=benchmark_config, + ) + + # TODO: currently needs to wait for generation before returning + # need job scheduler queue (ray/celery) w/ jobs api + job_id = str(len(self.jobs)) + self.jobs[job_id] = res + return Job(job_id=job_id, status=JobStatus.completed) + + async def _run_model_generation( + self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig + ) -> list[dict[str, Any]]: + candidate = benchmark_config.eval_candidate + assert candidate.sampling_params.max_tokens is not None, "SamplingParams.max_tokens must be provided" + sampling_params = {"max_tokens": candidate.sampling_params.max_tokens} + + generations = [] + for x in tqdm(input_rows): + if 
ColumnName.completion_input.value in x: + if candidate.sampling_params.stop: + sampling_params["stop"] = candidate.sampling_params.stop + + input_content = json.loads(x[ColumnName.completion_input.value]) + params = OpenAICompletionRequestWithExtraBody( + model=candidate.model, + prompt=input_content, + **sampling_params, + ) + response = await self.inference_api.openai_completion(params) + generations.append({ColumnName.generated_answer.value: response.choices[0].text}) + elif ColumnName.chat_completion_input.value in x: + chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value]) + input_messages = [ + OpenAIUserMessageParam(**x) for x in chat_completion_input_json if x["role"] == "user" + ] + + messages = [] + if candidate.system_message: + messages.append(candidate.system_message) + + messages += [OpenAISystemMessageParam(**x) for x in chat_completion_input_json if x["role"] == "system"] + + messages += input_messages + params = OpenAIChatCompletionRequestWithExtraBody( + model=candidate.model, + messages=messages, + **sampling_params, + ) + response = await self.inference_api.openai_chat_completion(params) + generations.append({ColumnName.generated_answer.value: response.choices[0].message.content}) + else: + raise ValueError("Invalid input row") + + return generations + + async def evaluate_rows( + self, + benchmark_id: str, + input_rows: list[dict[str, Any]], + scoring_functions: list[str], + benchmark_config: BenchmarkConfig, + ) -> EvaluateResponse: + candidate = benchmark_config.eval_candidate + # Agent evaluation removed + if candidate.type == "model": + generations = await self._run_model_generation(input_rows, benchmark_config) + else: + raise ValueError(f"Invalid candidate type: {candidate.type}") + + # scoring with generated_answer + score_input_rows = [ + input_r | generated_r for input_r, generated_r in zip(input_rows, generations, strict=False) + ] + + if benchmark_config.scoring_params is not None: + scoring_functions_dict = { + scoring_fn_id: benchmark_config.scoring_params.get(scoring_fn_id, None) + for scoring_fn_id in scoring_functions + } + else: + scoring_functions_dict = dict.fromkeys(scoring_functions) + + score_response = await self.scoring_api.score( + input_rows=score_input_rows, scoring_functions=scoring_functions_dict + ) + + return EvaluateResponse(generations=generations, scores=score_response.results) + + async def job_status(self, benchmark_id: str, job_id: str) -> Job: + if job_id in self.jobs: + return Job(job_id=job_id, status=JobStatus.completed) + + raise ValueError(f"Job {job_id} not found") + + async def job_cancel(self, benchmark_id: str, job_id: str) -> None: + raise NotImplementedError("Job cancel is not implemented yet") + + async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: + job = await self.job_status(benchmark_id, job_id) + status = job.status + if not status or status != JobStatus.completed: + raise ValueError(f"Job is not completed, Status: {status.value}") + + return self.jobs[job_id] diff --git a/llama_stack/providers/inline/files/localfs/__init__.py b/src/llama_stack/providers/inline/files/localfs/__init__.py similarity index 100% rename from llama_stack/providers/inline/files/localfs/__init__.py rename to src/llama_stack/providers/inline/files/localfs/__init__.py diff --git a/llama_stack/providers/inline/files/localfs/config.py b/src/llama_stack/providers/inline/files/localfs/config.py similarity index 100% rename from llama_stack/providers/inline/files/localfs/config.py rename to 
src/llama_stack/providers/inline/files/localfs/config.py diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py new file mode 100644 index 000000000..2afe2fe5e --- /dev/null +++ b/src/llama_stack/providers/inline/files/localfs/files.py @@ -0,0 +1,219 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import time +import uuid +from pathlib import Path +from typing import Annotated + +from fastapi import Depends, File, Form, Response, UploadFile + +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.id_generation import generate_object_id +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl +from llama_stack.log import get_logger +from llama_stack.providers.utils.files.form_data import parse_expires_after +from llama_stack_api import ( + ExpiresAfter, + Files, + ListOpenAIFileResponse, + OpenAIFileDeleteResponse, + OpenAIFileObject, + OpenAIFilePurpose, + Order, + ResourceNotFoundError, +) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType + +from .config import LocalfsFilesImplConfig + +logger = get_logger(name=__name__, category="files") + + +class LocalfsFilesImpl(Files): + def __init__(self, config: LocalfsFilesImplConfig, policy: list[AccessRule]) -> None: + self.config = config + self.policy = policy + self.sql_store: AuthorizedSqlStore | None = None + + async def initialize(self) -> None: + """Initialize the files provider by setting up storage directory and metadata database.""" + # Create storage directory if it doesn't exist + storage_path = Path(self.config.storage_dir) + storage_path.mkdir(parents=True, exist_ok=True) + + # Initialize SQL store for metadata + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.config.metadata_store), self.policy) + await self.sql_store.create_table( + "openai_files", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "filename": ColumnType.STRING, + "purpose": ColumnType.STRING, + "bytes": ColumnType.INTEGER, + "created_at": ColumnType.INTEGER, + "expires_at": ColumnType.INTEGER, + "file_path": ColumnType.STRING, # Path to actual file on disk + }, + ) + + async def shutdown(self) -> None: + pass + + def _generate_file_id(self) -> str: + """Generate a unique file ID for OpenAI API.""" + return generate_object_id("file", lambda: f"file-{uuid.uuid4().hex}") + + def _get_file_path(self, file_id: str) -> Path: + """Get the filesystem path for a file ID.""" + return Path(self.config.storage_dir) / file_id + + async def _lookup_file_id(self, file_id: str) -> tuple[OpenAIFileObject, Path]: + """Look up a OpenAIFileObject and filesystem path from its ID.""" + if not self.sql_store: + raise RuntimeError("Files provider not initialized") + + row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}) + if not row: + raise ResourceNotFoundError(file_id, "File", "client.files.list()") + + file_path = Path(row.pop("file_path")) + return OpenAIFileObject(**row), file_path + + # OpenAI Files API Implementation + async def openai_upload_file( + self, + file: Annotated[UploadFile, File()], + purpose: Annotated[OpenAIFilePurpose, Form()], + expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None, + ) -> OpenAIFileObject: + 
"""Upload a file that can be used across various endpoints.""" + if not self.sql_store: + raise RuntimeError("Files provider not initialized") + + if expires_after is not None: + logger.warning( + f"File expiration is not supported by this provider, ignoring expires_after: {expires_after}" + ) + + file_id = self._generate_file_id() + file_path = self._get_file_path(file_id) + + content = await file.read() + file_size = len(content) + + with open(file_path, "wb") as f: + f.write(content) + + created_at = int(time.time()) + expires_at = created_at + self.config.ttl_secs + + await self.sql_store.insert( + "openai_files", + { + "id": file_id, + "filename": file.filename or "uploaded_file", + "purpose": purpose.value, + "bytes": file_size, + "created_at": created_at, + "expires_at": expires_at, + "file_path": file_path.as_posix(), + }, + ) + + return OpenAIFileObject( + id=file_id, + filename=file.filename or "uploaded_file", + purpose=purpose, + bytes=file_size, + created_at=created_at, + expires_at=expires_at, + ) + + async def openai_list_files( + self, + after: str | None = None, + limit: int | None = 10000, + order: Order | None = Order.desc, + purpose: OpenAIFilePurpose | None = None, + ) -> ListOpenAIFileResponse: + """Returns a list of files that belong to the user's organization.""" + if not self.sql_store: + raise RuntimeError("Files provider not initialized") + + if not order: + order = Order.desc + + where_conditions = {} + if purpose: + where_conditions["purpose"] = purpose.value + + paginated_result = await self.sql_store.fetch_all( + table="openai_files", + where=where_conditions if where_conditions else None, + order_by=[("created_at", order.value)], + cursor=("id", after) if after else None, + limit=limit, + ) + + files = [ + OpenAIFileObject( + id=row["id"], + filename=row["filename"], + purpose=OpenAIFilePurpose(row["purpose"]), + bytes=row["bytes"], + created_at=row["created_at"], + expires_at=row["expires_at"], + ) + for row in paginated_result.data + ] + + return ListOpenAIFileResponse( + data=files, + has_more=paginated_result.has_more, + first_id=files[0].id if files else "", + last_id=files[-1].id if files else "", + ) + + async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject: + """Returns information about a specific file.""" + file_obj, _ = await self._lookup_file_id(file_id) + + return file_obj + + async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: + """Delete a file.""" + # Delete physical file + _, file_path = await self._lookup_file_id(file_id) + if file_path.exists(): + file_path.unlink() + + # Delete metadata from database + assert self.sql_store is not None, "Files provider not initialized" + await self.sql_store.delete("openai_files", where={"id": file_id}) + + return OpenAIFileDeleteResponse( + id=file_id, + deleted=True, + ) + + async def openai_retrieve_file_content(self, file_id: str) -> Response: + """Returns the contents of the specified file.""" + # Read file content + file_obj, file_path = await self._lookup_file_id(file_id) + + if not file_path.exists(): + logger.warning(f"File '{file_id}'s underlying '{file_path}' is missing, deleting metadata.") + await self.openai_delete_file(file_id) + raise ResourceNotFoundError(file_id, "File", "client.files.list()") + + # Return as binary response with appropriate content type + return Response( + content=file_path.read_bytes(), + media_type="application/octet-stream", + headers={"Content-Disposition": f'attachment; filename="{file_obj.filename}"'}, + ) diff --git 
a/llama_stack/models/llama/llama4/vision/__init__.py b/src/llama_stack/providers/inline/inference/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/vision/__init__.py
rename to src/llama_stack/providers/inline/inference/__init__.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/__init__.py b/src/llama_stack/providers/inline/inference/meta_reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/__init__.py
rename to src/llama_stack/providers/inline/inference/meta_reference/__init__.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/common.py b/src/llama_stack/providers/inline/inference/meta_reference/common.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/common.py
rename to src/llama_stack/providers/inline/inference/meta_reference/common.py
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
new file mode 100644
index 000000000..ec6e8bfe8
--- /dev/null
+++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -0,0 +1,68 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import BaseModel, field_validator
+
+from llama_stack.providers.utils.inference import supported_inference_models
+from llama_stack_api import QuantizationConfig
+
+
+class MetaReferenceInferenceConfig(BaseModel):
+    # this is a placeholder to indicate the inference model id
+    # the actual inference model id is determined by the model id in the request
+    # Note: you need to register the model before using it for inference
+    # models in the resource list in the run.yaml config will be registered automatically
+    model: str | None = None
+    torch_seed: int | None = None
+    max_seq_len: int = 4096
+    max_batch_size: int = 1
+    model_parallel_size: int | None = None
+
+    # when this is False, we assume that the distributed process group is set up by someone
+    # outside of this code (e.g., when run inside `torchrun`). that is useful for clients
+    # (including our testing code) who might be using llama-stack as a library.
+    create_distributed_process_group: bool = True
+
+    # By default, the implementation will look at ~/.llama/checkpoints/ but you
+    # can override by specifying the directory explicitly
+    checkpoint_dir: str | None = None
+
+    quantization: QuantizationConfig | None = None
+
+    @field_validator("model")
+    @classmethod
+    def validate_model(cls, model: str) -> str:
+        permitted_models = supported_inference_models()
+        descriptors = [m.descriptor() for m in permitted_models]
+        repos = [m.huggingface_repo for m in permitted_models if m.huggingface_repo is not None]
+        if model not in (descriptors + repos):
+            model_list = "\n\t".join(repos)
+            raise ValueError(f"Unknown model: `{model}`.
Choose from [\n\t{model_list}\n]") + return model + + @classmethod + def sample_run_config( + cls, + model: str = "Llama3.2-3B-Instruct", + checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}", + quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}", + model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}", + max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}", + max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}", + **kwargs, + ) -> dict[str, Any]: + return { + "model": model, + "checkpoint_dir": checkpoint_dir, + "quantization": { + "type": quantization_type, + }, + "model_parallel_size": model_parallel_size, + "max_batch_size": max_batch_size, + "max_seq_len": max_seq_len, + } diff --git a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py new file mode 100644 index 000000000..6781d0af9 --- /dev/null +++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py @@ -0,0 +1,201 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import math +from typing import Optional + +import torch +from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData + +from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat +from llama_stack.models.llama.llama3.generation import Llama3 +from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer +from llama_stack.models.llama.llama4.generation import Llama4 +from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer +from llama_stack.models.llama.sku_types import Model, ModelFamily +from llama_stack_api import ( + GreedySamplingStrategy, + JsonSchemaResponseFormat, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIResponseFormatJSONSchema, + ResponseFormat, + ResponseFormatType, + SamplingParams, + TopPSamplingStrategy, +) + +from .common import model_checkpoint_dir +from .config import MetaReferenceInferenceConfig +from .inference import resolve_model + +Tokenizer = Llama4Tokenizer | Llama3Tokenizer + + +class LogitsProcessor: + def __init__(self, token_enforcer: TokenEnforcer): + self.token_enforcer = token_enforcer + self.mask: torch.Tensor | None = None + + def __call__(self, tokens: torch.Tensor, scores: torch.Tensor) -> torch.Tensor: + token_sequence = tokens[0, :].tolist() + allowed_tokens = self.token_enforcer.get_allowed_tokens(token_sequence) + + if self.mask is not None: + self.mask.fill_(-math.inf) + else: + self.mask = torch.full_like(scores, -math.inf) + + self.mask[:, :, allowed_tokens] = 0 + scores = scores + self.mask + return scores + + +def get_logits_processor( + tokenizer: Tokenizer, + vocab_size: int, + response_format: ResponseFormat | None, +) -> Optional["LogitsProcessor"]: + if response_format is None: + return None + + if not isinstance(response_format, JsonSchemaResponseFormat): + raise ValueError(f"Unsupported response format type {response_format.type}") + + parser = JsonSchemaParser(response_format.json_schema) + data = TokenEnforcerTokenizerData( + _build_regular_tokens_list(tokenizer, vocab_size), + tokenizer.decode, + tokenizer.stop_tokens, + ) + token_enforcer = TokenEnforcer(data, parser) + return LogitsProcessor(token_enforcer) + + +def _build_regular_tokens_list(tokenizer: Tokenizer, vocab_size: int) -> list[tuple[int, str, bool]]: + token_0 = 
tokenizer.encode("0", bos=False, eos=False)[-1] + regular_tokens = [] + + special_token_ids = set(tokenizer.special_tokens.values()) + for token_idx in range(vocab_size): + if token_idx in special_token_ids: + continue + + # We prepend token 0 and skip the first letter of the result to get a space if the token is a start word. + decoded_after_0 = tokenizer.decode([token_0, token_idx])[1:] + decoded_regular = tokenizer.decode([token_idx]) + is_word_start_token = len(decoded_after_0) > len(decoded_regular) + regular_tokens.append((token_idx, decoded_after_0, is_word_start_token)) + return regular_tokens + + +def _infer_sampling_params(sampling_params: SamplingParams): + if isinstance(sampling_params.strategy, GreedySamplingStrategy): + temperature = 0.0 + top_p = 1.0 + elif isinstance(sampling_params.strategy, TopPSamplingStrategy): + temperature = sampling_params.strategy.temperature or 1.0 + top_p = sampling_params.strategy.top_p or 1.0 + else: + raise ValueError(f"Unsupported sampling strategy {sampling_params.strategy}") + return temperature, top_p + + +class LlamaGenerator: + def __init__( + self, + config: MetaReferenceInferenceConfig, + model_id: str, + llama_model: Model, + ): + if config.checkpoint_dir and config.checkpoint_dir != "null": + ckpt_dir = config.checkpoint_dir + else: + resolved_model = resolve_model(model_id) + if resolved_model is None: + # if the model is not a native llama model, get the default checkpoint_dir based on model id + ckpt_dir = model_checkpoint_dir(model_id) + else: + # if the model is a native llama model, get the default checkpoint_dir based on model core_model_id value + ckpt_dir = model_checkpoint_dir(resolved_model.descriptor()) + + if config.quantization: + if config.quantization.type == "fp8_mixed": + quantization_mode = QuantizationMode.fp8_mixed + elif config.quantization.type == "int4_mixed": + quantization_mode = QuantizationMode.int4_mixed + elif config.quantization.type == "bf16": + quantization_mode = None + else: + raise ValueError(f"Unsupported quantization mode {config.quantization}") + else: + quantization_mode = None + + cls = Llama4 if llama_model.model_family == ModelFamily.llama4 else Llama3 + self.inner_generator = cls.build( + ckpt_dir=ckpt_dir, + max_seq_len=config.max_seq_len, + max_batch_size=config.max_batch_size, + world_size=config.model_parallel_size or llama_model.pth_file_count, + quantization_mode=quantization_mode, + ) + + self.tokenizer = self.inner_generator.tokenizer + self.args = self.inner_generator.args + self.formatter = self.inner_generator.formatter + + def chat_completion( + self, + request: OpenAIChatCompletionRequestWithExtraBody, + raw_messages: list, + ): + """Generate chat completion using OpenAI request format. 
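+
+        This is a generator: it yields batches of token results from the
+        underlying Llama generator rather than returning a single response;
+        non-streaming callers accumulate result.text across yielded batches.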
+
+        Args:
+            request: OpenAI chat completion request
+            raw_messages: Pre-converted list of RawMessage objects
+        """
+
+        # Tool prompt format: OpenAI-style (custom) tools use the JSON format
+        tool_prompt_format = ToolPromptFormat.json
+
+        # Prepare sampling params
+        sampling_params = SamplingParams()
+        if request.temperature is not None or request.top_p is not None:
+            sampling_params.strategy = TopPSamplingStrategy(
+                temperature=request.temperature if request.temperature is not None else 1.0,
+                top_p=request.top_p if request.top_p is not None else 1.0,
+            )
+        if request.max_tokens:
+            sampling_params.max_tokens = request.max_tokens
+
+        max_gen_len = sampling_params.max_tokens
+        if max_gen_len is None or max_gen_len == 0 or max_gen_len >= self.args.max_seq_len:
+            max_gen_len = self.args.max_seq_len - 1
+
+        temperature, top_p = _infer_sampling_params(sampling_params)
+
+        # Get logits processor for response format
+        logits_processor = None
+        if request.response_format:
+            if isinstance(request.response_format, OpenAIResponseFormatJSONSchema):
+                # Extract the actual schema from OpenAIJSONSchema TypedDict
+                schema_dict = request.response_format.json_schema.get("schema") or {}
+                json_schema_format = JsonSchemaResponseFormat(
+                    type=ResponseFormatType.json_schema,
+                    json_schema=schema_dict,
+                )
+                logits_processor = get_logits_processor(self.tokenizer, self.args.vocab_size, json_schema_format)
+
+        # Generate
+        yield from self.inner_generator.generate(
+            llm_inputs=[self.formatter.encode_dialog_prompt(raw_messages, tool_prompt_format)],
+            max_gen_len=max_gen_len,
+            temperature=temperature,
+            top_p=top_p,
+            logprobs=False,
+            echo=False,
+            logits_processor=logits_processor,
+        )
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
new file mode 100644
index 000000000..42d1299ab
--- /dev/null
+++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -0,0 +1,542 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
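+
+# Tool handling overview (summarizing the code below): OpenAI tool definitions
+# are converted to ToolDefinition objects and injected into the system prompt,
+# using the JSON template for Llama 3.1-style models (and multimodal Llama 3.2)
+# and the python_list template for Llama 3.2/3.3/4-style models.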
+ +import asyncio +import time +import uuid +from collections.abc import AsyncIterator + +from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition +from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat +from llama_stack.models.llama.llama3.prompt_templates import ( + JsonCustomToolGenerator, + SystemDefaultGenerator, +) +from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer +from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat +from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( + PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4, +) +from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer +from llama_stack.models.llama.sku_list import resolve_model +from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal +from llama_stack.providers.utils.inference.embedding_mixin import ( + SentenceTransformerEmbeddingMixin, +) +from llama_stack.providers.utils.inference.model_registry import ( + ModelRegistryHelper, + build_hf_repo_model_entry, +) +from llama_stack_api import ( + InferenceProvider, + Model, + ModelsProtocolPrivate, + ModelType, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionUsage, + OpenAIChoice, + OpenAICompletion, + OpenAICompletionRequestWithExtraBody, + OpenAIUserMessageParam, + ToolChoice, +) + +from .config import MetaReferenceInferenceConfig +from .generators import LlamaGenerator +from .model_parallel import LlamaModelParallelGenerator + +log = get_logger(__name__, category="inference") +# there's a single model parallel process running serving the model. for now, +# we don't support multiple concurrent requests to this process. +SEMAPHORE = asyncio.Semaphore(1) + + +def _convert_openai_tool_to_tool_definition(tool) -> ToolDefinition: + """Convert OpenAI tool format to ToolDefinition format.""" + # OpenAI tools have function.name and function.parameters + return ToolDefinition( + tool_name=tool.function.name, + description=tool.function.description or "", + parameters=tool.function.parameters or {}, + ) + + +def _get_tool_choice_prompt(tool_choice, tools) -> str: + """Generate prompt text for tool_choice behavior.""" + if not tool_choice or tool_choice == ToolChoice.auto or tool_choice == "auto": + return "" + elif tool_choice == ToolChoice.required or tool_choice == "required": + return "You MUST use one of the provided functions/tools to answer the user query." + elif tool_choice == ToolChoice.none or tool_choice == "none": + return "" + else: + # Specific tool specified + return f"You MUST use the tool `{tool_choice}` to answer the user query." 
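+
+
+# Illustrative examples of the helper above (hypothetical tool name):
+#   _get_tool_choice_prompt("auto", tools) -> ""
+#   _get_tool_choice_prompt("required", tools) ->
+#       "You MUST use one of the provided functions/tools to answer the user query."
+#   _get_tool_choice_prompt("get_weather", tools) ->
+#       "You MUST use the tool `get_weather` to answer the user query."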
+ + +def _raw_content_as_str(content) -> str: + """Convert RawContent to string for system messages.""" + if isinstance(content, str): + return content + elif isinstance(content, RawTextItem): + return content.text + elif isinstance(content, list): + return "\n".join(_raw_content_as_str(c) for c in content) + else: + return "" + + +def _augment_raw_messages_for_tools_llama_3_1( + raw_messages: list[RawMessage], + tools: list, + tool_choice, +) -> list[RawMessage]: + """Augment raw messages with tool definitions for Llama 3.1 style models.""" + messages = raw_messages.copy() + existing_system_message = None + if messages and messages[0].role == "system": + existing_system_message = messages.pop(0) + + sys_content = "" + + # Add tool definitions first (if present) + if tools: + # Convert OpenAI tools to ToolDefinitions + tool_definitions = [_convert_openai_tool_to_tool_definition(t) for t in tools] + + # For OpenAI format, all tools are custom (have string names) + tool_gen = JsonCustomToolGenerator() + tool_template = tool_gen.gen(tool_definitions) + sys_content += tool_template.render() + sys_content += "\n" + + # Add default system prompt + default_gen = SystemDefaultGenerator() + default_template = default_gen.gen() + sys_content += default_template.render() + + # Add existing system message if present + if existing_system_message: + sys_content += "\n" + _raw_content_as_str(existing_system_message.content) + + # Add tool choice prompt if needed + if tool_choice_prompt := _get_tool_choice_prompt(tool_choice, tools): + sys_content += "\n" + tool_choice_prompt + + # Create new system message + new_system_message = RawMessage( + role="system", + content=[RawTextItem(text=sys_content.strip())], + ) + + return [new_system_message] + messages + + +def _augment_raw_messages_for_tools_llama_4( + raw_messages: list[RawMessage], + tools: list, + tool_choice, +) -> list[RawMessage]: + """Augment raw messages with tool definitions for Llama 4/3.2/3.3 style models.""" + messages = raw_messages.copy() + existing_system_message = None + if messages and messages[0].role == "system": + existing_system_message = messages.pop(0) + + sys_content = "" + + # Add tool definitions if present + if tools: + # Convert OpenAI tools to ToolDefinitions + tool_definitions = [_convert_openai_tool_to_tool_definition(t) for t in tools] + + # Use python_list format for Llama 4 + tool_gen = PythonListCustomToolGeneratorLlama4() + system_prompt = None + if existing_system_message: + system_prompt = _raw_content_as_str(existing_system_message.content) + + tool_template = tool_gen.gen(tool_definitions, system_prompt) + sys_content = tool_template.render() + elif existing_system_message: + # No tools, just use existing system message + sys_content = _raw_content_as_str(existing_system_message.content) + + # Add tool choice prompt if needed + if tool_choice_prompt := _get_tool_choice_prompt(tool_choice, tools): + sys_content += "\n" + tool_choice_prompt + + if sys_content: + new_system_message = RawMessage( + role="system", + content=[RawTextItem(text=sys_content.strip())], + ) + return [new_system_message] + messages + + return messages + + +def augment_raw_messages_for_tools( + raw_messages: list[RawMessage], + params: OpenAIChatCompletionRequestWithExtraBody, + llama_model, +) -> list[RawMessage]: + """Augment raw messages with tool definitions based on model family.""" + if not params.tools: + return raw_messages + + # Determine augmentation strategy based on model family + if llama_model.model_family == 
ModelFamily.llama3_1 or ( + llama_model.model_family == ModelFamily.llama3_2 and is_multimodal(llama_model.core_model_id) + ): + # Llama 3.1 and Llama 3.2 multimodal use JSON format + return _augment_raw_messages_for_tools_llama_3_1( + raw_messages, + params.tools, + params.tool_choice, + ) + elif llama_model.model_family in ( + ModelFamily.llama3_2, + ModelFamily.llama3_3, + ModelFamily.llama4, + ): + # Llama 3.2/3.3/4 use python_list format + return _augment_raw_messages_for_tools_llama_4( + raw_messages, + params.tools, + params.tool_choice, + ) + else: + # Default to Llama 3.1 style + return _augment_raw_messages_for_tools_llama_3_1( + raw_messages, + params.tools, + params.tool_choice, + ) + + +def llama_builder_fn(config: MetaReferenceInferenceConfig, model_id: str, llama_model: Model) -> LlamaGenerator: + return LlamaGenerator(config, model_id, llama_model) + + +class MetaReferenceInferenceImpl( + SentenceTransformerEmbeddingMixin, + InferenceProvider, + ModelsProtocolPrivate, +): + def __init__(self, config: MetaReferenceInferenceConfig) -> None: + self.config = config + self.model_id = None + self.llama_model = None + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + if self.config.create_distributed_process_group: + self.generator.stop() + + async def openai_completion( + self, + params: OpenAICompletionRequestWithExtraBody, + ) -> OpenAICompletion: + raise NotImplementedError("OpenAI completion not supported by meta reference provider") + + async def should_refresh_models(self) -> bool: + return False + + async def list_models(self) -> list[Model] | None: + return None + + async def unregister_model(self, model_id: str) -> None: + pass + + async def register_model(self, model: Model) -> Model: + llama_model = ( + resolve_model(model.metadata["llama_model"]) + if "llama_model" in model.metadata + else resolve_model(model.identifier) + ) + if llama_model is None: + raise ValueError( + "Please make sure your llama_model in model metadata or model identifier is in Llama SKU list" + ) + + self.model_registry_helper = ModelRegistryHelper( + [ + build_hf_repo_model_entry( + llama_model.descriptor(), + llama_model.core_model_id.value, + ) + ], + ) + model = await self.model_registry_helper.register_model(model) + + if model.model_type == ModelType.embedding: + self._load_sentence_transformer_model(model.provider_resource_id) + + # TODO: what is this?! 
you can't really specify skipping via model metadata + # kill this madness + if "skip_load" in model.metadata and model.metadata["skip_load"]: + return model + + await self.load_model(model.identifier, llama_model) + return model + + async def load_model(self, model_id, llama_model) -> None: + log.info(f"Loading model `{model_id}`") + + builder_params = [self.config, model_id, llama_model] + + if self.config.create_distributed_process_group: + self.generator = LlamaModelParallelGenerator( + model_parallel_size=self.config.model_parallel_size or llama_model.pth_file_count, + builder_fn=llama_builder_fn, + builder_params=builder_params, + formatter=( + Llama4ChatFormat(Llama4Tokenizer.get_instance()) + if llama_model.model_family == ModelFamily.llama4 + else Llama3ChatFormat(Llama3Tokenizer.get_instance()) + ), + ) + self.generator.start() + else: + self.generator = llama_builder_fn(*builder_params) + + self.model_id = model_id + self.llama_model = llama_model + + log.info("Warming up...") + + await self.openai_chat_completion( + params=OpenAIChatCompletionRequestWithExtraBody( + model=model_id, + messages=[OpenAIUserMessageParam(role="user", content="Hi how are you?")], + max_tokens=20, + ) + ) + log.info("Warmed up!") + + def check_model(self, request) -> None: + if self.model_id is None or self.llama_model is None: + raise RuntimeError( + "No available model yet, please register your requested model or add your model in the resources first" + ) + elif request.model != self.model_id: + raise RuntimeError(f"Model mismatch: request model: {request.model} != loaded model: {self.model_id}") + + async def openai_chat_completion( + self, + params: OpenAIChatCompletionRequestWithExtraBody, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + self.check_model(params) + + # Convert OpenAI messages to RawMessages + from llama_stack.models.llama.datatypes import StopReason + from llama_stack.providers.utils.inference.prompt_adapter import ( + convert_openai_message_to_raw_message, + decode_assistant_message, + ) + + raw_messages = [await convert_openai_message_to_raw_message(msg) for msg in params.messages] + + # Augment messages with tool definitions if tools are present + raw_messages = augment_raw_messages_for_tools(raw_messages, params, self.llama_model) + + # Call generator's chat_completion method (works for both single-GPU and model-parallel) + if isinstance(self.generator, LlamaGenerator): + generator = self.generator.chat_completion(params, raw_messages) + else: + # Model parallel: submit task to process group + generator = self.generator.group.run_inference(("chat_completion", [params, raw_messages])) + + # Check if streaming is requested + if params.stream: + return self._stream_chat_completion(generator, params) + + # Non-streaming: collect all generated text + generated_text = "" + for result_batch in generator: + for result in result_batch: + if not result.ignore_token and result.source == "output": + generated_text += result.text + + # Decode assistant message to extract tool calls and determine stop_reason + # Default to end_of_turn if generation completed normally + decoded_message = decode_assistant_message(generated_text, StopReason.end_of_turn) + + # Convert tool calls to OpenAI format + openai_tool_calls = None + if decoded_message.tool_calls: + from llama_stack_api import ( + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + ) + + openai_tool_calls = [ + OpenAIChatCompletionToolCall( + # generate a uuid for the call id. 
This is the only inline provider that does this, so we need to get creative.
+                    id=f"call_{uuid.uuid4().hex[:24]}",
+                    type="function",
+                    function=OpenAIChatCompletionToolCallFunction(
+                        name=str(tc.tool_name),
+                        arguments=tc.arguments,
+                    ),
+                )
+                for tc in decoded_message.tool_calls
+            ]
+
+        # Determine finish_reason based on whether tool calls are present
+        finish_reason = "tool_calls" if openai_tool_calls else "stop"
+
+        # Extract content from decoded message
+        content = ""
+        if isinstance(decoded_message.content, str):
+            content = decoded_message.content
+        elif isinstance(decoded_message.content, list):
+            for item in decoded_message.content:
+                if isinstance(item, RawTextItem):
+                    content += item.text
+
+        # Create OpenAI response
+        # Generate a uuid for the response id. This is the only inline provider that does this, so we need to get creative.
+        response_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+        created = int(time.time())
+
+        return OpenAIChatCompletion(
+            id=response_id,
+            object="chat.completion",
+            created=created,
+            model=params.model,
+            choices=[
+                OpenAIChoice(
+                    index=0,
+                    message=OpenAIAssistantMessageParam(
+                        role="assistant",
+                        content=content,
+                        tool_calls=openai_tool_calls,
+                    ),
+                    finish_reason=finish_reason,
+                    logprobs=None,
+                )
+            ],
+            usage=OpenAIChatCompletionUsage(
+                prompt_tokens=0,  # TODO: calculate properly
+                completion_tokens=0,  # TODO: calculate properly
+                total_tokens=0,  # TODO: calculate properly
+            ),
+        )
+
+    async def _stream_chat_completion(
+        self,
+        generator,
+        params: OpenAIChatCompletionRequestWithExtraBody,
+    ) -> AsyncIterator[OpenAIChatCompletionChunk]:
+        """Stream chat completion chunks as they're generated."""
+        from llama_stack.models.llama.datatypes import StopReason
+        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
+        from llama_stack_api import (
+            OpenAIChatCompletionChunk,
+            OpenAIChatCompletionToolCall,
+            OpenAIChatCompletionToolCallFunction,
+            OpenAIChoiceDelta,
+            OpenAIChunkChoice,
+        )
+
+        response_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+        created = int(time.time())
+        generated_text = ""
+
+        # Yield chunks as tokens are generated
+        for result_batch in generator:
+            for result in result_batch:
+                if result.ignore_token or result.source != "output":
+                    continue
+
+                generated_text += result.text
+
+                # Yield delta chunk with the new text
+                chunk = OpenAIChatCompletionChunk(
+                    id=response_id,
+                    object="chat.completion.chunk",
+                    created=created,
+                    model=params.model,
+                    choices=[
+                        OpenAIChunkChoice(
+                            index=0,
+                            delta=OpenAIChoiceDelta(
+                                role="assistant",
+                                content=result.text,
+                            ),
+                            finish_reason="",
+                            logprobs=None,
+                        )
+                    ],
+                )
+                yield chunk
+
+        # After generation completes, decode the full message to extract tool calls
+        decoded_message = decode_assistant_message(generated_text, StopReason.end_of_turn)
+
+        # If tool calls are present, yield a final chunk with tool_calls
+        if decoded_message.tool_calls:
+            openai_tool_calls = [
+                OpenAIChatCompletionToolCall(
+                    # generate a uuid for the call id. This is the only inline provider that does this, so we need to get creative.
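+                    # The "call_" prefix plus 24 hex characters mirrors the shape of
+                    # OpenAI-issued tool call ids, so clients that pattern-match on
+                    # the id format keep working.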
+ id=f"call_{uuid.uuid4().hex[:24]}", + type="function", + function=OpenAIChatCompletionToolCallFunction( + name=str(tc.tool_name), + arguments=tc.arguments, + ), + ) + for tc in decoded_message.tool_calls + ] + + # Yield chunk with tool_calls + chunk = OpenAIChatCompletionChunk( + id=response_id, + object="chat.completion.chunk", + created=created, + model=params.model, + choices=[ + OpenAIChunkChoice( + index=0, + delta=OpenAIChoiceDelta( + role="assistant", + tool_calls=openai_tool_calls, + ), + finish_reason="", + logprobs=None, + ) + ], + ) + yield chunk + + finish_reason = "tool_calls" + else: + finish_reason = "stop" + + # Yield final chunk with finish_reason + final_chunk = OpenAIChatCompletionChunk( + id=response_id, + object="chat.completion.chunk", + created=created, + model=params.model, + choices=[ + OpenAIChunkChoice( + index=0, + delta=OpenAIChoiceDelta(), + finish_reason=finish_reason, + logprobs=None, + ) + ], + ) + yield final_chunk diff --git a/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py b/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py new file mode 100644 index 000000000..f50b41f34 --- /dev/null +++ b/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py @@ -0,0 +1,77 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import Callable +from functools import partial +from typing import Any + +from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat +from llama_stack.models.llama.llama4.chat_format import ChatFormat as Llama4ChatFormat + +from .parallel_utils import ModelParallelProcessGroup + + +class ModelRunner: + def __init__(self, llama): + self.llama = llama + + def __call__(self, task: Any): + task_type = task[0] + if task_type == "chat_completion": + # task[1] is [params, raw_messages] + params, raw_messages = task[1] + return self.llama.chat_completion(params, raw_messages) + else: + raise ValueError(f"Unexpected task type {task_type}") + + +def init_model_cb( + builder_fn: Callable, + params: list[Any], +): + llama = builder_fn(*params) + return ModelRunner(llama) + + +class LlamaModelParallelGenerator: + """ + This abstraction exists so + - we can run model parallel code without needing to run the CLIs via torchrun + - this also enables use model parallel code within a notebook context. + + A Context Manager is used to ensure that the model parallel process is started and stopped + correctly. This does make the ergonomics a little awkward, because it isn't immediately + clear at the callsite why we need to use a context manager. 
+ """ + + def __init__( + self, + model_parallel_size: int, + builder_fn: Callable, + builder_params: list[Any], + formatter: Llama3ChatFormat | Llama4ChatFormat, + ): + self.model_parallel_size = model_parallel_size + self.builder_fn = builder_fn + self.builder_params = builder_params + self.formatter = formatter + + def start(self): + self.__enter__() + + def stop(self): + self.__exit__(None, None, None) + + def __enter__(self): + self.group = ModelParallelProcessGroup( + self.model_parallel_size, + init_model_cb=partial(init_model_cb, self.builder_fn, self.builder_params), + ) + self.group.start() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.group.stop() diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py similarity index 96% rename from llama_stack/providers/inline/inference/meta_reference/parallel_utils.py rename to src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index bb6a1bd03..663e4793b 100644 --- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -33,10 +33,6 @@ from torch.distributed.launcher.api import LaunchConfig, elastic_launch from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import GenerationResult -from llama_stack.providers.utils.inference.prompt_adapter import ( - ChatCompletionRequestWithRawContent, - CompletionRequestWithRawContent, -) log = get_logger(name=__name__, category="inference") @@ -69,10 +65,7 @@ class CancelSentinel(BaseModel): class TaskRequest(BaseModel): type: Literal[ProcessingMessageName.task_request] = ProcessingMessageName.task_request - task: tuple[ - str, - list[CompletionRequestWithRawContent] | list[ChatCompletionRequestWithRawContent], - ] + task: tuple[str, list] class TaskResponse(BaseModel): @@ -328,10 +321,7 @@ class ModelParallelProcessGroup: def run_inference( self, - req: tuple[ - str, - list[CompletionRequestWithRawContent] | list[ChatCompletionRequestWithRawContent], - ], + req: tuple[str, list], ) -> Generator: assert not self.running, "inference already running" diff --git a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py b/src/llama_stack/providers/inline/inference/sentence_transformers/__init__.py similarity index 100% rename from llama_stack/providers/inline/inference/sentence_transformers/__init__.py rename to src/llama_stack/providers/inline/inference/sentence_transformers/__init__.py diff --git a/llama_stack/providers/inline/inference/sentence_transformers/config.py b/src/llama_stack/providers/inline/inference/sentence_transformers/config.py similarity index 100% rename from llama_stack/providers/inline/inference/sentence_transformers/config.py rename to src/llama_stack/providers/inline/inference/sentence_transformers/config.py diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py similarity index 86% rename from llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py rename to src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index cb72aa13a..b5cadeec2 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ 
b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -6,24 +6,20 @@ from collections.abc import AsyncIterator -from llama_stack.apis.inference import ( - InferenceProvider, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletionRequestWithExtraBody, -) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, -) -from llama_stack.apis.models import ModelType from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) -from llama_stack.providers.utils.inference.openai_compat import ( - OpenAIChatCompletionToLlamaStackMixin, +from llama_stack_api import ( + InferenceProvider, + Model, + ModelsProtocolPrivate, + ModelType, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, + OpenAICompletion, + OpenAICompletionRequestWithExtraBody, ) from .config import SentenceTransformersInferenceConfig @@ -32,7 +28,6 @@ log = get_logger(name=__name__, category="inference") class SentenceTransformersInferenceImpl( - OpenAIChatCompletionToLlamaStackMixin, SentenceTransformerEmbeddingMixin, InferenceProvider, ModelsProtocolPrivate, diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj similarity index 100% rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata similarity index 100% rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist similarity index 100% rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h similarity index 100% rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift similarity index 100% rename from 
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift similarity index 100% rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift similarity index 100% rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift similarity index 100% rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift diff --git a/llama_stack/providers/__init__.py b/src/llama_stack/providers/inline/post_training/__init__.py similarity index 100% rename from llama_stack/providers/__init__.py rename to src/llama_stack/providers/inline/post_training/__init__.py diff --git a/llama_stack/providers/inline/__init__.py b/src/llama_stack/providers/inline/post_training/common/__init__.py similarity index 100% rename from llama_stack/providers/inline/__init__.py rename to src/llama_stack/providers/inline/post_training/common/__init__.py diff --git a/llama_stack/providers/inline/post_training/common/utils.py b/src/llama_stack/providers/inline/post_training/common/utils.py similarity index 100% rename from llama_stack/providers/inline/post_training/common/utils.py rename to src/llama_stack/providers/inline/post_training/common/utils.py diff --git a/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py similarity index 88% rename from llama_stack/providers/inline/post_training/common/validator.py rename to src/llama_stack/providers/inline/post_training/common/validator.py index 950b75f86..cc018c865 100644 --- a/llama_stack/providers/inline/post_training/common/validator.py +++ b/src/llama_stack/providers/inline/post_training/common/validator.py @@ -12,14 +12,10 @@ from typing import Any -from llama_stack.apis.common.type_system import ( - ChatCompletionInputType, - DialogType, - StringType, -) from llama_stack.providers.utils.common.data_schema_validator import ( ColumnName, ) +from llama_stack_api import ChatCompletionInputType, DialogType, StringType EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = { "instruct": [ diff --git a/llama_stack/providers/inline/post_training/huggingface/__init__.py b/src/llama_stack/providers/inline/post_training/huggingface/__init__.py similarity index 100% rename from llama_stack/providers/inline/post_training/huggingface/__init__.py rename to src/llama_stack/providers/inline/post_training/huggingface/__init__.py diff --git a/llama_stack/providers/inline/post_training/huggingface/config.py b/src/llama_stack/providers/inline/post_training/huggingface/config.py similarity index 100% 
rename from llama_stack/providers/inline/post_training/huggingface/config.py rename to src/llama_stack/providers/inline/post_training/huggingface/config.py diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py new file mode 100644 index 000000000..fa939d439 --- /dev/null +++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py @@ -0,0 +1,208 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from enum import Enum +from typing import Any + +from llama_stack.providers.inline.post_training.huggingface.config import ( + HuggingFacePostTrainingConfig, +) +from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler +from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus +from llama_stack_api import ( + AlgorithmConfig, + Checkpoint, + DatasetIO, + Datasets, + DPOAlignmentConfig, + JobStatus, + ListPostTrainingJobsResponse, + PostTrainingJob, + PostTrainingJobArtifactsResponse, + PostTrainingJobStatusResponse, + TrainingConfig, +) + + +class TrainingArtifactType(Enum): + CHECKPOINT = "checkpoint" + RESOURCES_STATS = "resources_stats" + + +_JOB_TYPE_SUPERVISED_FINE_TUNE = "supervised-fine-tune" +_JOB_TYPE_DPO_TRAINING = "dpo-training" + + +class HuggingFacePostTrainingImpl: + def __init__( + self, + config: HuggingFacePostTrainingConfig, + datasetio_api: DatasetIO, + datasets: Datasets, + ) -> None: + self.config = config + self.datasetio_api = datasetio_api + self.datasets_api = datasets + self._scheduler = Scheduler() + + async def shutdown(self) -> None: + await self._scheduler.shutdown() + + @staticmethod + def _checkpoint_to_artifact(checkpoint: Checkpoint) -> JobArtifact: + return JobArtifact( + type=TrainingArtifactType.CHECKPOINT.value, + name=checkpoint.identifier, + uri=checkpoint.path, + metadata=dict(checkpoint), + ) + + @staticmethod + def _resources_stats_to_artifact(resources_stats: dict[str, Any]) -> JobArtifact: + return JobArtifact( + type=TrainingArtifactType.RESOURCES_STATS.value, + name=TrainingArtifactType.RESOURCES_STATS.value, + metadata=resources_stats, + ) + + async def supervised_fine_tune( + self, + job_uuid: str, + training_config: TrainingConfig, + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + model: str, + checkpoint_dir: str | None = None, + algorithm_config: AlgorithmConfig | None = None, + ) -> PostTrainingJob: + async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb): + from llama_stack.providers.inline.post_training.huggingface.recipes.finetune_single_device import ( + HFFinetuningSingleDevice, + ) + + on_log_message_cb("Starting HF finetuning") + + recipe = HFFinetuningSingleDevice( + job_uuid=job_uuid, + datasetio_api=self.datasetio_api, + datasets_api=self.datasets_api, + ) + + resources_allocated, checkpoints = await recipe.train( + model=model, + output_dir=checkpoint_dir, + job_uuid=job_uuid, + lora_config=algorithm_config, + config=training_config, + provider_config=self.config, + ) + + on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated)) + if checkpoints: + for checkpoint in checkpoints: + artifact = self._checkpoint_to_artifact(checkpoint) + on_artifact_collected_cb(artifact) + + on_status_change_cb(SchedulerJobStatus.completed) + 
on_log_message_cb("HF finetuning completed") + + job_uuid = self._scheduler.schedule(_JOB_TYPE_SUPERVISED_FINE_TUNE, job_uuid, handler) + return PostTrainingJob(job_uuid=job_uuid) + + async def preference_optimize( + self, + job_uuid: str, + finetuned_model: str, + algorithm_config: DPOAlignmentConfig, + training_config: TrainingConfig, + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + ) -> PostTrainingJob: + async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb): + from llama_stack.providers.inline.post_training.huggingface.recipes.finetune_single_device_dpo import ( + HFDPOAlignmentSingleDevice, + ) + + on_log_message_cb("Starting HF DPO alignment") + + recipe = HFDPOAlignmentSingleDevice( + job_uuid=job_uuid, + datasetio_api=self.datasetio_api, + datasets_api=self.datasets_api, + ) + + resources_allocated, checkpoints = await recipe.train( + model=finetuned_model, + output_dir=f"{self.config.dpo_output_dir}/{job_uuid}", + job_uuid=job_uuid, + dpo_config=algorithm_config, + config=training_config, + provider_config=self.config, + ) + + on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated)) + if checkpoints: + for checkpoint in checkpoints: + artifact = self._checkpoint_to_artifact(checkpoint) + on_artifact_collected_cb(artifact) + else: + on_log_message_cb("Warning: No checkpoints were saved during DPO training") + + on_status_change_cb(SchedulerJobStatus.completed) + on_log_message_cb("HF DPO alignment completed") + + job_uuid = self._scheduler.schedule(_JOB_TYPE_DPO_TRAINING, job_uuid, handler) + return PostTrainingJob(job_uuid=job_uuid) + + @staticmethod + def _get_artifacts_metadata_by_type(job, artifact_type): + return [artifact.metadata for artifact in job.artifacts if artifact.type == artifact_type] + + @classmethod + def _get_checkpoints(cls, job): + return cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.CHECKPOINT.value) + + @classmethod + def _get_resources_allocated(cls, job): + data = cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.RESOURCES_STATS.value) + return data[0] if data else None + + async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse | None: + job = self._scheduler.get_job(job_uuid) + + match job.status: + # TODO: Add support for other statuses to API + case SchedulerJobStatus.new | SchedulerJobStatus.scheduled: + status = JobStatus.scheduled + case SchedulerJobStatus.running: + status = JobStatus.in_progress + case SchedulerJobStatus.completed: + status = JobStatus.completed + case SchedulerJobStatus.failed: + status = JobStatus.failed + case _: + raise NotImplementedError() + + return PostTrainingJobStatusResponse( + job_uuid=job_uuid, + status=status, + scheduled_at=job.scheduled_at, + started_at=job.started_at, + completed_at=job.completed_at, + checkpoints=self._get_checkpoints(job), + resources_allocated=self._get_resources_allocated(job), + ) + + async def cancel_training_job(self, job_uuid: str) -> None: + self._scheduler.cancel(job_uuid) + + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse | None: + job = self._scheduler.get_job(job_uuid) + return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job)) + + async def get_training_jobs(self) -> ListPostTrainingJobsResponse: + return ListPostTrainingJobsResponse( + data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()] + ) diff --git 
a/llama_stack/providers/inline/agents/__init__.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py similarity index 100% rename from llama_stack/providers/inline/agents/__init__.py rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py similarity index 96% rename from llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py index d9ee3d2a8..c7c737fbd 100644 --- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py @@ -14,24 +14,24 @@ import torch from datasets import Dataset from peft import LoraConfig from transformers import ( - AutoModelForCausalLM, AutoTokenizer, ) from trl import SFTConfig, SFTTrainer -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( +from llama_stack.log import get_logger +from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device +from llama_stack_api import ( Checkpoint, DataConfig, + DatasetIO, + Datasets, LoraFinetuningConfig, TrainingConfig, ) -from llama_stack.log import get_logger -from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from ..config import HuggingFacePostTrainingConfig from ..utils import ( + HFAutoModel, calculate_training_steps, create_checkpoints, get_memory_stats, @@ -338,7 +338,7 @@ class HFFinetuningSingleDevice: def save_model( self, - model_obj: AutoModelForCausalLM, + model_obj: HFAutoModel, trainer: SFTTrainer, peft_config: LoraConfig | None, output_dir_path: Path, @@ -350,14 +350,22 @@ class HFFinetuningSingleDevice: peft_config: Optional LoRA configuration output_dir_path: Path to save the model """ + from typing import cast + logger.info("Saving final model") model_obj.config.use_cache = True if peft_config: logger.info("Merging LoRA weights with base model") - model_obj = trainer.model.merge_and_unload() + # TRL's merge_and_unload returns a HuggingFace model + # Both cast() and type: ignore are needed here: + # - cast() tells mypy the return type is HFAutoModel for downstream code + # - type: ignore suppresses errors on the merge_and_unload() call itself, + # which mypy can't type-check due to TRL library's incomplete type stubs + model_obj = cast(HFAutoModel, trainer.model.merge_and_unload()) # type: ignore[union-attr,operator] else: - model_obj = trainer.model + # trainer.model is the trained HuggingFace model + model_obj = cast(HFAutoModel, trainer.model) save_path = output_dir_path / "merged_model" logger.info(f"Saving model to {save_path}") @@ -411,7 +419,7 @@ class HFFinetuningSingleDevice: # Initialize trainer logger.info("Initializing SFTTrainer") trainer = SFTTrainer( - model=model_obj, + model=model_obj, # type: ignore[arg-type] train_dataset=train_dataset, eval_dataset=eval_dataset, peft_config=peft_config, diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py similarity index 97% 
rename from llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py index b39a24c66..da2626555 100644 --- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py @@ -16,15 +16,15 @@ from transformers import ( ) from trl import DPOConfig, DPOTrainer -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( +from llama_stack.log import get_logger +from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device +from llama_stack_api import ( Checkpoint, + DatasetIO, + Datasets, DPOAlignmentConfig, TrainingConfig, ) -from llama_stack.log import get_logger -from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from ..config import HuggingFacePostTrainingConfig from ..utils import ( @@ -309,7 +309,7 @@ class HFDPOAlignmentSingleDevice: save_total_limit=provider_config.save_total_limit, # DPO specific parameters beta=dpo_config.beta, - loss_type=provider_config.dpo_loss_type, + loss_type=provider_config.dpo_loss_type, # type: ignore[arg-type] ) def save_model( @@ -381,13 +381,16 @@ class HFDPOAlignmentSingleDevice: # Initialize DPO trainer logger.info("Initializing DPOTrainer") + # TRL library has incomplete type stubs - use Any to bypass + from typing import Any, cast + trainer = DPOTrainer( - model=model_obj, - ref_model=ref_model, + model=cast(Any, model_obj), # HFAutoModel satisfies PreTrainedModel protocol + ref_model=cast(Any, ref_model), args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset, - processing_class=tokenizer, + processing_class=cast(Any, tokenizer), # AutoTokenizer satisfies interface ) try: diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py new file mode 100644 index 000000000..2037f70e7 --- /dev/null +++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py @@ -0,0 +1,290 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os +import signal +import sys +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING, Any, Protocol + +import psutil +import torch +from datasets import Dataset +from transformers import AutoConfig, AutoModelForCausalLM + +from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig + +if TYPE_CHECKING: + from transformers import PretrainedConfig + + +class HFAutoModel(Protocol): + """Protocol describing HuggingFace AutoModel interface. + + This protocol defines the common interface for HuggingFace AutoModelForCausalLM + and similar models, providing type safety without requiring type stubs. + """ + + config: PretrainedConfig + device: torch.device + + def to(self, device: torch.device) -> "HFAutoModel": ... + def save_pretrained(self, save_directory: str | Path) -> None: ... 
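+
+# NOTE: because HFAutoModel is a structural Protocol, any object exposing
+# matching `config`, `device`, `to()` and `save_pretrained()` members
+# type-checks against it; no inheritance from transformers classes (and no
+# type stubs for them) is required. load_model() below cast()s the concrete
+# AutoModelForCausalLM instance to this Protocol for exactly that reason.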
+ + +from llama_stack.log import get_logger + +from .config import HuggingFacePostTrainingConfig + +logger = get_logger(name=__name__, category="post_training") + + +def setup_environment(): + """Setup common environment variables for training.""" + os.environ["TOKENIZERS_PARALLELISM"] = "false" + os.environ["MKL_THREADING_LAYER"] = "GNU" + os.environ["MKL_SERVICE_FORCE_INTEL"] = "0" + os.environ["MKL_NUM_THREADS"] = "1" + + +def bytes_to_gb(to_convert: int) -> str: + """Converts memory stats to GB and formats to 2 decimal places. + Args: + to_convert: Memory value in bytes + Returns: + str: Memory value in GB formatted to 2 decimal places + """ + return f"{(to_convert / (1024**3)):.2f}" + + +def get_memory_stats(device: torch.device) -> dict[str, Any]: + """Get memory statistics for the given device.""" + stats = { + "system_memory": { + "total": bytes_to_gb(psutil.virtual_memory().total), + "available": bytes_to_gb(psutil.virtual_memory().available), + "used": bytes_to_gb(psutil.virtual_memory().used), + "percent": psutil.virtual_memory().percent, + } + } + + if device.type == "cuda": + stats["device_memory"] = { + "allocated": bytes_to_gb(torch.cuda.memory_allocated(device)), + "reserved": bytes_to_gb(torch.cuda.memory_reserved(device)), + "max_allocated": bytes_to_gb(torch.cuda.max_memory_allocated(device)), + } + elif device.type == "mps": + # MPS doesn't provide direct memory stats, but we can track system memory + stats["device_memory"] = { + "note": "MPS memory stats not directly available", + "system_memory_used": bytes_to_gb(psutil.virtual_memory().used), + } + elif device.type == "cpu": + # For CPU, we track process memory usage + process = psutil.Process() + stats["device_memory"] = { + "process_rss": bytes_to_gb(process.memory_info().rss), + "process_vms": bytes_to_gb(process.memory_info().vms), + "process_percent": process.memory_percent(), + } + + return stats + + +def setup_torch_device(device_str: str) -> torch.device: + """Initialize and validate a PyTorch device. + This function handles device initialization and validation for different device types: + - CUDA: Validates CUDA availability and handles device selection + - MPS: Validates MPS availability for Apple Silicon + - CPU: Basic validation + - HPU: Raises error as it's not supported + Args: + device_str: String specifying the device ('cuda', 'cpu', 'mps') + Returns: + torch.device: The initialized and validated device + Raises: + RuntimeError: If device initialization fails or device is not supported + """ + try: + device = torch.device(device_str) + except RuntimeError as e: + raise RuntimeError(f"Error getting Torch Device {str(e)}") from e + + # Validate device capabilities + if device.type == "cuda": + if not torch.cuda.is_available(): + raise RuntimeError( + f"{device.type}: Torch has no CUDA/ROCm support or could not detect a compatible device." 
+ ) + if device.index is None: + device = torch.device(device.type, torch.cuda.current_device()) + elif device.type == "mps": + if not torch.backends.mps.is_available(): + raise RuntimeError(f"{device.type}: Torch has no MPS support or could not detect a compatible device.") + elif device.type == "hpu": + raise RuntimeError(f"{device.type}: training does not support Intel Gaudi.") + + return device + + +async def load_rows_from_dataset(datasetio_api: DatasetIO, dataset_id: str) -> list[dict[str, Any]]: + """Load dataset from llama stack dataset provider""" + try: + all_rows = await datasetio_api.iterrows( + dataset_id=dataset_id, + limit=-1, + ) + if not isinstance(all_rows.data, list): + raise RuntimeError("Expected dataset data to be a list") + return all_rows.data + except Exception as e: + raise RuntimeError(f"Failed to load dataset: {str(e)}") from e + + +def load_model( + model: str, + device: torch.device, + provider_config: HuggingFacePostTrainingConfig, +) -> HFAutoModel: + """Load and initialize the model for training. + Args: + model: The model identifier to load + device: The device to load the model onto + provider_config: Provider-specific configuration + Returns: + The loaded and initialized model + Raises: + RuntimeError: If model loading fails + """ + from typing import cast + + logger.info("Loading the base model") + try: + model_config = AutoConfig.from_pretrained(model, **provider_config.model_specific_config) + model_obj = AutoModelForCausalLM.from_pretrained( + model, + torch_dtype="auto" if device.type != "cpu" else "float32", + quantization_config=None, + config=model_config, + **provider_config.model_specific_config, + ) + # Always move model to specified device + model_obj = model_obj.to(device) # type: ignore[arg-type] + logger.info(f"Model loaded and moved to device: {model_obj.device}") + # Cast to HFAutoModel protocol - transformers models satisfy this interface + return cast(HFAutoModel, model_obj) + except Exception as e: + raise RuntimeError(f"Failed to load model: {str(e)}") from e + + +def split_dataset(ds: Dataset) -> tuple[Dataset, Dataset]: + """Split dataset into train and validation sets. + Args: + ds: Dataset to split + Returns: + tuple: (train_dataset, eval_dataset) + """ + logger.info("Splitting dataset into train and validation sets") + train_val_split = ds.train_test_split(test_size=0.1, seed=42) + train_dataset = train_val_split["train"] + eval_dataset = train_val_split["test"] + logger.info(f"Split dataset into {len(train_dataset)} training and {len(eval_dataset)} validation examples") + return train_dataset, eval_dataset + + +def setup_signal_handlers(): + """Setup signal handlers for graceful shutdown.""" + + def signal_handler(signum, frame): + logger.info(f"Received signal {signum}, initiating graceful shutdown") + sys.exit(0) + + signal.signal(signal.SIGTERM, signal_handler) + signal.signal(signal.SIGINT, signal_handler) + + +def calculate_training_steps(steps_per_epoch: int, config: TrainingConfig) -> dict[str, int]: + """Calculate training steps and logging configuration. 
+ Args: + steps_per_epoch: Number of training steps per epoch + config: Training configuration + Returns: + dict: Dictionary with calculated step values + """ + total_steps = steps_per_epoch * config.n_epochs + max_steps = min(config.max_steps_per_epoch, total_steps) + logging_steps = max(1, steps_per_epoch // 50) # Log 50 times per epoch + + logger.info("Training configuration:") + logger.info(f"- Steps per epoch: {steps_per_epoch}") + logger.info(f"- Total steps: {total_steps}") + logger.info(f"- Max steps: {max_steps}") + logger.info(f"- Logging steps: {logging_steps}") + + return {"total_steps": total_steps, "max_steps": max_steps, "logging_steps": logging_steps} + + +def get_save_strategy(output_dir_path: Path | None) -> tuple[str, str]: + """Get save and evaluation strategy based on output directory. + Args: + output_dir_path: Optional path to save the model + Returns: + tuple: (save_strategy, eval_strategy) + """ + if output_dir_path: + logger.info(f"Will save checkpoints to {output_dir_path}") + return "epoch", "epoch" + return "no", "no" + + +def create_checkpoints( + output_dir_path: Path, job_uuid: str, model: str, config: TrainingConfig, final_model_name: str +) -> list[Checkpoint]: + """Create checkpoint objects from training output. + Args: + output_dir_path: Path to the training output directory + job_uuid: Unique identifier for the training job + model: Model identifier + config: Training configuration + final_model_name: Name of the final model directory ("merged_model" for SFT, "dpo_model" for DPO) + Returns: + List of Checkpoint objects + """ + checkpoints = [] + + # Add checkpoint directories + checkpoint_dirs = sorted( + [d for d in output_dir_path.glob("checkpoint-*") if d.is_dir()], + key=lambda x: int(x.name.split("-")[1]), + ) + + for epoch_number, checkpoint_dir in enumerate(checkpoint_dirs, start=1): + created_time = datetime.fromtimestamp(os.path.getctime(checkpoint_dir), tz=UTC) + checkpoint = Checkpoint( + identifier=checkpoint_dir.name, + created_at=created_time, + epoch=epoch_number, + post_training_job_id=job_uuid, + path=str(checkpoint_dir), + ) + checkpoints.append(checkpoint) + + # Add final model + final_model_path = output_dir_path / final_model_name + if final_model_path.exists(): + training_type = "sft" if final_model_name == "merged_model" else "dpo" + checkpoint = Checkpoint( + identifier=f"{model}-{training_type}-{config.n_epochs}", + created_at=datetime.now(UTC), + epoch=config.n_epochs, + post_training_job_id=job_uuid, + path=str(final_model_path), + ) + checkpoints.append(checkpoint) + + return checkpoints diff --git a/llama_stack/providers/inline/post_training/torchtune/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/__init__.py similarity index 100% rename from llama_stack/providers/inline/post_training/torchtune/__init__.py rename to src/llama_stack/providers/inline/post_training/torchtune/__init__.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/common/__init__.py similarity index 100% rename from llama_stack/providers/inline/agents/meta_reference/responses/__init__.py rename to src/llama_stack/providers/inline/post_training/torchtune/common/__init__.py diff --git a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py b/src/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py similarity index 99% rename from 
llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
rename to src/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
index af8bd2765..43e206490 100644
--- a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
@@ -91,7 +91,7 @@ class TorchtuneCheckpointer:
         if checkpoint_format == "meta" or checkpoint_format is None:
             self._save_meta_format_checkpoint(model_file_path, state_dict, adapter_only)
         elif checkpoint_format == "huggingface":
-            # Note: for saving hugging face format checkpoints, we only suppport saving adapter weights now
+            # Note: for saving hugging face format checkpoints, we only support saving adapter weights now
             self._save_hf_format_checkpoint(model_file_path, state_dict)
         else:
             raise ValueError(f"Unsupported checkpoint format: {format}")
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
new file mode 100644
index 000000000..f929ea4dd
--- /dev/null
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -0,0 +1,93 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import Callable
+
+import torch
+from pydantic import BaseModel
+from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
+from torchtune.models.llama3 import llama3_tokenizer
+from torchtune.models.llama3._tokenizer import Llama3Tokenizer
+from torchtune.models.llama3_1 import lora_llama3_1_8b
+from torchtune.models.llama3_2 import lora_llama3_2_3b
+from torchtune.modules.transforms import Transform
+
+from llama_stack.models.llama.sku_list import resolve_model
+from llama_stack.models.llama.sku_types import Model
+from llama_stack_api import DatasetFormat
+
+BuildLoraModelCallable = Callable[..., torch.nn.Module]
+BuildTokenizerCallable = Callable[..., Llama3Tokenizer]
+
+
+class ModelConfig(BaseModel):
+    model_definition: BuildLoraModelCallable
+    tokenizer_type: BuildTokenizerCallable
+    checkpoint_type: str
+
+
+MODEL_CONFIGS: dict[str, ModelConfig] = {
+    "Llama3.2-3B-Instruct": ModelConfig(
+        model_definition=lora_llama3_2_3b,
+        tokenizer_type=llama3_tokenizer,
+        checkpoint_type="LLAMA3_2",
+    ),
+    "Llama3.1-8B-Instruct": ModelConfig(
+        model_definition=lora_llama3_1_8b,
+        tokenizer_type=llama3_tokenizer,
+        checkpoint_type="LLAMA3",
+    ),
+}
+
+DATA_FORMATS: dict[str, Transform] = {
+    "instruct": InputOutputToMessages,
+    "dialog": ShareGPTToMessages,
+}
+
+
+def _validate_model_id(model_id: str) -> Model:
+    model = resolve_model(model_id)
+    if model is None or model.core_model_id.value not in MODEL_CONFIGS:
+        raise ValueError(f"Model {model_id} is not supported.")
+    return model
+
+
+async def get_model_definition(
+    model_id: str,
+) -> BuildLoraModelCallable:
+    model = _validate_model_id(model_id)
+    model_config = MODEL_CONFIGS[model.core_model_id.value]
+    if not hasattr(model_config, "model_definition"):
+        raise ValueError(f"Model {model_id} does not have model definition.")
+    return model_config.model_definition
+
+
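+# Illustrative usage (a sketch, not part of the committed change): the accessors
+# in this module share one pattern: resolve the id via _validate_model_id, then
+# read a single field off its ModelConfig. For example:
+#
+#     model_def = await get_model_definition("Llama3.2-3B-Instruct")
+#     # -> lora_llama3_2_3b, ready to be called with LoRA hyperparameters
+#
+# Unsupported model ids fail fast with a ValueError.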
+async def get_tokenizer_type(
+    model_id: str,
+) -> BuildTokenizerCallable:
+    model = _validate_model_id(model_id)
+    model_config = MODEL_CONFIGS[model.core_model_id.value]
+    if not hasattr(model_config, "tokenizer_type"):
+        raise ValueError(f"Model {model_id} does not have tokenizer_type.")
+    return model_config.tokenizer_type
+
+
+async def get_checkpointer_model_type(
+    model_id: str,
+) -> str:
+    """
+    The checkpointer model type is used by the checkpointer to apply special treatment
+    to specific model types. For example, Llama 3.2 models tie weights
+    (https://github.com/pytorch/torchtune/blob/main/torchtune/training/checkpointing/_checkpointer.py#L1041).
+    """
+    model = _validate_model_id(model_id)
+    model_config = MODEL_CONFIGS[model.core_model_id.value]
+    if not hasattr(model_config, "checkpoint_type"):
+        raise ValueError(f"Model {model_id} does not have checkpoint_type.")
+    return model_config.checkpoint_type
+
+
+async def get_data_transform(data_format: DatasetFormat) -> Transform:
+    return DATA_FORMATS[data_format.value]
diff --git a/llama_stack/providers/inline/post_training/torchtune/config.py b/src/llama_stack/providers/inline/post_training/torchtune/config.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/config.py
rename to src/llama_stack/providers/inline/post_training/torchtune/config.py
diff --git a/llama_stack/providers/inline/batches/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/batches/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
similarity index 95%
rename from llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
index 96dd8b8dd..47452efa4 100644
--- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
@@ -25,7 +25,7 @@ def llama_stack_instruct_to_torchtune_instruct(
 )
 
     input_messages = json.loads(sample[ColumnName.chat_completion_input.value])
-    assert len(input_messages) == 1, "llama stack intruct dataset format only supports 1 user message"
+    assert len(input_messages) == 1, "llama stack instruct dataset format only supports 1 user message"
     input_message = input_messages[0]
 
     assert "content" in input_message, "content not found in input message"
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
new file mode 100644
index 000000000..515ff7b66
--- /dev/null
+++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -0,0 +1,178 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from enum import Enum +from typing import Any + +from llama_stack.providers.inline.post_training.torchtune.config import ( + TorchtunePostTrainingConfig, +) +from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler +from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus +from llama_stack_api import ( + AlgorithmConfig, + Checkpoint, + DatasetIO, + Datasets, + DPOAlignmentConfig, + JobStatus, + ListPostTrainingJobsResponse, + LoraFinetuningConfig, + PostTrainingJob, + PostTrainingJobArtifactsResponse, + PostTrainingJobStatusResponse, + TrainingConfig, +) + + +class TrainingArtifactType(Enum): + CHECKPOINT = "checkpoint" + RESOURCES_STATS = "resources_stats" + + +_JOB_TYPE_SUPERVISED_FINE_TUNE = "supervised-fine-tune" + + +class TorchtunePostTrainingImpl: + def __init__( + self, + config: TorchtunePostTrainingConfig, + datasetio_api: DatasetIO, + datasets: Datasets, + ) -> None: + self.config = config + self.datasetio_api = datasetio_api + self.datasets_api = datasets + self._scheduler = Scheduler() + + async def shutdown(self) -> None: + await self._scheduler.shutdown() + + @staticmethod + def _checkpoint_to_artifact(checkpoint: Checkpoint) -> JobArtifact: + return JobArtifact( + type=TrainingArtifactType.CHECKPOINT.value, + name=checkpoint.identifier, + uri=checkpoint.path, + metadata=dict(checkpoint), + ) + + @staticmethod + def _resources_stats_to_artifact(resources_stats: dict[str, Any]) -> JobArtifact: + return JobArtifact( + type=TrainingArtifactType.RESOURCES_STATS.value, + name=TrainingArtifactType.RESOURCES_STATS.value, + metadata=resources_stats, + ) + + async def supervised_fine_tune( + self, + job_uuid: str, + training_config: TrainingConfig, + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + model: str, + checkpoint_dir: str | None, + algorithm_config: AlgorithmConfig | None, + ) -> PostTrainingJob: + if isinstance(algorithm_config, LoraFinetuningConfig): + + async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb): + from llama_stack.providers.inline.post_training.torchtune.recipes.lora_finetuning_single_device import ( + LoraFinetuningSingleDevice, + ) + + on_log_message_cb("Starting Lora finetuning") + + recipe = LoraFinetuningSingleDevice( + self.config, + job_uuid, + training_config, + hyperparam_search_config, + logger_config, + model, + checkpoint_dir, + algorithm_config, + self.datasetio_api, + self.datasets_api, + ) + await recipe.setup() + + resources_allocated, checkpoints = await recipe.train() + + on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated)) + for checkpoint in checkpoints: + artifact = self._checkpoint_to_artifact(checkpoint) + on_artifact_collected_cb(artifact) + + on_status_change_cb(SchedulerJobStatus.completed) + on_log_message_cb("Lora finetuning completed") + else: + raise NotImplementedError() + + job_uuid = self._scheduler.schedule(_JOB_TYPE_SUPERVISED_FINE_TUNE, job_uuid, handler) + return PostTrainingJob(job_uuid=job_uuid) + + async def preference_optimize( + self, + job_uuid: str, + finetuned_model: str, + algorithm_config: DPOAlignmentConfig, + training_config: TrainingConfig, + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + ) -> PostTrainingJob: + raise NotImplementedError() + + async def get_training_jobs(self) -> ListPostTrainingJobsResponse: + return 
ListPostTrainingJobsResponse( + data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()] + ) + + @staticmethod + def _get_artifacts_metadata_by_type(job, artifact_type): + return [artifact.metadata for artifact in job.artifacts if artifact.type == artifact_type] + + @classmethod + def _get_checkpoints(cls, job): + return cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.CHECKPOINT.value) + + @classmethod + def _get_resources_allocated(cls, job): + data = cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.RESOURCES_STATS.value) + return data[0] if data else None + + async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse | None: + job = self._scheduler.get_job(job_uuid) + + match job.status: + # TODO: Add support for other statuses to API + case SchedulerJobStatus.new | SchedulerJobStatus.scheduled: + status = JobStatus.scheduled + case SchedulerJobStatus.running: + status = JobStatus.in_progress + case SchedulerJobStatus.completed: + status = JobStatus.completed + case SchedulerJobStatus.failed: + status = JobStatus.failed + case _: + raise NotImplementedError() + + return PostTrainingJobStatusResponse( + job_uuid=job_uuid, + status=status, + scheduled_at=job.scheduled_at, + started_at=job.started_at, + completed_at=job.completed_at, + checkpoints=self._get_checkpoints(job), + resources_allocated=self._get_resources_allocated(job), + ) + + async def cancel_training_job(self, job_uuid: str) -> None: + self._scheduler.cancel(job_uuid) + + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse | None: + job = self._scheduler.get_job(job_uuid) + return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job)) diff --git a/llama_stack/providers/inline/datasetio/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py similarity index 100% rename from llama_stack/providers/inline/datasetio/__init__.py rename to src/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py similarity index 98% rename from llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py rename to src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index 634cfe457..f5e5db415 100644 --- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -32,17 +32,6 @@ from torchtune.training.lr_schedulers import get_cosine_schedule_with_warmup from torchtune.training.metric_logging import DiskLogger from tqdm import tqdm -from llama_stack.apis.common.training_types import PostTrainingMetric -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DataConfig, - LoraFinetuningConfig, - OptimizerConfig, - QATFinetuningConfig, - TrainingConfig, -) from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR from llama_stack.core.utils.model_utils import model_local_dir from llama_stack.log import get_logger @@ -56,6 +45,17 @@ from llama_stack.providers.inline.post_training.torchtune.config import ( 
TorchtunePostTrainingConfig, ) from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset +from llama_stack_api import ( + Checkpoint, + DataConfig, + DatasetIO, + Datasets, + LoraFinetuningConfig, + OptimizerConfig, + PostTrainingMetric, + QATFinetuningConfig, + TrainingConfig, +) log = get_logger(name=__name__, category="post_training") @@ -193,7 +193,7 @@ class LoraFinetuningSingleDevice: log.info("Optimizer is initialized.") self._loss_fn = CEWithChunkedOutputLoss() - self._model.set_num_output_chunks(self._loss_fn.num_output_chunks) + self._model.set_num_output_chunks(self._loss_fn.num_output_chunks) # type: ignore[operator] log.info("Loss is initialized.") assert isinstance(self.training_config.data_config, DataConfig), "DataConfig must be initialized" @@ -284,7 +284,7 @@ class LoraFinetuningSingleDevice: if self._is_dora: for m in model.modules(): if hasattr(m, "initialize_dora_magnitude"): - m.initialize_dora_magnitude() + m.initialize_dora_magnitude() # type: ignore[operator] if lora_weights_state_dict: lora_missing, lora_unexpected = model.load_state_dict(lora_weights_state_dict, strict=False) else: @@ -353,7 +353,7 @@ class LoraFinetuningSingleDevice: dataset_type=self._data_format.value, ) - sampler = DistributedSampler( + sampler: DistributedSampler = DistributedSampler( ds, num_replicas=1, rank=0, @@ -389,7 +389,7 @@ class LoraFinetuningSingleDevice: num_training_steps=num_training_steps, last_epoch=last_epoch, ) - return lr_scheduler + return lr_scheduler # type: ignore[no-any-return] async def save_checkpoint(self, epoch: int) -> str: ckpt_dict = {} @@ -447,7 +447,7 @@ class LoraFinetuningSingleDevice: # free logits otherwise it peaks backward memory del logits - return loss + return loss # type: ignore[no-any-return] async def train(self) -> tuple[dict[str, Any], list[Checkpoint]]: """ diff --git a/llama_stack/providers/inline/eval/__init__.py b/src/llama_stack/providers/inline/safety/__init__.py similarity index 100% rename from llama_stack/providers/inline/eval/__init__.py rename to src/llama_stack/providers/inline/safety/__init__.py diff --git a/llama_stack/providers/inline/safety/code_scanner/__init__.py b/src/llama_stack/providers/inline/safety/code_scanner/__init__.py similarity index 100% rename from llama_stack/providers/inline/safety/code_scanner/__init__.py rename to src/llama_stack/providers/inline/safety/code_scanner/__init__.py diff --git a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py similarity index 94% rename from llama_stack/providers/inline/safety/code_scanner/code_scanner.py rename to src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py index e1cd8c5e4..071fbe2dc 100644 --- a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py +++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py @@ -10,19 +10,20 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from codeshield.cs import CodeShieldScanResult -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( - RunShieldResponse, - Safety, - SafetyViolation, - ViolationLevel, -) -from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults -from llama_stack.apis.shields import Shield from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) +from llama_stack_api import ( + ModerationObject, + 
ModerationObjectResults, + OpenAIMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ViolationLevel, +) from .config import CodeScannerConfig @@ -101,7 +102,10 @@ class MetaReferenceCodeScannerSafetyImpl(Safety): metadata=metadata, ) - async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: + async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject: + if model is None: + raise ValueError("Code scanner moderation requires a model identifier.") + inputs = input if isinstance(input, list) else [input] results = [] diff --git a/llama_stack/providers/inline/safety/code_scanner/config.py b/src/llama_stack/providers/inline/safety/code_scanner/config.py similarity index 100% rename from llama_stack/providers/inline/safety/code_scanner/config.py rename to src/llama_stack/providers/inline/safety/code_scanner/config.py diff --git a/llama_stack/providers/inline/safety/llama_guard/__init__.py b/src/llama_stack/providers/inline/safety/llama_guard/__init__.py similarity index 100% rename from llama_stack/providers/inline/safety/llama_guard/__init__.py rename to src/llama_stack/providers/inline/safety/llama_guard/__init__.py diff --git a/llama_stack/providers/inline/safety/llama_guard/config.py b/src/llama_stack/providers/inline/safety/llama_guard/config.py similarity index 100% rename from llama_stack/providers/inline/safety/llama_guard/config.py rename to src/llama_stack/providers/inline/safety/llama_guard/config.py diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py similarity index 97% rename from llama_stack/providers/inline/safety/llama_guard/llama_guard.py rename to src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py index 47c6ccbed..ff1536bea 100644 --- a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py +++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py @@ -9,29 +9,29 @@ import uuid from string import Template from typing import Any -from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem -from llama_stack.apis.inference import ( - Inference, - OpenAIChatCompletionRequestWithExtraBody, - OpenAIMessageParam, - OpenAIUserMessageParam, -) -from llama_stack.apis.safety import ( - RunShieldResponse, - Safety, - SafetyViolation, - ViolationLevel, -) -from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults -from llama_stack.apis.shields import Shield from llama_stack.core.datatypes import Api from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import Role from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) +from llama_stack_api import ( + ImageContentItem, + Inference, + ModerationObject, + ModerationObjectResults, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIMessageParam, + OpenAIUserMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ShieldsProtocolPrivate, + TextContentItem, + ViolationLevel, +) from .config import LlamaGuardConfig @@ -200,7 +200,10 @@ class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate): return await impl.run(messages) - async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: + async def run_moderation(self, input: str | 
list[str], model: str | None = None) -> ModerationObject: + if model is None: + raise ValueError("Llama Guard moderation requires a model identifier.") + if isinstance(input, list): messages = input.copy() else: diff --git a/llama_stack/providers/inline/safety/prompt_guard/__init__.py b/src/llama_stack/providers/inline/safety/prompt_guard/__init__.py similarity index 100% rename from llama_stack/providers/inline/safety/prompt_guard/__init__.py rename to src/llama_stack/providers/inline/safety/prompt_guard/__init__.py diff --git a/llama_stack/providers/inline/safety/prompt_guard/config.py b/src/llama_stack/providers/inline/safety/prompt_guard/config.py similarity index 100% rename from llama_stack/providers/inline/safety/prompt_guard/config.py rename to src/llama_stack/providers/inline/safety/prompt_guard/config.py diff --git a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py similarity index 93% rename from llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py rename to src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py index 8ca96300f..51383da1b 100644 --- a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py @@ -9,20 +9,20 @@ from typing import Any import torch from transformers import AutoModelForSequenceClassification, AutoTokenizer -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack.core.utils.model_utils import model_local_dir +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str +from llama_stack_api import ( + ModerationObject, + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ShieldStore, ViolationLevel, ) -from llama_stack.apis.safety.safety import ModerationObject -from llama_stack.apis.shields import Shield -from llama_stack.core.utils.model_utils import model_local_dir -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate -from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from .config import PromptGuardConfig, PromptGuardType @@ -63,7 +63,7 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate): return await self.shield.run(messages) - async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: + async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject: raise NotImplementedError("run_moderation is not implemented for Prompt Guard") diff --git a/llama_stack/providers/inline/inference/__init__.py b/src/llama_stack/providers/inline/scoring/__init__.py similarity index 100% rename from llama_stack/providers/inline/inference/__init__.py rename to src/llama_stack/providers/inline/scoring/__init__.py diff --git a/llama_stack/providers/inline/scoring/basic/__init__.py b/src/llama_stack/providers/inline/scoring/basic/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/basic/__init__.py rename to src/llama_stack/providers/inline/scoring/basic/__init__.py diff --git a/llama_stack/providers/inline/scoring/basic/config.py b/src/llama_stack/providers/inline/scoring/basic/config.py similarity index 100% rename from llama_stack/providers/inline/scoring/basic/config.py rename to 
src/llama_stack/providers/inline/scoring/basic/config.py diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py new file mode 100644 index 000000000..cf5cb79ba --- /dev/null +++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py @@ -0,0 +1,127 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from typing import Any + +from llama_stack.core.datatypes import Api +from llama_stack.providers.utils.common.data_schema_validator import ( + get_valid_schemas, + validate_dataset_schema, +) +from llama_stack_api import ( + DatasetIO, + Datasets, + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFn, + ScoringFnParams, + ScoringFunctionsProtocolPrivate, + ScoringResult, +) + +from .config import BasicScoringConfig +from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn +from .scoring_fn.equality_scoring_fn import EqualityScoringFn +from .scoring_fn.ifeval_scoring_fn import IfEvalScoringFn +from .scoring_fn.regex_parser_math_response_scoring_fn import ( + RegexParserMathResponseScoringFn, +) +from .scoring_fn.regex_parser_scoring_fn import RegexParserScoringFn +from .scoring_fn.subset_of_scoring_fn import SubsetOfScoringFn + +FIXED_FNS = [ + EqualityScoringFn, + SubsetOfScoringFn, + RegexParserScoringFn, + RegexParserMathResponseScoringFn, + IfEvalScoringFn, + DocVQAScoringFn, +] + + +class BasicScoringImpl( + Scoring, + ScoringFunctionsProtocolPrivate, +): + def __init__( + self, + config: BasicScoringConfig, + datasetio_api: DatasetIO, + datasets_api: Datasets, + ) -> None: + self.config = config + self.datasetio_api = datasetio_api + self.datasets_api = datasets_api + self.scoring_fn_id_impls = {} + + async def initialize(self) -> None: + for fn in FIXED_FNS: + impl = fn() + for fn_defs in impl.get_supported_scoring_fn_defs(): + self.scoring_fn_id_impls[fn_defs.identifier] = impl + + async def shutdown(self) -> None: ... + + async def list_scoring_functions(self) -> list[ScoringFn]: + scoring_fn_defs_list = [ + fn_def for impl in self.scoring_fn_id_impls.values() for fn_def in impl.get_supported_scoring_fn_defs() + ] + + for f in scoring_fn_defs_list: + assert f.identifier.startswith("basic"), "All basic scoring fn must have identifier prefixed with 'basic'! 
" + + return scoring_fn_defs_list + + async def register_scoring_function(self, function_def: ScoringFn) -> None: + raise NotImplementedError("Register scoring function not implemented yet") + + async def score_batch( + self, + dataset_id: str, + scoring_functions: dict[str, ScoringFnParams | None] = None, + save_results_dataset: bool = False, + ) -> ScoreBatchResponse: + dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) + validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.scoring.value)) + + all_rows = await self.datasetio_api.iterrows( + dataset_id=dataset_id, + limit=-1, + ) + res = await self.score( + input_rows=all_rows.data, + scoring_functions=scoring_functions, + ) + if save_results_dataset: + # TODO: persist and register dataset on to server for reading + # self.datasets_api.register_dataset() + raise NotImplementedError("Save results dataset not implemented yet") + + return ScoreBatchResponse( + results=res.results, + ) + + async def score( + self, + input_rows: list[dict[str, Any]], + scoring_functions: dict[str, ScoringFnParams | None] = None, + ) -> ScoreResponse: + res = {} + for scoring_fn_id in scoring_functions.keys(): + if scoring_fn_id not in self.scoring_fn_id_impls: + raise ValueError(f"Scoring function {scoring_fn_id} is not supported.") + scoring_fn = self.scoring_fn_id_impls[scoring_fn_id] + scoring_fn_params = scoring_functions.get(scoring_fn_id, None) + score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params) + agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params) + res[scoring_fn_id] = ScoringResult( + score_rows=score_results, + aggregated_results=agg_results, + ) + + return ScoreResponse( + results=res, + ) diff --git a/llama_stack/providers/inline/post_training/__init__.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py similarity index 100% rename from llama_stack/providers/inline/post_training/__init__.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py similarity index 98% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py index b87974d08..e48bab8fa 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py @@ -8,9 +8,8 @@ import json import re from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn +from llama_stack_api import ScoringFnParams, ScoringResultRow from .fn_defs.docvqa import docvqa diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py similarity index 92% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py index 60804330f..2e79240be 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +++ 
b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn +from llama_stack_api import ScoringFnParams, ScoringResultRow from .fn_defs.equality import equality diff --git a/llama_stack/providers/inline/post_training/common/__init__.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py similarity index 100% rename from llama_stack/providers/inline/post_training/common/__init__.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py similarity index 84% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py index aad3dfe26..a7305d13a 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py similarity index 84% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py index 9b24ff791..f7d2f32ae 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py similarity index 85% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py index adca0791d..a2ed1d695 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py similarity index 88% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py index 8b1bf5352..4e2b49a1f 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py @@ -4,9 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, + NumberType, RegexParserScoringFnParams, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py similarity index 94% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py index ea04331c9..df0cf52d9 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py @@ -4,9 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, + NumberType, RegexParserScoringFnParams, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py similarity index 84% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py index 9cae66fa6..1f143c4a6 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py similarity index 96% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py index 77f6176e6..33b1c5a31 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn +from llama_stack_api import ScoringFnParams, ScoringResultRow from .fn_defs.ifeval import ( ifeval, diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py similarity index 94% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py index d765959a8..1f4f2f979 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py @@ -5,9 +5,8 @@ # the root directory of this source tree. 
from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn +from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex from .fn_defs.regex_parser_math_response import ( diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py similarity index 93% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py index cb336e303..1cc74f874 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py @@ -6,9 +6,8 @@ import re from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn +from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow from .fn_defs.regex_parser_multiple_choice_answer import ( regex_parser_multiple_choice_answer, diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py similarity index 91% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py index d6e10e6c9..fe15a4972 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn +from llama_stack_api import ScoringFnParams, ScoringResultRow from .fn_defs.subset_of import subset_of diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py b/src/llama_stack/providers/inline/scoring/basic/utils/__init__.py similarity index 100% rename from llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py rename to src/llama_stack/providers/inline/scoring/basic/utils/__init__.py diff --git a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py b/src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py similarity index 100% rename from llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py rename to src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py diff --git a/llama_stack/providers/inline/scoring/basic/utils/math_utils.py b/src/llama_stack/providers/inline/scoring/basic/utils/math_utils.py similarity index 100% rename from llama_stack/providers/inline/scoring/basic/utils/math_utils.py rename to src/llama_stack/providers/inline/scoring/basic/utils/math_utils.py diff --git a/llama_stack/providers/inline/scoring/braintrust/__init__.py 
b/src/llama_stack/providers/inline/scoring/braintrust/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/braintrust/__init__.py rename to src/llama_stack/providers/inline/scoring/braintrust/__init__.py diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py similarity index 96% rename from llama_stack/providers/inline/scoring/braintrust/braintrust.py rename to src/llama_stack/providers/inline/scoring/braintrust/braintrust.py index 14810f706..cfa35547b 100644 --- a/llama_stack/providers/inline/scoring/braintrust/braintrust.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py @@ -19,25 +19,26 @@ from autoevals.ragas import ( ) from pydantic import BaseModel -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.scoring import ( - ScoreBatchResponse, - ScoreResponse, - Scoring, - ScoringResult, - ScoringResultRow, -) -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams from llama_stack.core.datatypes import Api from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate from llama_stack.providers.utils.common.data_schema_validator import ( get_valid_schemas, validate_dataset_schema, validate_row_schema, ) from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics +from llama_stack_api import ( + DatasetIO, + Datasets, + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFn, + ScoringFnParams, + ScoringFunctionsProtocolPrivate, + ScoringResult, + ScoringResultRow, +) from .config import BraintrustScoringConfig from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/src/llama_stack/providers/inline/scoring/braintrust/config.py similarity index 100% rename from llama_stack/providers/inline/scoring/braintrust/config.py rename to src/llama_stack/providers/inline/scoring/braintrust/config.py diff --git a/llama_stack/providers/inline/post_training/torchtune/common/__init__.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py similarity index 100% rename from llama_stack/providers/inline/post_training/torchtune/common/__init__.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py similarity index 100% rename from llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py similarity index 87% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py index 4fe07f822..b058305b4 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py @@ -4,10 +4,10 @@ # This 
source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py similarity index 86% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py index a1995cc4e..d619d38a8 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py similarity index 86% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py index e8fe15259..34354a1fc 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py similarity index 87% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py index d9b129a8b..4092ccc4a 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py similarity index 87% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py index c1d7e855b..2b32b9eec 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py similarity index 86% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py index 01ddd0dd0..4d6547002 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py similarity index 86% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py index 55d89344a..739dfd7bd 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py similarity index 86% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py index c621ecf7f..59ed5949b 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py similarity index 86% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py index 2e85c0c7c..96c36d226 100644 --- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, BasicScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/llm_as_judge/__init__.py rename to src/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/config.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/config.py similarity index 100% rename from llama_stack/providers/inline/scoring/llm_as_judge/config.py rename to src/llama_stack/providers/inline/scoring/llm_as_judge/config.py diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py new file mode 100644 index 000000000..23e6ad705 --- /dev/null +++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -0,0 +1,114 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
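+# This provider routes every requested scoring function id to a single
+# LlmAsJudgeScoringFn instance, which prompts the configured judge model via
+# the Inference API. An illustrative call against an LlmAsJudgeScoringImpl
+# instance (here `impl`; the row keys shown are assumed to match the judge
+# prompt template configured for the scoring fn):
+#
+#   response = await impl.score(
+#       input_rows=[{"input_query": "q", "generated_answer": "a", "expected_answer": "b"}],
+#       scoring_functions={"llm-as-judge::base": None},
+#   )
+#   response.results["llm-as-judge::base"].aggregated_results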
+from typing import Any + +from llama_stack.core.datatypes import Api +from llama_stack.providers.utils.common.data_schema_validator import ( + get_valid_schemas, + validate_dataset_schema, +) +from llama_stack_api import ( + DatasetIO, + Datasets, + Inference, + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFn, + ScoringFnParams, + ScoringFunctionsProtocolPrivate, + ScoringResult, +) + +from .config import LlmAsJudgeScoringConfig +from .scoring_fn.llm_as_judge_scoring_fn import LlmAsJudgeScoringFn + +LLM_JUDGE_FN = LlmAsJudgeScoringFn + + +class LlmAsJudgeScoringImpl( + Scoring, + ScoringFunctionsProtocolPrivate, +): + def __init__( + self, + config: LlmAsJudgeScoringConfig, + datasetio_api: DatasetIO, + datasets_api: Datasets, + inference_api: Inference, + ) -> None: + self.config = config + self.datasetio_api = datasetio_api + self.datasets_api = datasets_api + self.inference_api = inference_api + + async def initialize(self) -> None: + impl = LLM_JUDGE_FN(inference_api=self.inference_api) + self.llm_as_judge_fn = impl + + async def shutdown(self) -> None: ... + + async def list_scoring_functions(self) -> list[ScoringFn]: + scoring_fn_defs_list = self.llm_as_judge_fn.get_supported_scoring_fn_defs() + + for f in scoring_fn_defs_list: + assert f.identifier.startswith("llm-as-judge"), ( + "All llm-as-judge scoring fn must have identifier prefixed with 'llm-as-judge'! " + ) + + return scoring_fn_defs_list + + async def register_scoring_function(self, function_def: ScoringFn) -> None: + self.llm_as_judge_fn.register_scoring_fn_def(function_def) + + async def unregister_scoring_function(self, scoring_fn_id: str) -> None: + self.llm_as_judge_fn.unregister_scoring_fn_def(scoring_fn_id) + + async def score_batch( + self, + dataset_id: str, + scoring_functions: dict[str, ScoringFnParams | None] | None = None, + save_results_dataset: bool = False, + ) -> ScoreBatchResponse: + dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) + validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.scoring.value)) + + all_rows = await self.datasetio_api.iterrows( + dataset_id=dataset_id, + limit=-1, + ) + res = await self.score( + input_rows=all_rows.data, + scoring_functions=scoring_functions, + ) + if save_results_dataset: + # TODO: persist and register the results dataset on the server for reading + # self.datasets_api.register_dataset() + raise NotImplementedError("Save results dataset not implemented yet") + + return ScoreBatchResponse( + results=res.results, + ) + + async def score( + self, + input_rows: list[dict[str, Any]], + scoring_functions: dict[str, ScoringFnParams | None] | None = None, + ) -> ScoreResponse: + res = {} + for scoring_fn_id in (scoring_functions or {}).keys(): + scoring_fn = self.llm_as_judge_fn + scoring_fn_params = (scoring_functions or {}).get(scoring_fn_id, None) + score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params) + agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params) + res[scoring_fn_id] = ScoringResult( + score_rows=score_results, + aggregated_results=agg_results, + ) + + return ScoreResponse( + results=res, + ) diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py similarity index 100% rename from llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py diff
--git a/llama_stack/providers/inline/safety/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py similarity index 100% rename from llama_stack/providers/inline/safety/__init__.py rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py similarity index 98% rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py index 074f1ff46..ed26169a5 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( AggregationFunctionType, LLMAsJudgeScoringFnParams, + NumberType, ScoringFn, ) diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py similarity index 80% rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py index 205e0bbf3..bffffd878 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py @@ -4,8 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn +from llama_stack_api import LLMAsJudgeScoringFnParams, NumberType, ScoringFn llm_as_judge_base = ScoringFn( identifier="llm-as-judge::base", diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py similarity index 93% rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index fbecb6e20..73ce82cda 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -6,10 +6,8 @@ import re from typing import Any -from llama_stack.apis.inference import Inference, OpenAIChatCompletionRequestWithExtraBody -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn +from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa from .fn_defs.llm_as_judge_base import llm_as_judge_base diff --git a/llama_stack/providers/inline/scoring/__init__.py b/src/llama_stack/providers/inline/tool_runtime/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/__init__.py rename to src/llama_stack/providers/inline/tool_runtime/__init__.py diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py new file mode 100644 index 000000000..60117dc3d --- /dev/null +++ b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
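+# `get_provider_impl` below is the inline provider entry point: it wires the
+# vector_io, inference, and files APIs out of `deps` into MemoryToolRuntimeImpl
+# and initializes it before returning. Illustrative use (the tool name and the
+# store id are placeholders, not defined in this file):
+#
+#   impl = await get_provider_impl(RagToolRuntimeConfig(), deps)
+#   result = await impl.invoke_tool(
+#       "knowledge_search", {"query": "...", "vector_store_ids": ["vs_123"]}
+#   )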
+ +from typing import Any + +from llama_stack_api import Api + +from .config import RagToolRuntimeConfig + + +async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]): + from .memory import MemoryToolRuntimeImpl + + impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files]) + await impl.initialize() + return impl diff --git a/llama_stack/providers/inline/tool_runtime/rag/config.py b/src/llama_stack/providers/inline/tool_runtime/rag/config.py similarity index 100% rename from llama_stack/providers/inline/tool_runtime/rag/config.py rename to src/llama_stack/providers/inline/tool_runtime/rag/config.py diff --git a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py similarity index 90% rename from llama_stack/providers/inline/tool_runtime/rag/context_retriever.py rename to src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py index 14cbec49d..240df199b 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py @@ -7,17 +7,18 @@ from jinja2 import Template -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam -from llama_stack.apis.tools.rag_tool import ( - DefaultRAGQueryGeneratorConfig, - LLMRAGQueryGeneratorConfig, - RAGQueryGenerator, - RAGQueryGeneratorConfig, -) from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) +from llama_stack_api import ( + DefaultRAGQueryGeneratorConfig, + InterleavedContent, + LLMRAGQueryGeneratorConfig, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIUserMessageParam, + RAGQueryGenerator, + RAGQueryGeneratorConfig, +) async def generate_rag_query( diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py similarity index 90% rename from llama_stack/providers/inline/tool_runtime/rag/memory.py rename to src/llama_stack/providers/inline/tool_runtime/rag/memory.py index dc3dfbbca..afb54a8a9 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -14,35 +14,31 @@ import httpx from fastapi import UploadFile from pydantic import TypeAdapter -from llama_stack.apis.common.content_types import ( +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str +from llama_stack.providers.utils.memory.vector_store import parse_data_url +from llama_stack_api import ( URL, + Files, + Inference, InterleavedContent, InterleavedContentItem, - TextContentItem, -) -from llama_stack.apis.files import Files, OpenAIFilePurpose -from llama_stack.apis.inference import Inference -from llama_stack.apis.tools import ( ListToolDefsResponse, + OpenAIFilePurpose, + QueryChunksResponse, RAGDocument, RAGQueryConfig, RAGQueryResult, - RAGToolRuntime, + TextContentItem, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, -) -from llama_stack.apis.vector_io import ( - QueryChunksResponse, VectorIO, VectorStoreChunkingStrategyStatic, VectorStoreChunkingStrategyStaticConfig, ) -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate -from 
llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str -from llama_stack.providers.utils.memory.vector_store import parse_data_url from .config import RagToolRuntimeConfig from .context_retriever import generate_rag_query @@ -91,7 +87,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: return content_str.encode("utf-8"), "text/plain" -class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime): +class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): def __init__( self, config: RagToolRuntimeConfig, @@ -119,7 +115,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti async def insert( self, documents: list[RAGDocument], - vector_db_id: str, + vector_store_id: str, chunk_size_in_tokens: int = 512, ) -> None: if not documents: @@ -158,14 +154,14 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti try: await self.vector_io_api.openai_attach_file_to_vector_store( - vector_store_id=vector_db_id, + vector_store_id=vector_store_id, file_id=created_file.id, attributes=doc.metadata, chunking_strategy=chunking_strategy, ) except Exception as e: log.error( - f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}" + f"Failed to attach file {created_file.id} to vector store {vector_store_id} for document {doc.document_id}: {e}" ) continue @@ -176,10 +172,10 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti async def query( self, content: InterleavedContent, - vector_db_ids: list[str], + vector_store_ids: list[str], query_config: RAGQueryConfig | None = None, ) -> RAGQueryResult: - if not vector_db_ids: + if not vector_store_ids: raise ValueError( - "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID." + "No vector stores were provided to the knowledge search tool. Please provide at least one vector store ID."
) @@ -192,7 +188,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti ) tasks = [ self.vector_io_api.query_chunks( - vector_db_id=vector_db_id, + vector_store_id=vector_store_id, query=query, params={ "mode": query_config.mode, @@ -201,18 +197,18 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti "ranker": query_config.ranker, }, ) - for vector_db_id in vector_db_ids + for vector_store_id in vector_store_ids ] results: list[QueryChunksResponse] = await asyncio.gather(*tasks) chunks = [] scores = [] - for vector_db_id, result in zip(vector_db_ids, results, strict=False): + for vector_store_id, result in zip(vector_store_ids, results, strict=False): for chunk, score in zip(result.chunks, result.scores, strict=False): if not hasattr(chunk, "metadata") or chunk.metadata is None: chunk.metadata = {} - chunk.metadata["vector_db_id"] = vector_db_id + chunk.metadata["vector_store_id"] = vector_store_id chunks.append(chunk) scores.append(score) @@ -250,7 +246,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti metadata_keys_to_exclude_from_context = [ "token_count", "metadata_token_count", - "vector_db_id", + "vector_store_id", ] metadata_for_context = {} for k in chunk_metadata_keys_to_include_from_context: @@ -275,12 +271,15 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti "document_ids": [c.document_id for c in chunks[: len(picked)]], "chunks": [c.content for c in chunks[: len(picked)]], "scores": scores[: len(picked)], - "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]], + "vector_store_ids": [c.metadata["vector_store_id"] for c in chunks[: len(picked)]], }, ) async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: # Parameters are not listed since these methods are not yet invoked automatically # by the LLM. 
The method is only implemented so things like /tools can list without @@ -308,8 +307,10 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: - vector_db_ids = kwargs.get("vector_db_ids", []) + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: + vector_store_ids = kwargs.get("vector_store_ids", []) query_config = kwargs.get("query_config") if query_config: query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config) @@ -319,7 +320,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti query = kwargs["query"] result = await self.query( content=query, - vector_db_ids=vector_db_ids, + vector_store_ids=vector_store_ids, query_config=query_config, ) diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py b/src/llama_stack/providers/inline/vector_io/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py rename to src/llama_stack/providers/inline/vector_io/__init__.py diff --git a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py new file mode 100644 index 000000000..155b8a0cb --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack_api import Api + +from .config import ChromaVectorIOConfig + + +async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]): + from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter + + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py new file mode 100644 index 000000000..3897991f5 --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
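+# An illustrative run-config fragment that `sample_run_config` below renders
+# (backend and namespace values come straight from the KVStoreReference here):
+#
+#   db_path: ${env.CHROMADB_PATH}
+#   persistence:
+#     backend: kv_default
+#     namespace: vector_io::chroma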
+ +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class ChromaVectorIOConfig(BaseModel): + db_path: str + persistence: KVStoreReference = Field(description="Config for KV store backend") + + @classmethod + def sample_run_config( + cls, __distro_dir__: str, db_path: str = "${env.CHROMADB_PATH}", **kwargs: Any + ) -> dict[str, Any]: + return { + "db_path": db_path, + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::chroma", + ).model_dump(exclude_none=True), + } diff --git a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py new file mode 100644 index 000000000..b834589e3 --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack_api import Api + +from .config import FaissVectorIOConfig + + +async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): + from .faiss import FaissVectorIOAdapter + + assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" + + impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py new file mode 100644 index 000000000..d516d9fe9 --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py @@ -0,0 +1,26 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
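Each of the new inline vector_io `__init__.py` modules follows the same entry-point convention: an async `get_provider_impl(config, deps)` factory that imports the adapter lazily, pulls required and optional API dependencies out of `deps`, and awaits `initialize()`. A generic, self-contained sketch of that shape (FakeAdapter and the string dependency keys are stand-ins, not the real Api enum):

```python
import asyncio
from typing import Any


class FakeAdapter:
    def __init__(self, config: dict[str, Any], inference: Any, files: Any | None) -> None:
        self.config, self.inference, self.files = config, inference, files

    async def initialize(self) -> None:
        # Real adapters open their KV store / index here.
        print("adapter initialized")


async def get_provider_impl(config: dict[str, Any], deps: dict[str, Any]) -> FakeAdapter:
    # files is an optional dependency, hence deps.get(...) as in the patch
    impl = FakeAdapter(config, deps["inference"], deps.get("files"))
    await impl.initialize()
    return impl


asyncio.run(get_provider_impl({"db_path": "/tmp/x.db"}, {"inference": object()}))
```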
+ +from typing import Any + +from pydantic import BaseModel + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class FaissVectorIOConfig(BaseModel): + persistence: KVStoreReference + + @classmethod + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: + return { + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::faiss", + ).model_dump(exclude_none=True) + } diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py similarity index 82% rename from llama_stack/providers/inline/vector_io/faiss/faiss.py rename to src/llama_stack/providers/inline/vector_io/faiss/faiss.py index 5e33d4ca3..91a17058b 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -10,21 +10,28 @@ import io import json from typing import Any -import faiss +import faiss # type: ignore[import-untyped] import numpy as np from numpy.typing import NDArray -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex +from llama_stack_api import ( + Chunk, + Files, + HealthResponse, + HealthStatus, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) +from llama_stack_api.internal.kvstore import KVStore from .config import FaissVectorIOConfig @@ -223,7 +230,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") async def register_vector_store(self, vector_store: VectorStore) -> None: - assert self.kvstore is not None + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.") key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" await self.kvstore.set(key=key, value=vector_store.model_dump_json()) @@ -239,7 +247,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco return [i.vector_store for i in self.cache.values()] async def unregister_vector_store(self, vector_store_id: str) -> None: - assert self.kvstore is not None + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. 
Call initialize() before unregistering vector stores.") if vector_store_id not in self.cache: return @@ -248,19 +257,40 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco del self.cache[vector_store_id] await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") - async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = self.cache.get(vector_db_id) + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] + + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.") + + key = f"{VECTOR_DBS_PREFIX}{vector_store_id}" + vector_store_data = await self.kvstore.get(key) + if not vector_store_data: + raise VectorStoreNotFoundError(vector_store_id) + + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store=vector_store, + index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), + inference_api=self.inference_api, + ) + self.cache[vector_store_id] = index + return index + + async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = self.cache.get(vector_store_id) if index is None: - raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}") + raise ValueError(f"Vector DB {vector_store_id} not found. found: {self.cache.keys()}") await index.insert_chunks(chunks) async def query_chunks( - self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None + self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = self.cache.get(vector_db_id) + index = self.cache.get(vector_store_id) if index is None: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) return await index.query_chunks(query, params) diff --git a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py new file mode 100644 index 000000000..2f84769f3 --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack_api import Api + +from .config import MilvusVectorIOConfig + + +async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]): + from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter + + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py new file mode 100644 index 000000000..14ddd2362 --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py @@ -0,0 +1,29 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
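The `_get_and_cache_vector_store_index` helper added to the Faiss adapter above (and mirrored in sqlite-vec below) is a plain cache-or-load pattern: serve from the in-process cache when possible, otherwise rehydrate the VectorStore record from the KV store and cache it. A runnable sketch in pydantic v2 style, with a dict-backed stand-in for the KV store:

```python
import asyncio

from pydantic import BaseModel

VECTOR_DBS_PREFIX = "vector_dbs:"


class VectorStore(BaseModel):
    identifier: str
    embedding_dimension: int


class FakeKVStore:
    def __init__(self) -> None:
        self._data: dict[str, str] = {}

    async def set(self, key: str, value: str) -> None:
        self._data[key] = value

    async def get(self, key: str) -> str | None:
        return self._data.get(key)


cache: dict[str, VectorStore] = {}


async def get_and_cache(kvstore: FakeKVStore, vector_store_id: str) -> VectorStore:
    if vector_store_id in cache:
        return cache[vector_store_id]
    data = await kvstore.get(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
    if data is None:
        raise KeyError(vector_store_id)  # the adapters raise VectorStoreNotFoundError here
    vector_store = VectorStore.model_validate_json(data)
    cache[vector_store_id] = vector_store
    return vector_store


async def main() -> None:
    kv = FakeKVStore()
    vs = VectorStore(identifier="my-store", embedding_dimension=384)
    await kv.set(f"{VECTOR_DBS_PREFIX}{vs.identifier}", vs.model_dump_json())
    print(await get_and_cache(kv, "my-store"))


asyncio.run(main())
```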
+ +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class MilvusVectorIOConfig(BaseModel): + db_path: str + persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") + consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") + + @classmethod + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: + return { + "db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db", + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::milvus", + ).model_dump(exclude_none=True), + } diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py new file mode 100644 index 000000000..145d19455 --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack_api import Api + +from .config import QdrantVectorIOConfig + + +async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]): + from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter + + assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}" + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py new file mode 100644 index 000000000..4251f2f39 --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -0,0 +1,29 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +from typing import Any + +from pydantic import BaseModel + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class QdrantVectorIOConfig(BaseModel): + path: str + persistence: KVStoreReference + + @classmethod + def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: + return { + "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::qdrant", + ).model_dump(exclude_none=True), + } diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py new file mode 100644 index 000000000..e84c299dc --- /dev/null +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
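The `${env.VAR:=default}` strings emitted by these sample_run_config helpers (e.g. `${env.MILVUS_DB_PATH:=...}/milvus.db`) are resolved by the stack's config loader, not by the providers themselves. A hypothetical resolver, assuming the `:=` form means "use the environment variable if set, else fall back to the default":

```python
import os
import re

# Matches "${env.VAR:=default}"; the real substitution lives in the config loader.
_PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+):=([^}]*)\}")


def resolve_env_placeholders(value: str) -> str:
    def _sub(match: re.Match[str]) -> str:
        var, default = match.group(1), match.group(2)
        return os.environ.get(var, default)

    return _PATTERN.sub(_sub, value)


print(resolve_env_placeholders("${env.MILVUS_DB_PATH:=~/.llama/distro}/milvus.db"))
# -> "~/.llama/distro/milvus.db" unless MILVUS_DB_PATH is set
```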
+ +from typing import Any + +from llama_stack_api import Api + +from .config import SQLiteVectorIOConfig + + +async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): + from .sqlite_vec import SQLiteVecVectorIOAdapter + + assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" + impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/config.py similarity index 100% rename from llama_stack/providers/inline/vector_io/sqlite_vec/config.py rename to src/llama_stack/providers/inline/vector_io/sqlite_vec/config.py diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py similarity index 93% rename from llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py rename to src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 37294f173..a384a33dc 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -11,18 +11,11 @@ import struct from typing import Any import numpy as np -import sqlite_vec +import sqlite_vec # type: ignore[import-untyped] from numpy.typing import NDArray -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, @@ -31,6 +24,17 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator +from llama_stack_api import ( + Chunk, + Files, + Inference, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) +from llama_stack_api.internal.kvstore import KVStore logger = get_logger(name=__name__, category="vector_io") @@ -412,6 +416,14 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro return [v.vector_store for v in self.cache.values()] async def register_vector_store(self, vector_store: VectorStore) -> None: + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. 
Call initialize() before registering vector stores.") + + # Save to kvstore for persistence + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) + + # Create and cache the index index = await SQLiteVecIndex.create( vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) @@ -421,13 +433,16 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro if vector_store_id in self.cache: return self.cache[vector_store_id] - if self.vector_store_table is None: - raise VectorStoreNotFoundError(vector_store_id) - - vector_store = self.vector_store_table.get_vector_store(vector_store_id) - if not vector_store: + # Try to load from kvstore + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.") + + key = f"{VECTOR_DBS_PREFIX}{vector_store_id}" + vector_store_data = await self.kvstore.get(key) + if not vector_store_data: raise VectorStoreNotFoundError(vector_store_id) + vector_store = VectorStore.model_validate_json(vector_store_data) index = VectorStoreWithIndex( vector_store=vector_store, index=SQLiteVecIndex( @@ -447,20 +462,20 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro await self.cache[vector_store_id].index.delete() del self.cache[vector_store_id] - async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_store_index(vector_db_id) + async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_store_id) if not index: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api # and then call our index's add_chunks. await index.insert_chunks(chunks) async def query_chunks( - self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None + self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_store_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_store_id) if not index: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/registry/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py rename to src/llama_stack/providers/registry/__init__.py diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py new file mode 100644 index 000000000..22bb45faf --- /dev/null +++ b/src/llama_stack/providers/registry/agents.py @@ -0,0 +1,45 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from llama_stack.core.storage.kvstore import kvstore_dependencies +from llama_stack_api import ( + Api, + InlineProviderSpec, + ProviderSpec, +) + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.agents, + provider_type="inline::meta-reference", + pip_packages=[ + "matplotlib", + "pillow", + "pandas", + "scikit-learn", + "mcp>=1.8.1", + ] + + kvstore_dependencies(), # TODO make this dynamic based on the kvstore config + module="llama_stack.providers.inline.agents.meta_reference", + config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig", + api_dependencies=[ + Api.inference, + Api.vector_io, + Api.tool_runtime, + Api.tool_groups, + Api.conversations, + Api.prompts, + Api.files, + ], + optional_api_dependencies=[ + Api.safety, + ], + description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", + ), + ] diff --git a/src/llama_stack/providers/registry/batches.py b/src/llama_stack/providers/registry/batches.py new file mode 100644 index 000000000..e11bb8332 --- /dev/null +++ b/src/llama_stack/providers/registry/batches.py @@ -0,0 +1,26 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.batches, + provider_type="inline::reference", + pip_packages=[], + module="llama_stack.providers.inline.batches.reference", + config_class="llama_stack.providers.inline.batches.reference.config.ReferenceBatchesImplConfig", + api_dependencies=[ + Api.inference, + Api.files, + Api.models, + ], + description="Reference implementation of batches API with KVStore persistence.", + ), + ] diff --git a/src/llama_stack/providers/registry/datasetio.py b/src/llama_stack/providers/registry/datasetio.py new file mode 100644 index 000000000..bfd7ede3c --- /dev/null +++ b/src/llama_stack/providers/registry/datasetio.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from llama_stack_api import ( + Api, + InlineProviderSpec, + ProviderSpec, + RemoteProviderSpec, +) + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.datasetio, + provider_type="inline::localfs", + pip_packages=["pandas"], + module="llama_stack.providers.inline.datasetio.localfs", + config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig", + api_dependencies=[], + description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.", + ), + RemoteProviderSpec( + api=Api.datasetio, + adapter_type="huggingface", + provider_type="remote::huggingface", + pip_packages=[ + "datasets>=4.0.0", + ], + module="llama_stack.providers.remote.datasetio.huggingface", + config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig", + description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.", + ), + RemoteProviderSpec( + api=Api.datasetio, + adapter_type="nvidia", + provider_type="remote::nvidia", + module="llama_stack.providers.remote.datasetio.nvidia", + config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig", + pip_packages=[ + "datasets>=4.0.0", + ], + description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.", + ), + ] diff --git a/src/llama_stack/providers/registry/eval.py b/src/llama_stack/providers/registry/eval.py new file mode 100644 index 000000000..9c8b1eebd --- /dev/null +++ b/src/llama_stack/providers/registry/eval.py @@ -0,0 +1,46 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.eval, + provider_type="inline::meta-reference", + pip_packages=["tree_sitter", "pythainlp", "langdetect", "emoji", "nltk"], + module="llama_stack.providers.inline.eval.meta_reference", + config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig", + api_dependencies=[ + Api.datasetio, + Api.datasets, + Api.scoring, + Api.inference, + Api.agents, + ], + description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.", + ), + RemoteProviderSpec( + api=Api.eval, + adapter_type="nvidia", + pip_packages=[ + "requests", + ], + provider_type="remote::nvidia", + module="llama_stack.providers.remote.eval.nvidia", + config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig", + description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.", + api_dependencies=[ + Api.datasetio, + Api.datasets, + Api.scoring, + Api.inference, + Api.agents, + ], + ), + ] diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py new file mode 100644 index 000000000..8ce8acd91 --- /dev/null +++ b/src/llama_stack/providers/registry/files.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.core.storage.sqlstore.sqlstore import sql_store_pip_packages +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.files, + provider_type="inline::localfs", + # TODO: make this dynamic according to the sql store type + pip_packages=sql_store_pip_packages, + module="llama_stack.providers.inline.files.localfs", + config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig", + description="Local filesystem-based file storage provider for managing files and documents locally.", + ), + RemoteProviderSpec( + api=Api.files, + provider_type="remote::s3", + adapter_type="s3", + pip_packages=["boto3"] + sql_store_pip_packages, + module="llama_stack.providers.remote.files.s3", + config_class="llama_stack.providers.remote.files.s3.config.S3FilesImplConfig", + description="AWS S3-based file storage provider for scalable cloud file management with metadata persistence.", + ), + RemoteProviderSpec( + api=Api.files, + provider_type="remote::openai", + adapter_type="openai", + pip_packages=["openai"] + sql_store_pip_packages, + module="llama_stack.providers.remote.files.openai", + config_class="llama_stack.providers.remote.files.openai.config.OpenAIFilesImplConfig", + description="OpenAI Files API provider for managing files through OpenAI's native file storage service.", + ), + ] diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py new file mode 100644 index 000000000..819e5aff5 --- /dev/null +++ b/src/llama_stack/providers/registry/inference.py @@ -0,0 +1,316 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from llama_stack_api import ( + Api, + InlineProviderSpec, + ProviderSpec, + RemoteProviderSpec, +) + +META_REFERENCE_DEPS = [ + "accelerate", + "fairscale", + "torch", + "torchvision", + "transformers", + "zmq", + "lm-format-enforcer", + "sentence-transformers", + "torchao==0.8.0", + "fbgemm-gpu-genai==1.1.2", +] + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.inference, + provider_type="inline::meta-reference", + pip_packages=META_REFERENCE_DEPS, + module="llama_stack.providers.inline.inference.meta_reference", + config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig", + description="Meta's reference implementation of inference with support for various model formats and optimization techniques.", + ), + InlineProviderSpec( + api=Api.inference, + provider_type="inline::sentence-transformers", + # CrossEncoder depends on torchao.quantization + pip_packages=[ + "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu", + "sentence-transformers --no-deps", + # required by some SentenceTransformers architectures for tensor rearrange/merge ops + "einops", + # fast HF tokenization backend used by SentenceTransformers models + "tokenizers", + # safe and fast file format for storing and loading tensors + "safetensors", + ], + module="llama_stack.providers.inline.inference.sentence_transformers", + config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig", + description="Sentence Transformers inference provider for text embeddings and similarity search.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="cerebras", + provider_type="remote::cerebras", + pip_packages=[], + module="llama_stack.providers.remote.inference.cerebras", + config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig", + provider_data_validator="llama_stack.providers.remote.inference.cerebras.config.CerebrasProviderDataValidator", + description="Cerebras inference provider for running models on Cerebras Cloud platform.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="ollama", + provider_type="remote::ollama", + pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], + config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", + module="llama_stack.providers.remote.inference.ollama", + description="Ollama inference provider for running local models through the Ollama runtime.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="vllm", + provider_type="remote::vllm", + pip_packages=[], + module="llama_stack.providers.remote.inference.vllm", + config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig", + provider_data_validator="llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator", + description="Remote vLLM inference provider for connecting to vLLM servers.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="tgi", + provider_type="remote::tgi", + pip_packages=["huggingface_hub", "aiohttp"], + module="llama_stack.providers.remote.inference.tgi", + config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig", + description="Text Generation Inference (TGI) provider for HuggingFace model serving.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="hf::serverless", + provider_type="remote::hf::serverless", + pip_packages=["huggingface_hub", "aiohttp"], + 
module="llama_stack.providers.remote.inference.tgi", + config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig", + description="HuggingFace Inference API serverless provider for on-demand model inference.", + ), + RemoteProviderSpec( + api=Api.inference, + provider_type="remote::hf::endpoint", + adapter_type="hf::endpoint", + pip_packages=["huggingface_hub", "aiohttp"], + module="llama_stack.providers.remote.inference.tgi", + config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig", + description="HuggingFace Inference Endpoints provider for dedicated model serving.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="fireworks", + provider_type="remote::fireworks", + pip_packages=[ + "fireworks-ai<=0.17.16", + ], + module="llama_stack.providers.remote.inference.fireworks", + config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig", + provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator", + description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="together", + provider_type="remote::together", + pip_packages=[ + "together", + ], + module="llama_stack.providers.remote.inference.together", + config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig", + provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator", + description="Together AI inference provider for open-source models and collaborative AI development.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="bedrock", + provider_type="remote::bedrock", + pip_packages=[], + module="llama_stack.providers.remote.inference.bedrock", + config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig", + provider_data_validator="llama_stack.providers.remote.inference.bedrock.config.BedrockProviderDataValidator", + description="AWS Bedrock inference provider using OpenAI compatible endpoint.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="databricks", + provider_type="remote::databricks", + pip_packages=["databricks-sdk"], + module="llama_stack.providers.remote.inference.databricks", + config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig", + provider_data_validator="llama_stack.providers.remote.inference.databricks.config.DatabricksProviderDataValidator", + description="Databricks inference provider for running models on Databricks' unified analytics platform.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="nvidia", + provider_type="remote::nvidia", + pip_packages=[], + module="llama_stack.providers.remote.inference.nvidia", + config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig", + provider_data_validator="llama_stack.providers.remote.inference.nvidia.config.NVIDIAProviderDataValidator", + description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="runpod", + provider_type="remote::runpod", + pip_packages=[], + module="llama_stack.providers.remote.inference.runpod", + config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig", + provider_data_validator="llama_stack.providers.remote.inference.runpod.config.RunpodProviderDataValidator", + description="RunPod inference provider for running models on 
RunPod's cloud GPU platform.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="openai", + provider_type="remote::openai", + pip_packages=[], + module="llama_stack.providers.remote.inference.openai", + config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig", + provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator", + description="OpenAI inference provider for accessing GPT models and other OpenAI services.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="anthropic", + provider_type="remote::anthropic", + pip_packages=["anthropic"], + module="llama_stack.providers.remote.inference.anthropic", + config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig", + provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator", + description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="gemini", + provider_type="remote::gemini", + pip_packages=[], + module="llama_stack.providers.remote.inference.gemini", + config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig", + provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator", + description="Google Gemini inference provider for accessing Gemini models and Google's AI services.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="vertexai", + provider_type="remote::vertexai", + pip_packages=[ + "google-cloud-aiplatform", + ], + module="llama_stack.providers.remote.inference.vertexai", + config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig", + provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator", + description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages: + +• Enterprise-grade security: Uses Google Cloud's security controls and IAM +• Better integration: Seamless integration with other Google Cloud services +• Advanced features: Access to additional Vertex AI features like model tuning and monitoring +• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys + +Configuration: +- Set VERTEX_AI_PROJECT environment variable (required) +- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1) +- Use Google Cloud Application Default Credentials or service account key + +Authentication Setup: +Option 1 (Recommended): gcloud auth application-default login +Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path + +Available Models: +- vertex_ai/gemini-2.0-flash +- vertex_ai/gemini-2.5-flash +- vertex_ai/gemini-2.5-pro""", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="groq", + provider_type="remote::groq", + pip_packages=[], + module="llama_stack.providers.remote.inference.groq", + config_class="llama_stack.providers.remote.inference.groq.GroqConfig", + provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator", + description="Groq inference provider for ultra-fast inference using Groq's LPU technology.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="llama-openai-compat", + provider_type="remote::llama-openai-compat", + pip_packages=[], + 
module="llama_stack.providers.remote.inference.llama_openai_compat", + config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig", + provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator", + description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="sambanova", + provider_type="remote::sambanova", + pip_packages=[], + module="llama_stack.providers.remote.inference.sambanova", + config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig", + provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator", + description="SambaNova inference provider for running models on SambaNova's dataflow architecture.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="passthrough", + provider_type="remote::passthrough", + pip_packages=[], + module="llama_stack.providers.remote.inference.passthrough", + config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig", + provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator", + description="Passthrough inference provider for connecting to any external inference service not directly supported.", + ), + RemoteProviderSpec( + api=Api.inference, + adapter_type="watsonx", + provider_type="remote::watsonx", + pip_packages=["litellm"], + module="llama_stack.providers.remote.inference.watsonx", + config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", + provider_data_validator="llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator", + description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.", + ), + RemoteProviderSpec( + api=Api.inference, + provider_type="remote::azure", + adapter_type="azure", + pip_packages=[], + module="llama_stack.providers.remote.inference.azure", + config_class="llama_stack.providers.remote.inference.azure.AzureConfig", + provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator", + description=""" +Azure OpenAI inference provider for accessing GPT models and other Azure services. +Provider documentation +https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview +""", + ), + RemoteProviderSpec( + api=Api.inference, + provider_type="remote::oci", + adapter_type="oci", + pip_packages=["oci"], + module="llama_stack.providers.remote.inference.oci", + config_class="llama_stack.providers.remote.inference.oci.config.OCIConfig", + provider_data_validator="llama_stack.providers.remote.inference.oci.config.OCIProviderDataValidator", + description=""" +Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models. +Provider documentation +https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm +""", + ), + ] diff --git a/src/llama_stack/providers/registry/post_training.py b/src/llama_stack/providers/registry/post_training.py new file mode 100644 index 000000000..a5529b714 --- /dev/null +++ b/src/llama_stack/providers/registry/post_training.py @@ -0,0 +1,69 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from typing import cast + +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec + +# We provide two versions of these providers so that distributions can package the appropriate version of torch. +# The CPU version is used for distributions that don't have GPU support -- they result in smaller container images. +torchtune_def = dict( + api=Api.post_training, + pip_packages=["numpy"], + module="llama_stack.providers.inline.post_training.torchtune", + config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig", + api_dependencies=[ + Api.datasetio, + Api.datasets, + ], + description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.", +) + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + **{ # type: ignore + **torchtune_def, + "provider_type": "inline::torchtune-cpu", + "pip_packages": ( + cast(list[str], torchtune_def["pip_packages"]) + + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"] + ), + }, + ), + InlineProviderSpec( + **{ # type: ignore + **torchtune_def, + "provider_type": "inline::torchtune-gpu", + "pip_packages": ( + cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"] + ), + }, + ), + InlineProviderSpec( + api=Api.post_training, + provider_type="inline::huggingface-gpu", + pip_packages=["trl", "transformers", "peft", "datasets>=4.0.0", "torch"], + module="llama_stack.providers.inline.post_training.huggingface", + config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig", + api_dependencies=[ + Api.datasetio, + Api.datasets, + ], + description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.", + ), + RemoteProviderSpec( + api=Api.post_training, + adapter_type="nvidia", + provider_type="remote::nvidia", + pip_packages=["requests", "aiohttp"], + module="llama_stack.providers.remote.post_training.nvidia", + config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig", + description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.", + ), + ] diff --git a/src/llama_stack/providers/registry/safety.py b/src/llama_stack/providers/registry/safety.py new file mode 100644 index 000000000..c9dbbce24 --- /dev/null +++ b/src/llama_stack/providers/registry/safety.py @@ -0,0 +1,78 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from llama_stack_api import ( + Api, + InlineProviderSpec, + ProviderSpec, + RemoteProviderSpec, +) + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.safety, + provider_type="inline::prompt-guard", + pip_packages=[ + "transformers[accelerate]", + "torch --index-url https://download.pytorch.org/whl/cpu", + ], + module="llama_stack.providers.inline.safety.prompt_guard", + config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig", + description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.", + ), + InlineProviderSpec( + api=Api.safety, + provider_type="inline::llama-guard", + pip_packages=[], + module="llama_stack.providers.inline.safety.llama_guard", + config_class="llama_stack.providers.inline.safety.llama_guard.LlamaGuardConfig", + api_dependencies=[ + Api.inference, + ], + description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.", + ), + InlineProviderSpec( + api=Api.safety, + provider_type="inline::code-scanner", + pip_packages=[ + "codeshield", + ], + module="llama_stack.providers.inline.safety.code_scanner", + config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig", + description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.", + ), + RemoteProviderSpec( + api=Api.safety, + adapter_type="bedrock", + provider_type="remote::bedrock", + pip_packages=["boto3"], + module="llama_stack.providers.remote.safety.bedrock", + config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig", + description="AWS Bedrock safety provider for content moderation using AWS's safety services.", + ), + RemoteProviderSpec( + api=Api.safety, + adapter_type="nvidia", + provider_type="remote::nvidia", + pip_packages=["requests"], + module="llama_stack.providers.remote.safety.nvidia", + config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig", + description="NVIDIA's safety provider for content moderation and safety filtering.", + ), + RemoteProviderSpec( + api=Api.safety, + adapter_type="sambanova", + provider_type="remote::sambanova", + pip_packages=["litellm", "requests"], + module="llama_stack.providers.remote.safety.sambanova", + config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", + provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", + description="SambaNova's safety provider for content moderation and safety filtering.", + ), + ] diff --git a/src/llama_stack/providers/registry/scoring.py b/src/llama_stack/providers/registry/scoring.py new file mode 100644 index 000000000..45c5dbed7 --- /dev/null +++ b/src/llama_stack/providers/registry/scoring.py @@ -0,0 +1,51 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.scoring, + provider_type="inline::basic", + pip_packages=["requests"], + module="llama_stack.providers.inline.scoring.basic", + config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig", + api_dependencies=[ + Api.datasetio, + Api.datasets, + ], + description="Basic scoring provider for simple evaluation metrics and scoring functions.", + ), + InlineProviderSpec( + api=Api.scoring, + provider_type="inline::llm-as-judge", + pip_packages=[], + module="llama_stack.providers.inline.scoring.llm_as_judge", + config_class="llama_stack.providers.inline.scoring.llm_as_judge.LlmAsJudgeScoringConfig", + api_dependencies=[ + Api.datasetio, + Api.datasets, + Api.inference, + ], + description="LLM-as-judge scoring provider that uses language models to evaluate and score responses.", + ), + InlineProviderSpec( + api=Api.scoring, + provider_type="inline::braintrust", + pip_packages=["autoevals"], + module="llama_stack.providers.inline.scoring.braintrust", + config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig", + api_dependencies=[ + Api.datasetio, + Api.datasets, + ], + provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator", + description="Braintrust scoring provider for evaluation and scoring using the Braintrust platform.", + ), + ] diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py new file mode 100644 index 000000000..d34312353 --- /dev/null +++ b/src/llama_stack/providers/registry/tool_runtime.py @@ -0,0 +1,87 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS +from llama_stack_api import ( + Api, + InlineProviderSpec, + ProviderSpec, + RemoteProviderSpec, +) + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.tool_runtime, + provider_type="inline::rag-runtime", + pip_packages=DEFAULT_VECTOR_IO_DEPS + + [ + "tqdm", + "numpy", + "scikit-learn", + "scipy", + "nltk", + "sentencepiece", + "transformers", + ], + module="llama_stack.providers.inline.tool_runtime.rag", + config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", + api_dependencies=[Api.vector_io, Api.inference, Api.files], + description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.", + ), + RemoteProviderSpec( + api=Api.tool_runtime, + adapter_type="brave-search", + provider_type="remote::brave-search", + module="llama_stack.providers.remote.tool_runtime.brave_search", + config_class="llama_stack.providers.remote.tool_runtime.brave_search.config.BraveSearchToolConfig", + pip_packages=["requests"], + provider_data_validator="llama_stack.providers.remote.tool_runtime.brave_search.BraveSearchToolProviderDataValidator", + description="Brave Search tool for web search capabilities with privacy-focused results.", + ), + RemoteProviderSpec( + api=Api.tool_runtime, + adapter_type="bing-search", + provider_type="remote::bing-search", + module="llama_stack.providers.remote.tool_runtime.bing_search", + config_class="llama_stack.providers.remote.tool_runtime.bing_search.config.BingSearchToolConfig", + pip_packages=["requests"], + provider_data_validator="llama_stack.providers.remote.tool_runtime.bing_search.BingSearchToolProviderDataValidator", + description="Bing Search tool for web search capabilities using Microsoft's search engine.", + ), + RemoteProviderSpec( + api=Api.tool_runtime, + adapter_type="tavily-search", + provider_type="remote::tavily-search", + module="llama_stack.providers.remote.tool_runtime.tavily_search", + config_class="llama_stack.providers.remote.tool_runtime.tavily_search.config.TavilySearchToolConfig", + pip_packages=["requests"], + provider_data_validator="llama_stack.providers.remote.tool_runtime.tavily_search.TavilySearchToolProviderDataValidator", + description="Tavily Search tool for AI-optimized web search with structured results.", + ), + RemoteProviderSpec( + api=Api.tool_runtime, + adapter_type="wolfram-alpha", + provider_type="remote::wolfram-alpha", + module="llama_stack.providers.remote.tool_runtime.wolfram_alpha", + config_class="llama_stack.providers.remote.tool_runtime.wolfram_alpha.config.WolframAlphaToolConfig", + pip_packages=["requests"], + provider_data_validator="llama_stack.providers.remote.tool_runtime.wolfram_alpha.WolframAlphaToolProviderDataValidator", + description="Wolfram Alpha tool for computational knowledge and mathematical calculations.", + ), + RemoteProviderSpec( + api=Api.tool_runtime, + adapter_type="model-context-protocol", + provider_type="remote::model-context-protocol", + module="llama_stack.providers.remote.tool_runtime.model_context_protocol", + config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig", + pip_packages=["mcp>=1.8.1"], + provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator", + description="Model Context Protocol (MCP) tool for standardized tool calling and context management.", + ), + ] 
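All of the registry modules in this patch share one shape: a module-level `available_providers()` that returns ProviderSpec entries pointing at a provider module, its config class, and its pip dependencies. A self-contained sketch with a dataclass stand-in for the llama_stack_api spec types (the field set is trimmed for illustration):

```python
from dataclasses import dataclass, field


@dataclass
class ProviderSpec:
    api: str
    provider_type: str
    module: str
    config_class: str
    pip_packages: list[str] = field(default_factory=list)
    description: str = ""


def available_providers() -> list[ProviderSpec]:
    return [
        ProviderSpec(
            api="vector_io",
            provider_type="inline::faiss",
            module="llama_stack.providers.inline.vector_io.faiss",
            config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
            pip_packages=["faiss-cpu", "chardet", "pypdf"],
            description="Inline Faiss vector store.",
        ),
    ]


for spec in available_providers():
    print(f"{spec.provider_type} -> {spec.module}")
```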
diff --git a/src/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py new file mode 100644 index 000000000..a00941586 --- /dev/null +++ b/src/llama_stack/providers/registry/vector_io.py @@ -0,0 +1,828 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +from llama_stack_api import ( + Api, + InlineProviderSpec, + ProviderSpec, + RemoteProviderSpec, +) + +# Common dependencies for all vector IO providers that support document processing +DEFAULT_VECTOR_IO_DEPS = ["chardet", "pypdf"] + + +def available_providers() -> list[ProviderSpec]: + return [ + InlineProviderSpec( + api=Api.vector_io, + provider_type="inline::meta-reference", + pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS, + module="llama_stack.providers.inline.vector_io.faiss", + config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", + deprecation_warning="Please use the `inline::faiss` provider instead.", + api_dependencies=[Api.inference], + optional_api_dependencies=[Api.files, Api.models], + description="Meta's reference implementation of a vector database.", + ), + InlineProviderSpec( + api=Api.vector_io, + provider_type="inline::faiss", + pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS, + module="llama_stack.providers.inline.vector_io.faiss", + config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", + api_dependencies=[Api.inference], + optional_api_dependencies=[Api.files, Api.models], + description=""" +[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It +allows you to store and query vectors directly in memory. +That means you'll get fast and efficient vector retrieval. + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- GPU support +- **Vector search** - FAISS supports pure vector similarity search using embeddings + +## Search Modes + +**Supported:** +- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings + +**Not Supported:** +- **Keyword Search** (`mode="keyword"`): Not supported by FAISS +- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS + +> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality. + +## Usage + +To use Faiss in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Faiss. +3. Start storing and querying vectors. + +## Installation + +You can install Faiss using pip: + +```bash +pip install faiss-cpu +``` +## Documentation +See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for +more details about Faiss in general. +""", + ), + # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a + # source distribution and the wheels are not available for all platforms. 
+ InlineProviderSpec( api=Api.vector_io, provider_type="inline::sqlite-vec", pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.inline.vector_io.sqlite_vec", config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files, Api.models], description=""" +[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It +allows you to store and query vectors directly within an SQLite database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- Uses disk-based storage for persistence, allowing for larger vector storage + +### Comparison to Faiss + +The choice between Faiss and sqlite-vec should be made based on the needs of your application, +as they have different strengths. + +#### Choosing the Right Provider + +Scenario | Recommended Tool | Reason +-- |-----------------| -- +Online Analytical Processing (OLAP) | Faiss | Fast, in-memory searches +Online Transaction Processing (OLTP) | sqlite-vec | Frequent writes and reads +Frequent writes | sqlite-vec | Efficient disk-based storage and incremental indexing +Large datasets | sqlite-vec | Disk-based storage for larger vector storage +Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration + +#### Empirical Example + +Consider the histogram below in which 10,000 randomly generated strings were inserted +in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. + +```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss write times +:width: 400px +``` + +You will notice that the average write time for `sqlite-vec` was 788ms, compared to +47,640ms for Faiss. While the gap is jarring, if you look at the distribution, you can see that Faiss's write times are rather +uniformly spread across the [1500, 100000] ms interval. + +Looking at each individual write in the order that the documents are inserted, you'll see the increase in +write time as Faiss reindexes the vectors after each write. +```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss write times +:width: 400px +``` + +In comparison, the read times for Faiss were on average 10% faster than sqlite-vec. +The modes of the two distributions highlight the difference further: Faiss +will likely yield faster read performance. + +```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss read times +:width: 400px +``` + +## Usage + +To use sqlite-vec in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use SQLite-Vec. +3. Start storing and querying vectors. + +The SQLite-vec provider supports three search modes: + +1. **Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings. +2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5. +3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results.
First performs keyword search to get candidate matches, then applies vector similarity search on those candidates. + +Example with hybrid search: +```python +response = await vector_io.query_chunks( + vector_store_id="my_db", + query="your query here", + params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, +) + +# Using RRF ranker +response = await vector_io.query_chunks( + vector_store_id="my_db", + query="your query here", + params={ + "mode": "hybrid", + "max_chunks": 3, + "score_threshold": 0.7, + "ranker": {"type": "rrf", "impact_factor": 60.0}, + }, +) + +# Using weighted ranker +response = await vector_io.query_chunks( + vector_store_id="my_db", + query="your query here", + params={ + "mode": "hybrid", + "max_chunks": 3, + "score_threshold": 0.7, + "ranker": {"type": "weighted", "alpha": 0.7}, # 70% vector, 30% keyword + }, +) +``` + +Example with explicit vector search: +```python +response = await vector_io.query_chunks( + vector_store_id="my_db", + query="your query here", + params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, +) +``` + +Example with keyword search: +```python +response = await vector_io.query_chunks( + vector_store_id="my_db", + query="your query here", + params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, +) +``` + +## Supported Search Modes + +The SQLite vector store supports three search modes: + +1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks +2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks +3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker + +### Hybrid Search + +Hybrid search combines the strengths of both vector and keyword search by: +- Computing vector similarity scores +- Computing keyword match scores +- Using a ranker to combine these scores + +Two ranker types are supported: + +1. **RRF (Reciprocal Rank Fusion)**: + - Combines ranks from both vector and keyword results + - Uses an impact factor (default: 60.0) to control the weight of higher-ranked results + - Good for balancing between vector and keyword results + - The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks + +2. **Weighted**: + - Linearly combines normalized vector and keyword scores + - Uses an alpha parameter (0-1) to control the blend: + - alpha=0: Only use keyword scores + - alpha=1: Only use vector scores + - alpha=0.5: Equal weight to both (default) + +Example using RAGQueryConfig with different search modes: + +```python +from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker + +# Vector search +config = RAGQueryConfig(mode="vector", max_chunks=5) + +# Keyword search +config = RAGQueryConfig(mode="keyword", max_chunks=5) + +# Hybrid search with custom RRF ranker +config = RAGQueryConfig( + mode="hybrid", + max_chunks=5, + ranker=RRFRanker(impact_factor=50.0), # Custom impact factor +) + +# Hybrid search with weighted ranker +config = RAGQueryConfig( + mode="hybrid", + max_chunks=5, + ranker=WeightedRanker(alpha=0.7), # 70% vector, 30% keyword +) + +# Hybrid search with default RRF ranker +config = RAGQueryConfig( + mode="hybrid", max_chunks=5 +) # Will use RRF with impact_factor=60.0 +``` + +Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored. 
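+
+To build intuition for how RRF blends the two result lists, here is a minimal,
+self-contained sketch of the scoring formula described above. It is illustrative
+only (not the provider's internal implementation), and the chunk IDs are made up.
+
+```python
+def rrf_scores(
+    vector_ranking: list[str],
+    keyword_ranking: list[str],
+    impact_factor: float = 60.0,
+) -> dict[str, float]:
+    """Combine two ranked lists of chunk IDs into one RRF score per chunk."""
+    scores: dict[str, float] = {}
+    for ranking in (vector_ranking, keyword_ranking):
+        for rank, chunk_id in enumerate(ranking, start=1):
+            # Each list contributes 1 / (impact_factor + rank), so chunks that
+            # rank high in both lists accumulate the largest combined scores.
+            scores[chunk_id] = scores.get(chunk_id, 0.0) + 1.0 / (impact_factor + rank)
+    return scores
+
+
+# "b" appears near the top of both rankings, so it ends up with the best score.
+print(rrf_scores(["a", "b", "c"], ["b", "c", "d"]))
+```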
+
+## Installation
+
+You can install SQLite-Vec using pip:
+
+```bash
+pip install sqlite-vec
+```
+
+## Documentation
+
+See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
+
+[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
+""",
+        ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::sqlite_vec",
+            pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.inline.vector_io.sqlite_vec",
+            config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
+            deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+Please refer to the sqlite-vec provider documentation.
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="chromadb",
+            provider_type="remote::chromadb",
+            pip_packages=["chromadb-client"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.chroma",
+            config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Chroma](https://www.trychroma.com/) is an inline and remote vector
+database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Chroma supports:
+- Store embeddings and their metadata
+- Vector search
+- Full-text search
+- Document storage
+- Metadata filtering
+- Multi-modal retrieval
+
+## Usage
+
+To use Chroma in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Chroma.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can install Chroma using pip:
+
+```bash
+pip install chromadb
+```
+
+## Documentation
+See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
+""",
+        ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::chromadb",
+            pip_packages=["chromadb"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.inline.vector_io.chroma",
+            config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Chroma](https://www.trychroma.com/) is an inline and remote vector
+database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Chroma supports:
+- Store embeddings and their metadata
+- Vector search
+- Full-text search
+- Document storage
+- Metadata filtering
+- Multi-modal retrieval
+
+## Usage
+
+To use Chroma in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Chroma.
+3. Start storing and querying vectors.
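+
+As a rough sketch of step 3 (illustrative only): assuming a running stack with this
+provider enabled, an existing vector store, and placeholder values for the base URL
+and store ID, a vector search could look like this:
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local stack
+
+# Vector similarity search against a store backed by Chroma; mirrors the
+# search examples used in the other provider descriptions in this registry.
+results = client.vector_stores.search(
+    vector_store_id="vs_123",  # hypothetical store ID
+    query="What is retrieval augmented generation?",
+    search_mode="vector",
+    max_num_results=5,
+)
+```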
+
+## Installation
+
+You can install Chroma using pip:
+
+```bash
+pip install chromadb
+```
+
+## Documentation
+See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
+
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="pgvector",
+            provider_type="remote::pgvector",
+            pip_packages=["psycopg2-binary"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.pgvector",
+            config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
+allows you to store and query vectors directly within a PostgreSQL database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+
+- Easy to use
+- Fully integrated with Llama Stack
+
+There are three implementations of search available for `PGVectorIndex`:
+
+1. Vector Search:
+- How it works:
+  - Uses PostgreSQL's vector extension (pgvector) to perform similarity search
+  - Compares query embeddings against stored embeddings using cosine distance or other distance metrics
+  - e.g. SQL query: `SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance`
+
+- Characteristics:
+  - Semantic understanding - finds documents similar in meaning even if they don't share keywords
+  - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
+  - Best for: finding conceptually related content, handling synonyms, cross-language search
+
+2. Keyword Search
+- How it works:
+  - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
+  - Converts text to searchable tokens using to_tsvector('english', text). Default language is English.
+  - e.g. SQL query: `SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score`
+
+- Characteristics:
+  - Lexical matching - finds exact keyword matches and variations
+  - Uses GIN (Generalized Inverted Index) for fast text search performance
+  - Scoring: uses PostgreSQL's ts_rank function for relevance scoring
+  - Best for: exact term matching, proper names, technical terms, Boolean-style queries
+
+3. Hybrid Search
+- How it works:
+  - Combines both vector and keyword search results
+  - Runs both searches independently, then merges results using configurable reranking
+
+- Two reranking strategies available:
+  - Reciprocal Rank Fusion (RRF) - (default impact factor: 60.0)
+  - Weighted Average - (default alpha: 0.5)
+
+- Characteristics:
+  - Best of both worlds: semantic understanding + exact matching
+  - Documents appearing in both searches get boosted scores
+  - Configurable balance between semantic and lexical matching
+  - Best for: general-purpose search where you want both precision and recall
+
+4. Database Schema
+The PGVector implementation stores data optimized for all three search types:
+
+```sql
+CREATE TABLE vector_store_xxx (
+    id TEXT PRIMARY KEY,
+    document JSONB,               -- Original document
+    embedding vector(dimension),  -- For vector search
+    content_text TEXT,            -- Raw text content
+    tokenized_content TSVECTOR    -- For keyword search
+);
+
+-- Indexes for performance
+CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content);  -- Keyword search
+-- Vector index created automatically by pgvector
+```
+
+## Usage
+
+To use PGVector in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use pgvector (e.g. `remote::pgvector`).
+3. Start storing and querying vectors.
+
+## Example: setting up your environment for PGVector
+
+1. Export env vars:
+```bash
+export ENABLE_PGVECTOR=true
+export PGVECTOR_HOST=localhost
+export PGVECTOR_PORT=5432
+export PGVECTOR_DB=llamastack
+export PGVECTOR_USER=llamastack
+export PGVECTOR_PASSWORD=llamastack
+```
+
+2. Create the DB:
+```bash
+psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
+psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
+psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+```
+
+## Installation
+
+You can install PGVector using docker:
+
+```bash
+docker pull pgvector/pgvector:pg17
+```
+## Documentation
+See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="weaviate",
+            provider_type="remote::weaviate",
+            pip_packages=["weaviate-client>=4.16.5"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.weaviate",
+            config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
+            provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack.
+It allows you to store and query vectors directly within a Weaviate database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Weaviate supports:
+- Store embeddings and their metadata
+- Vector search
+- Full-text search
+- Hybrid search
+- Document storage
+- Metadata filtering
+- Multi-modal retrieval
+
+## Usage
+
+To use Weaviate in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Weaviate.
+3. Start storing and querying vectors.
+
+## Installation
+
+To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart).
+
+## Documentation
+See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
+""",
+        ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::qdrant",
+            pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.inline.vector_io.qdrant",
+            config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description=r"""
+[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
+allows you to store and query vectors directly in memory.
+That means you'll get fast and efficient vector retrieval.
+
+> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
+> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
+> +> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\] + + + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- Apache 2.0 license terms +- Store embeddings and their metadata +- Supports search by + [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/) + and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search +- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/) +- [Medatata filtering](https://qdrant.tech/articles/vector-search-filtering/) +- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/) + +## Usage + +To use Qdrant in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Qdrant. +3. Start storing and querying vectors. + +## Installation + +You can install Qdrant using docker: + +```bash +docker pull qdrant/qdrant +``` +## Documentation +See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general. +""", + ), + RemoteProviderSpec( + api=Api.vector_io, + adapter_type="qdrant", + provider_type="remote::qdrant", + pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS, + module="llama_stack.providers.remote.vector_io.qdrant", + config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig", + api_dependencies=[Api.inference], + optional_api_dependencies=[Api.files, Api.models], + description=""" +Please refer to the inline provider documentation. +""", + ), + RemoteProviderSpec( + api=Api.vector_io, + adapter_type="milvus", + provider_type="remote::milvus", + pip_packages=["pymilvus>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS, + module="llama_stack.providers.remote.vector_io.milvus", + config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig", + api_dependencies=[Api.inference], + optional_api_dependencies=[Api.files, Api.models], + description=""" +[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It +allows you to store and query vectors directly within a Milvus database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features + +- Easy to use +- Fully integrated with Llama Stack +- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations) + +## Usage + +To use Milvus in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Milvus. +3. Start storing and querying vectors. 
+
+## Installation
+
+If you want to use inline Milvus, you can install:
+
+```bash
+pip install "pymilvus[milvus-lite]"
+```
+
+If you want to use remote Milvus, you can install:
+
+```bash
+pip install pymilvus
+```
+
+## Configuration
+
+In Llama Stack, Milvus can be configured in two ways:
+- **Inline (Local) Configuration** - Uses Milvus-Lite for local storage
+- **Remote Configuration** - Connects to a remote Milvus server
+
+### Inline (Local) Configuration
+
+The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: inline::milvus
+    config:
+      db_path: ~/.llama/distributions/together/milvus_store.db
+```
+
+### Remote Configuration
+
+Remote configuration is suitable for larger data storage requirements:
+
+#### Standard Remote Connection
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "http://<host>:<port>"
+      token: "<user>:<password>"
+```
+
+#### TLS-Enabled Remote Connection (One-way TLS)
+
+For connections to Milvus instances with one-way TLS enabled:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "https://<host>:<port>"
+      token: "<user>:<password>"
+      secure: True
+      server_pem_path: "/path/to/server.pem"
+```
+
+#### Mutual TLS (mTLS) Remote Connection
+
+For connections to Milvus instances with mutual TLS (mTLS) enabled:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "https://<host>:<port>"
+      token: "<user>:<password>"
+      secure: True
+      ca_pem_path: "/path/to/ca.pem"
+      client_pem_path: "/path/to/client.pem"
+      client_key_path: "/path/to/client.key"
+```
+
+#### Key Parameters for TLS Configuration
+
+- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
+- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS).
+- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
+- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
+- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
+
+## Search Modes
+
+Milvus supports three different search modes for both inline and remote configurations:
+
+### Vector Search
+Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.
+
+```python
+# Vector search example
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="What is machine learning?",
+    search_mode="vector",
+    max_num_results=5,
+)
+```
+
+### Keyword Search
+Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.
+
+```python
+# Keyword search example
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="Python programming language",
+    search_mode="keyword",
+    max_num_results=5,
+)
+```
+
+### Hybrid Search
+Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
+ +#### Basic Hybrid Search +```python +# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0) +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, +) +``` + +**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009). + +#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker +RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results. + +```python +# Hybrid search with custom RRF parameters +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "rrf", + "impact_factor": 100.0, # Higher values give more weight to top-ranked results + } + }, +) +``` + +#### Hybrid Search with Weighted Ranker +Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods. + +```python +# Hybrid search with weighted ranker +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "weighted", + "alpha": 0.7, # 70% vector search, 30% keyword search + } + }, +) +``` + +For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md). + +## Documentation +See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. + +For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md). +""", + ), + InlineProviderSpec( + api=Api.vector_io, + provider_type="inline::milvus", + pip_packages=["pymilvus[milvus-lite]>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS, + module="llama_stack.providers.inline.vector_io.milvus", + config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig", + api_dependencies=[Api.inference], + optional_api_dependencies=[Api.files, Api.models], + description=""" +Please refer to the remote provider documentation. 
+""", + ), + ] diff --git a/llama_stack/providers/inline/scoring/basic/utils/__init__.py b/src/llama_stack/providers/remote/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/basic/utils/__init__.py rename to src/llama_stack/providers/remote/__init__.py diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py b/src/llama_stack/providers/remote/agents/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py rename to src/llama_stack/providers/remote/agents/__init__.py diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/remote/datasetio/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py rename to src/llama_stack/providers/remote/datasetio/__init__.py diff --git a/llama_stack/providers/remote/datasetio/huggingface/__init__.py b/src/llama_stack/providers/remote/datasetio/huggingface/__init__.py similarity index 100% rename from llama_stack/providers/remote/datasetio/huggingface/__init__.py rename to src/llama_stack/providers/remote/datasetio/huggingface/__init__.py diff --git a/llama_stack/providers/remote/datasetio/huggingface/config.py b/src/llama_stack/providers/remote/datasetio/huggingface/config.py similarity index 100% rename from llama_stack/providers/remote/datasetio/huggingface/config.py rename to src/llama_stack/providers/remote/datasetio/huggingface/config.py diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py similarity index 92% rename from llama_stack/providers/remote/datasetio/huggingface/huggingface.py rename to src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py index a34e354bf..26390a63b 100644 --- a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py +++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py @@ -6,12 +6,9 @@ from typing import Any from urllib.parse import parse_qs, urlparse -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Dataset -from llama_stack.providers.datatypes import DatasetsProtocolPrivate -from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.providers.utils.pagination import paginate_records +from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse from .config import HuggingfaceDatasetIOConfig diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/README.md b/src/llama_stack/providers/remote/datasetio/nvidia/README.md new file mode 100644 index 000000000..7b9f39141 --- /dev/null +++ b/src/llama_stack/providers/remote/datasetio/nvidia/README.md @@ -0,0 +1,74 @@ +# NVIDIA DatasetIO Provider for LlamaStack + +This provider enables dataset management using NVIDIA's NeMo Customizer service. 
+ +## Features + +- Register datasets for fine-tuning LLMs +- Unregister datasets + +## Getting Started + +### Prerequisites + +- LlamaStack with NVIDIA configuration +- Access to Hosted NVIDIA NeMo Microservice +- API key for authentication with the NVIDIA service + +### Setup + +Build the NVIDIA environment: + +```bash +uv pip install llama-stack-client +uv run llama stack list-deps nvidia | xargs -L1 uv pip install +``` + +### Basic Usage using the LlamaStack Python Client + +#### Initialize the client + +```python +import os + +os.environ["NVIDIA_API_KEY"] = "your-api-key" +os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" +os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" +os.environ["NVIDIA_PROJECT_ID"] = "test-project" +from llama_stack.core.library_client import LlamaStackAsLibraryClient + +client = LlamaStackAsLibraryClient("nvidia") +client.initialize() +``` + +#### Register a dataset + +```python +client.datasets.register( + purpose="post-training/messages", + dataset_id="my-training-dataset", + source={"type": "uri", "uri": "hf://datasets/default/sample-dataset"}, + metadata={ + "format": "json", + "description": "Dataset for LLM fine-tuning", + "provider": "nvidia", + }, +) +``` + +#### Get a list of all registered datasets + +```python +datasets = client.datasets.list() +for dataset in datasets: + print(f"Dataset ID: {dataset.identifier}") + print(f"Description: {dataset.metadata.get('description', '')}") + print(f"Source: {dataset.source.uri}") + print("---") +``` + +#### Unregister a dataset + +```python +client.datasets.unregister(dataset_id="my-training-dataset") +``` diff --git a/llama_stack/providers/remote/datasetio/nvidia/__init__.py b/src/llama_stack/providers/remote/datasetio/nvidia/__init__.py similarity index 100% rename from llama_stack/providers/remote/datasetio/nvidia/__init__.py rename to src/llama_stack/providers/remote/datasetio/nvidia/__init__.py diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/src/llama_stack/providers/remote/datasetio/nvidia/config.py similarity index 100% rename from llama_stack/providers/remote/datasetio/nvidia/config.py rename to src/llama_stack/providers/remote/datasetio/nvidia/config.py diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py new file mode 100644 index 000000000..2f5548fa9 --- /dev/null +++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -0,0 +1,113 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+from typing import Any
+
+import aiohttp
+
+from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
+
+from .config import NvidiaDatasetIOConfig
+
+
+class NvidiaDatasetIOAdapter:
+    """Nvidia NeMo DatasetIO API."""
+
+    def __init__(self, config: NvidiaDatasetIOConfig):
+        self.config = config
+        self.headers = {}
+
+    async def _make_request(
+        self,
+        method: str,
+        path: str,
+        headers: dict[str, Any] | None = None,
+        params: dict[str, Any] | None = None,
+        json: dict[str, Any] | None = None,
+        **kwargs,
+    ) -> dict[str, Any]:
+        """Helper method to make HTTP requests to the Customizer API."""
+        url = f"{self.config.datasets_url}{path}"
+        request_headers = self.headers.copy()
+
+        # Set default Content-Type for JSON requests
+        if json is not None:
+            request_headers["Content-Type"] = "application/json"
+
+        if headers:
+            request_headers.update(headers)
+
+        async with aiohttp.ClientSession(headers=request_headers) as session:
+            async with session.request(method, url, params=params, json=json, **kwargs) as response:
+                if response.status != 200:
+                    error_data = await response.json()
+                    raise Exception(f"API request failed: {error_data}")
+                return await response.json()
+
+    async def register_dataset(
+        self,
+        dataset_def: Dataset,
+    ) -> Dataset:
+        """Register a new dataset.
+
+        Args:
+            dataset_def (Dataset): The dataset definition to register, including its
+                identifier, source, and optional metadata (e.g. format and description).
+
+        Returns:
+            Dataset: The dataset definition that was registered.
+        """
+        # TODO: add warnings for unsupported params
+        request_body = {
+            "name": dataset_def.identifier,
+            "namespace": self.config.dataset_namespace,
+            "files_url": dataset_def.source.uri,
+            "project": self.config.project_id,
+        }
+        if dataset_def.metadata:
+            request_body["format"] = dataset_def.metadata.get("format")
+            request_body["description"] = dataset_def.metadata.get("description")
+        await self._make_request(
+            "POST",
+            "/v1/datasets",
+            json=request_body,
+        )
+        return dataset_def
+
+    async def update_dataset(
+        self,
+        dataset_id: str,
+        dataset_schema: dict[str, ParamType],
+        url: URL,
+        provider_dataset_id: str | None = None,
+        provider_id: str | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        raise NotImplementedError("Not implemented")
+
+    async def unregister_dataset(
+        self,
+        dataset_id: str,
+    ) -> None:
+        await self._make_request(
+            "DELETE",
+            f"/v1/datasets/{self.config.dataset_namespace}/{dataset_id}",
+            headers={"Accept": "application/json", "Content-Type": "application/json"},
+        )
+
+    async def iterrows(
+        self,
+        dataset_id: str,
+        start_index: int | None = None,
+        limit: int | None = None,
+    ) -> PaginatedResponse:
+        raise NotImplementedError("Not implemented")
+
+    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
+        raise NotImplementedError("Not implemented")
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py b/src/llama_stack/providers/remote/eval/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py
rename to src/llama_stack/providers/remote/eval/__init__.py
diff --git a/llama_stack/providers/remote/eval/nvidia/README.md b/src/llama_stack/providers/remote/eval/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/README.md
rename to
src/llama_stack/providers/remote/eval/nvidia/README.md diff --git a/llama_stack/providers/remote/eval/nvidia/__init__.py b/src/llama_stack/providers/remote/eval/nvidia/__init__.py similarity index 100% rename from llama_stack/providers/remote/eval/nvidia/__init__.py rename to src/llama_stack/providers/remote/eval/nvidia/__init__.py diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/src/llama_stack/providers/remote/eval/nvidia/config.py similarity index 100% rename from llama_stack/providers/remote/eval/nvidia/config.py rename to src/llama_stack/providers/remote/eval/nvidia/config.py diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py new file mode 100644 index 000000000..5802cb098 --- /dev/null +++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py @@ -0,0 +1,168 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from typing import Any + +import requests + +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack_api import ( + Agents, + Benchmark, + BenchmarkConfig, + BenchmarksProtocolPrivate, + DatasetIO, + Datasets, + Eval, + EvaluateResponse, + Inference, + Job, + JobStatus, + Scoring, + ScoringResult, +) + +from .config import NVIDIAEvalConfig + +DEFAULT_NAMESPACE = "nvidia" + + +class NVIDIAEvalImpl( + Eval, + BenchmarksProtocolPrivate, + ModelRegistryHelper, +): + def __init__( + self, + config: NVIDIAEvalConfig, + datasetio_api: DatasetIO, + datasets_api: Datasets, + scoring_api: Scoring, + inference_api: Inference, + agents_api: Agents, + ) -> None: + self.config = config + self.datasetio_api = datasetio_api + self.datasets_api = datasets_api + self.scoring_api = scoring_api + self.inference_api = inference_api + self.agents_api = agents_api + + ModelRegistryHelper.__init__(self) + + async def initialize(self) -> None: ... + + async def shutdown(self) -> None: ... 
+ + async def _evaluator_get(self, path: str): + """Helper for making GET requests to the evaluator service.""" + response = requests.get(url=f"{self.config.evaluator_url}{path}") + response.raise_for_status() + return response.json() + + async def _evaluator_post(self, path: str, data: dict[str, Any]): + """Helper for making POST requests to the evaluator service.""" + response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data) + response.raise_for_status() + return response.json() + + async def _evaluator_delete(self, path: str) -> None: + """Helper for making DELETE requests to the evaluator service.""" + response = requests.delete(url=f"{self.config.evaluator_url}{path}") + response.raise_for_status() + + async def register_benchmark(self, task_def: Benchmark) -> None: + """Register a benchmark as an evaluation configuration.""" + await self._evaluator_post( + "/v1/evaluation/configs", + { + "namespace": DEFAULT_NAMESPACE, + "name": task_def.benchmark_id, + # metadata is copied to request body as-is + **task_def.metadata, + }, + ) + + async def unregister_benchmark(self, benchmark_id: str) -> None: + """Unregister a benchmark evaluation configuration from NeMo Evaluator.""" + await self._evaluator_delete(f"/v1/evaluation/configs/{DEFAULT_NAMESPACE}/{benchmark_id}") + + async def run_eval( + self, + benchmark_id: str, + benchmark_config: BenchmarkConfig, + ) -> Job: + """Run an evaluation job for a benchmark.""" + model = ( + benchmark_config.eval_candidate.model + if benchmark_config.eval_candidate.type == "model" + else benchmark_config.eval_candidate.config.model + ) + nvidia_model = self.get_provider_model_id(model) or model + + result = await self._evaluator_post( + "/v1/evaluation/jobs", + { + "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}", + "target": {"type": "model", "model": nvidia_model}, + }, + ) + + return Job(job_id=result["id"], status=JobStatus.in_progress) + + async def evaluate_rows( + self, + benchmark_id: str, + input_rows: list[dict[str, Any]], + scoring_functions: list[str], + benchmark_config: BenchmarkConfig, + ) -> EvaluateResponse: + raise NotImplementedError() + + async def job_status(self, benchmark_id: str, job_id: str) -> Job: + """Get the status of an evaluation job. + + EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed". + JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed" + """ + result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}") + result_status = result["status"] + + job_status = JobStatus.failed + if result_status in ["created", "pending"]: + job_status = JobStatus.scheduled + elif result_status in ["running"]: + job_status = JobStatus.in_progress + elif result_status in ["completed"]: + job_status = JobStatus.completed + elif result_status in ["cancelled"]: + job_status = JobStatus.cancelled + + return Job(job_id=job_id, status=job_status) + + async def job_cancel(self, benchmark_id: str, job_id: str) -> None: + """Cancel the evaluation job.""" + await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {}) + + async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: + """Returns the results of the evaluation job.""" + + job = await self.job_status(benchmark_id, job_id) + status = job.status + if not status or status != JobStatus.completed: + raise ValueError(f"Job {job_id} not completed. 
Status: {status.value}") + + result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results") + + return EvaluateResponse( + # TODO: these are stored in detailed results on NeMo Evaluator side; can be added + generations=[], + scores={ + benchmark_id: ScoringResult( + score_rows=[], + aggregated_results=result, + ) + }, + ) diff --git a/src/llama_stack/providers/remote/files/openai/__init__.py b/src/llama_stack/providers/remote/files/openai/__init__.py new file mode 100644 index 000000000..58f86ecfd --- /dev/null +++ b/src/llama_stack/providers/remote/files/openai/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.core.datatypes import AccessRule, Api + +from .config import OpenAIFilesImplConfig + + +async def get_adapter_impl(config: OpenAIFilesImplConfig, deps: dict[Api, Any], policy: list[AccessRule] | None = None): + from .files import OpenAIFilesImpl + + impl = OpenAIFilesImpl(config, policy or []) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/remote/files/openai/config.py b/src/llama_stack/providers/remote/files/openai/config.py new file mode 100644 index 000000000..a38031e41 --- /dev/null +++ b/src/llama_stack/providers/remote/files/openai/config.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.core.storage.datatypes import SqlStoreReference + + +class OpenAIFilesImplConfig(BaseModel): + """Configuration for OpenAI Files API provider.""" + + api_key: str = Field(description="OpenAI API key for authentication") + metadata_store: SqlStoreReference = Field(description="SQL store configuration for file metadata") + + @classmethod + def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: + return { + "api_key": "${env.OPENAI_API_KEY}", + "metadata_store": SqlStoreReference( + backend="sql_default", + table_name="openai_files_metadata", + ).model_dump(exclude_none=True), + } diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py new file mode 100644 index 000000000..2cfd44168 --- /dev/null +++ b/src/llama_stack/providers/remote/files/openai/files.py @@ -0,0 +1,239 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from datetime import UTC, datetime +from typing import Annotated, Any + +from fastapi import Depends, File, Form, Response, UploadFile + +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl +from llama_stack.providers.utils.files.form_data import parse_expires_after +from llama_stack_api import ( + ExpiresAfter, + Files, + ListOpenAIFileResponse, + OpenAIFileDeleteResponse, + OpenAIFileObject, + OpenAIFilePurpose, + Order, + ResourceNotFoundError, +) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType +from openai import OpenAI + +from .config import OpenAIFilesImplConfig + + +def _make_file_object( + *, + id: str, + filename: str, + purpose: str, + bytes: int, + created_at: int, + expires_at: int, + **kwargs: Any, +) -> OpenAIFileObject: + """ + Construct an OpenAIFileObject and normalize expires_at. + + If expires_at is greater than the max we treat it as no-expiration and + return None for expires_at. + """ + obj = OpenAIFileObject( + id=id, + filename=filename, + purpose=OpenAIFilePurpose(purpose), + bytes=bytes, + created_at=created_at, + expires_at=expires_at, + ) + + if obj.expires_at is not None and obj.expires_at > (obj.created_at + ExpiresAfter.MAX): + obj.expires_at = None # type: ignore + + return obj + + +class OpenAIFilesImpl(Files): + """OpenAI Files API implementation.""" + + def __init__(self, config: OpenAIFilesImplConfig, policy: list[AccessRule]) -> None: + self._config = config + self.policy = policy + self._client: OpenAI | None = None + self._sql_store: AuthorizedSqlStore | None = None + + def _now(self) -> int: + """Return current UTC timestamp as int seconds.""" + return int(datetime.now(UTC).timestamp()) + + async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]: + where: dict[str, str | dict] = {"id": file_id} + if not return_expired: + where["expires_at"] = {">": self._now()} + if not (row := await self.sql_store.fetch_one("openai_files", where=where)): + raise ResourceNotFoundError(file_id, "File", "files.list()") + return row + + async def _delete_file(self, file_id: str) -> None: + """Delete a file from OpenAI and the database.""" + try: + self.client.files.delete(file_id) + except Exception as e: + # If file doesn't exist on OpenAI side, just remove from metadata store + if "not found" not in str(e).lower(): + raise RuntimeError(f"Failed to delete file from OpenAI: {e}") from e + + await self.sql_store.delete("openai_files", where={"id": file_id}) + + async def _delete_if_expired(self, file_id: str) -> None: + """If the file exists and is expired, delete it.""" + if row := await self._get_file(file_id, return_expired=True): + if (expires_at := row.get("expires_at")) and expires_at <= self._now(): + await self._delete_file(file_id) + + async def initialize(self) -> None: + self._client = OpenAI(api_key=self._config.api_key) + + self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store), self.policy) + await self._sql_store.create_table( + "openai_files", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "filename": ColumnType.STRING, + "purpose": ColumnType.STRING, + "bytes": ColumnType.INTEGER, + "created_at": ColumnType.INTEGER, + "expires_at": ColumnType.INTEGER, + }, + ) + + async def shutdown(self) -> None: + pass + + @property + def client(self) -> OpenAI: + assert self._client is not None, 
"Provider not initialized" + return self._client + + @property + def sql_store(self) -> AuthorizedSqlStore: + assert self._sql_store is not None, "Provider not initialized" + return self._sql_store + + async def openai_upload_file( + self, + file: Annotated[UploadFile, File()], + purpose: Annotated[OpenAIFilePurpose, Form()], + expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None, + ) -> OpenAIFileObject: + filename = getattr(file, "filename", None) or "uploaded_file" + content = await file.read() + file_size = len(content) + + created_at = self._now() + + expires_at = created_at + ExpiresAfter.MAX * 42 + if purpose == OpenAIFilePurpose.BATCH: + expires_at = created_at + ExpiresAfter.MAX + + if expires_after is not None: + expires_at = created_at + expires_after.seconds + + try: + from io import BytesIO + + file_obj = BytesIO(content) + file_obj.name = filename + + response = self.client.files.create( + file=file_obj, + purpose=purpose.value, + ) + + file_id = response.id + + entry: dict[str, Any] = { + "id": file_id, + "filename": filename, + "purpose": purpose.value, + "bytes": file_size, + "created_at": created_at, + "expires_at": expires_at, + } + + await self.sql_store.insert("openai_files", entry) + + return _make_file_object(**entry) + + except Exception as e: + raise RuntimeError(f"Failed to upload file to OpenAI: {e}") from e + + async def openai_list_files( + self, + after: str | None = None, + limit: int | None = 10000, + order: Order | None = Order.desc, + purpose: OpenAIFilePurpose | None = None, + ) -> ListOpenAIFileResponse: + if not order: + order = Order.desc + + where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}} + if purpose: + where_conditions["purpose"] = purpose.value + + paginated_result = await self.sql_store.fetch_all( + table="openai_files", + where=where_conditions, + order_by=[("created_at", order.value)], + cursor=("id", after) if after else None, + limit=limit, + ) + + files = [_make_file_object(**row) for row in paginated_result.data] + + return ListOpenAIFileResponse( + data=files, + has_more=paginated_result.has_more, + first_id=files[0].id if files else "", + last_id=files[-1].id if files else "", + ) + + async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject: + await self._delete_if_expired(file_id) + row = await self._get_file(file_id) + return _make_file_object(**row) + + async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: + await self._delete_if_expired(file_id) + _ = await self._get_file(file_id) + await self._delete_file(file_id) + return OpenAIFileDeleteResponse(id=file_id, deleted=True) + + async def openai_retrieve_file_content(self, file_id: str) -> Response: + await self._delete_if_expired(file_id) + + row = await self._get_file(file_id) + + try: + response = self.client.files.content(file_id) + file_content = response.content + + except Exception as e: + if "not found" in str(e).lower(): + await self._delete_file(file_id) + raise ResourceNotFoundError(file_id, "File", "files.list()") from e + raise RuntimeError(f"Failed to download file from OpenAI: {e}") from e + + return Response( + content=file_content, + media_type="application/octet-stream", + headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'}, + ) diff --git a/llama_stack/providers/remote/files/s3/README.md b/src/llama_stack/providers/remote/files/s3/README.md similarity index 100% rename from llama_stack/providers/remote/files/s3/README.md rename to 
src/llama_stack/providers/remote/files/s3/README.md diff --git a/llama_stack/providers/remote/files/s3/__init__.py b/src/llama_stack/providers/remote/files/s3/__init__.py similarity index 100% rename from llama_stack/providers/remote/files/s3/__init__.py rename to src/llama_stack/providers/remote/files/s3/__init__.py diff --git a/llama_stack/providers/remote/files/s3/config.py b/src/llama_stack/providers/remote/files/s3/config.py similarity index 100% rename from llama_stack/providers/remote/files/s3/config.py rename to src/llama_stack/providers/remote/files/s3/config.py diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py new file mode 100644 index 000000000..ec2d8b952 --- /dev/null +++ b/src/llama_stack/providers/remote/files/s3/files.py @@ -0,0 +1,319 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import uuid +from datetime import UTC, datetime +from typing import TYPE_CHECKING, Annotated, Any, cast + +import boto3 +from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError +from fastapi import Depends, File, Form, Response, UploadFile + +if TYPE_CHECKING: + from mypy_boto3_s3.client import S3Client + +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.id_generation import generate_object_id +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl +from llama_stack.providers.utils.files.form_data import parse_expires_after +from llama_stack_api import ( + ExpiresAfter, + Files, + ListOpenAIFileResponse, + OpenAIFileDeleteResponse, + OpenAIFileObject, + OpenAIFilePurpose, + Order, + ResourceNotFoundError, +) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType + +from .config import S3FilesImplConfig + +# TODO: provider data for S3 credentials + + +def _create_s3_client(config: S3FilesImplConfig) -> "S3Client": + try: + s3_config = { + "region_name": config.region, + } + + # endpoint URL if specified (for MinIO, LocalStack, etc.) + if config.endpoint_url: + s3_config["endpoint_url"] = config.endpoint_url + + if config.aws_access_key_id and config.aws_secret_access_key: + s3_config.update( + { + "aws_access_key_id": config.aws_access_key_id, + "aws_secret_access_key": config.aws_secret_access_key, + } + ) + + # Both cast and type:ignore are needed here: + # - cast tells mypy the return type for downstream usage (S3Client vs generic client) + # - type:ignore suppresses the call-overload error from boto3's complex overloaded signatures + return cast("S3Client", boto3.client("s3", **s3_config)) # type: ignore[call-overload] + + except (BotoCoreError, NoCredentialsError) as e: + raise RuntimeError(f"Failed to initialize S3 client: {e}") from e + + +async def _create_bucket_if_not_exists(client: "S3Client", config: S3FilesImplConfig) -> None: + try: + client.head_bucket(Bucket=config.bucket_name) + except ClientError as e: + error_code = e.response["Error"]["Code"] + if error_code == "404": + if not config.auto_create_bucket: + raise RuntimeError( + f"S3 bucket '{config.bucket_name}' does not exist. " + f"Either create the bucket manually or set 'auto_create_bucket: true' in your configuration." 
+ ) from e + try: + # For us-east-1, we can't specify LocationConstraint + if config.region == "us-east-1": + client.create_bucket(Bucket=config.bucket_name) + else: + client.create_bucket( + Bucket=config.bucket_name, + CreateBucketConfiguration=cast(Any, {"LocationConstraint": config.region}), + ) + except ClientError as create_error: + raise RuntimeError( + f"Failed to create S3 bucket '{config.bucket_name}': {create_error}" + ) from create_error + elif error_code == "403": + raise RuntimeError(f"Access denied to S3 bucket '{config.bucket_name}'") from e + else: + raise RuntimeError(f"Failed to access S3 bucket '{config.bucket_name}': {e}") from e + + +def _make_file_object( + *, + id: str, + filename: str, + purpose: str, + bytes: int, + created_at: int, + expires_at: int, + **kwargs: Any, # here to ignore any additional fields, e.g. extra fields from AuthorizedSqlStore +) -> OpenAIFileObject: + """ + Construct an OpenAIFileObject and normalize expires_at. + + If expires_at is greater than the max we treat it as no-expiration and + return None for expires_at. + + The OpenAI spec says expires_at type is Integer, but the implementation + will return None for no expiration. + """ + obj = OpenAIFileObject( + id=id, + filename=filename, + purpose=OpenAIFilePurpose(purpose), + bytes=bytes, + created_at=created_at, + expires_at=expires_at, + ) + + if obj.expires_at is not None and obj.expires_at > (obj.created_at + ExpiresAfter.MAX): + obj.expires_at = None # type: ignore + + return obj + + +class S3FilesImpl(Files): + """S3-based implementation of the Files API.""" + + def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None: + self._config = config + self.policy = policy + self._client: S3Client | None = None + self._sql_store: AuthorizedSqlStore | None = None + + def _now(self) -> int: + """Return current UTC timestamp as int seconds.""" + return int(datetime.now(UTC).timestamp()) + + async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]: + where: dict[str, str | dict] = {"id": file_id} + if not return_expired: + where["expires_at"] = {">": self._now()} + if not (row := await self.sql_store.fetch_one("openai_files", where=where)): + raise ResourceNotFoundError(file_id, "File", "files.list()") + return row + + async def _delete_file(self, file_id: str) -> None: + """Delete a file from S3 and the database.""" + try: + self.client.delete_object( + Bucket=self._config.bucket_name, + Key=file_id, + ) + except ClientError as e: + if e.response["Error"]["Code"] != "NoSuchKey": + raise RuntimeError(f"Failed to delete file from S3: {e}") from e + + await self.sql_store.delete("openai_files", where={"id": file_id}) + + async def _delete_if_expired(self, file_id: str) -> None: + """If the file exists and is expired, delete it.""" + if row := await self._get_file(file_id, return_expired=True): + if (expires_at := row.get("expires_at")) and expires_at <= self._now(): + await self._delete_file(file_id) + + async def initialize(self) -> None: + self._client = _create_s3_client(self._config) + await _create_bucket_if_not_exists(self._client, self._config) + + self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store), self.policy) + await self._sql_store.create_table( + "openai_files", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "filename": ColumnType.STRING, + "purpose": ColumnType.STRING, + "bytes": ColumnType.INTEGER, + "created_at": ColumnType.INTEGER, + "expires_at": ColumnType.INTEGER, + # 
TODO: add s3_etag field for integrity checking
+            },
+        )
+
+    async def shutdown(self) -> None:
+        pass
+
+    @property
+    def client(self) -> "S3Client":
+        assert self._client is not None, "Provider not initialized"
+        return self._client
+
+    @property
+    def sql_store(self) -> AuthorizedSqlStore:
+        assert self._sql_store is not None, "Provider not initialized"
+        return self._sql_store
+
+    async def openai_upload_file(
+        self,
+        file: Annotated[UploadFile, File()],
+        purpose: Annotated[OpenAIFilePurpose, Form()],
+        expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None,
+    ) -> OpenAIFileObject:
+        file_id = generate_object_id("file", lambda: f"file-{uuid.uuid4().hex}")
+
+        filename = getattr(file, "filename", None) or "uploaded_file"
+
+        created_at = self._now()
+
+        # the default is no expiration.
+        # to implement no expiration we set an expiration beyond the max.
+        # we'll hide this fact from users when returning the file object.
+        expires_at = created_at + ExpiresAfter.MAX * 42
+        # the default for BATCH files is 30 days, which happens to be the expiration max.
+        if purpose == OpenAIFilePurpose.BATCH:
+            expires_at = created_at + ExpiresAfter.MAX
+
+        if expires_after is not None:
+            expires_at = created_at + expires_after.seconds
+
+        content = await file.read()
+        file_size = len(content)
+
+        entry: dict[str, Any] = {
+            "id": file_id,
+            "filename": filename,
+            "purpose": purpose.value,
+            "bytes": file_size,
+            "created_at": created_at,
+            "expires_at": expires_at,
+        }
+
+        await self.sql_store.insert("openai_files", entry)
+
+        try:
+            self.client.put_object(
+                Bucket=self._config.bucket_name,
+                Key=file_id,
+                Body=content,
+                # TODO: enable server-side encryption
+            )
+        except ClientError as e:
+            await self.sql_store.delete("openai_files", where={"id": file_id})
+
+            raise RuntimeError(f"Failed to upload file to S3: {e}") from e
+
+        return _make_file_object(**entry)
+
+    async def openai_list_files(
+        self,
+        after: str | None = None,
+        limit: int | None = 10000,
+        order: Order | None = Order.desc,
+        purpose: OpenAIFilePurpose | None = None,
+    ) -> ListOpenAIFileResponse:
+        # this is purely defensive. it should not happen because the router also defaults to Order.desc.
+        if not order:
+            order = Order.desc
+
+        where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}}
+        if purpose:
+            where_conditions["purpose"] = purpose.value
+
+        paginated_result = await self.sql_store.fetch_all(
+            table="openai_files",
+            where=where_conditions,
+            order_by=[("created_at", order.value)],
+            cursor=("id", after) if after else None,
+            limit=limit,
+        )
+
+        files = [_make_file_object(**row) for row in paginated_result.data]
+
+        return ListOpenAIFileResponse(
+            data=files,
+            has_more=paginated_result.has_more,
+            # empty string or None?
spec says str, ref impl returns str | None, we go with spec + first_id=files[0].id if files else "", + last_id=files[-1].id if files else "", + ) + + async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject: + await self._delete_if_expired(file_id) + row = await self._get_file(file_id) + return _make_file_object(**row) + + async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: + await self._delete_if_expired(file_id) + _ = await self._get_file(file_id) # raises if not found + await self._delete_file(file_id) + return OpenAIFileDeleteResponse(id=file_id, deleted=True) + + async def openai_retrieve_file_content(self, file_id: str) -> Response: + await self._delete_if_expired(file_id) + + row = await self._get_file(file_id) + + try: + response = self.client.get_object( + Bucket=self._config.bucket_name, + Key=row["id"], + ) + # TODO: can we stream this instead of loading it into memory + content = response["Body"].read() + except ClientError as e: + if e.response["Error"]["Code"] == "NoSuchKey": + await self._delete_file(file_id) + raise ResourceNotFoundError(file_id, "File", "files.list()") from e + raise RuntimeError(f"Failed to download file from S3: {e}") from e + + return Response( + content=content, + media_type="application/octet-stream", + headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'}, + ) diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/remote/inference/__init__.py similarity index 100% rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py rename to src/llama_stack/providers/remote/inference/__init__.py diff --git a/llama_stack/providers/remote/inference/anthropic/__init__.py b/src/llama_stack/providers/remote/inference/anthropic/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/anthropic/__init__.py rename to src/llama_stack/providers/remote/inference/anthropic/__init__.py diff --git a/llama_stack/providers/remote/inference/anthropic/anthropic.py b/src/llama_stack/providers/remote/inference/anthropic/anthropic.py similarity index 91% rename from llama_stack/providers/remote/inference/anthropic/anthropic.py rename to src/llama_stack/providers/remote/inference/anthropic/anthropic.py index dc9d8fb40..112b70524 100644 --- a/llama_stack/providers/remote/inference/anthropic/anthropic.py +++ b/src/llama_stack/providers/remote/inference/anthropic/anthropic.py @@ -33,4 +33,5 @@ class AnthropicInferenceAdapter(OpenAIMixin): return "https://api.anthropic.com/v1" async def list_provider_model_ids(self) -> Iterable[str]: - return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()] + api_key = self._get_api_key_from_config_or_provider_data() + return [m.id async for m in AsyncAnthropic(api_key=api_key).models.list()] diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py new file mode 100644 index 000000000..b706b90e1 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/anthropic/config.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
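The Anthropic change above routes model listing through the shared config-or-provider-data key lookup. A minimal sketch of that pattern, assuming the mixin helper prefers a per-request key from provider data and falls back to the config credential; the names and precedence here are illustrative, not the mixin's actual implementation:

```python
# Illustrative stand-in for OpenAIMixin._get_api_key_from_config_or_provider_data;
# the precedence shown (provider data first, then config) is an assumption.
from pydantic import BaseModel, SecretStr


class DemoConfig(BaseModel):
    auth_credential: SecretStr | None = None


def resolve_api_key(config: DemoConfig, provider_data: dict | None, field: str) -> str | None:
    # Prefer a per-request key from provider data, then fall back to the config credential.
    if provider_data and provider_data.get(field):
        return provider_data[field]
    if config.auth_credential:
        return config.auth_credential.get_secret_value()
    return None


print(resolve_api_key(DemoConfig(), {"anthropic_api_key": "sk-demo"}, "anthropic_api_key"))  # sk-demo
```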
+ +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class AnthropicProviderDataValidator(BaseModel): + anthropic_api_key: str | None = Field( + default=None, + description="API key for Anthropic models", + ) + + +@json_schema_type +class AnthropicConfig(RemoteInferenceProviderConfig): + @classmethod + def sample_run_config(cls, api_key: str = "${env.ANTHROPIC_API_KEY:=}", **kwargs) -> dict[str, Any]: + return { + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/azure/__init__.py b/src/llama_stack/providers/remote/inference/azure/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/azure/__init__.py rename to src/llama_stack/providers/remote/inference/azure/__init__.py diff --git a/llama_stack/providers/remote/inference/azure/azure.py b/src/llama_stack/providers/remote/inference/azure/azure.py similarity index 85% rename from llama_stack/providers/remote/inference/azure/azure.py rename to src/llama_stack/providers/remote/inference/azure/azure.py index 134d01b15..c977d75d5 100644 --- a/llama_stack/providers/remote/inference/azure/azure.py +++ b/src/llama_stack/providers/remote/inference/azure/azure.py @@ -4,8 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from urllib.parse import urljoin - from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import AzureConfig @@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin): Returns the Azure API base URL from the configuration. """ - return urljoin(str(self.config.api_base), "/openai/v1") + return str(self.config.base_url) diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py new file mode 100644 index 000000000..f6407a183 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/azure/config.py @@ -0,0 +1,62 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
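The `sample_run_config` values in these new config files use `${env.VAR:=default}` placeholders. A simplified illustration of how such placeholders expand, assuming Llama Stack's real resolver follows the same `:=` default convention (it may handle more syntax than this):

```python
# Simplified stand-in for Llama Stack's env substitution.
import os
import re

_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z0-9_]+):=([^}]*)\}")


def expand(value: str) -> str:
    # Replace each placeholder with the env var's value, or its inline default.
    return _PLACEHOLDER.sub(lambda m: os.getenv(m.group(1), m.group(2)), value)


os.environ["ANTHROPIC_API_KEY"] = "sk-demo"
print(expand("${env.ANTHROPIC_API_KEY:=}"))    # sk-demo
print(expand("${env.AZURE_API_TYPE:=azure}"))  # azure (falls back to the default)
```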
+ +import os +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl, SecretStr + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class AzureProviderDataValidator(BaseModel): + azure_api_key: SecretStr = Field( + description="Azure API key for Azure", + ) + azure_api_base: HttpUrl = Field( + description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)", + ) + azure_api_version: str | None = Field( + default=None, + description="Azure API version for Azure (e.g., 2024-06-01)", + ) + azure_api_type: str | None = Field( + default="azure", + description="Azure API type for Azure (e.g., azure)", + ) + + +@json_schema_type +class AzureConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=None, + description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)", + ) + api_version: str | None = Field( + default_factory=lambda: os.getenv("AZURE_API_VERSION"), + description="Azure API version for Azure (e.g., 2024-12-01-preview)", + ) + api_type: str | None = Field( + default_factory=lambda: os.getenv("AZURE_API_TYPE", "azure"), + description="Azure API type for Azure (e.g., azure)", + ) + + @classmethod + def sample_run_config( + cls, + api_key: str = "${env.AZURE_API_KEY:=}", + base_url: str = "${env.AZURE_API_BASE:=}", + api_version: str = "${env.AZURE_API_VERSION:=}", + api_type: str = "${env.AZURE_API_TYPE:=}", + **kwargs, + ) -> dict[str, Any]: + return { + "api_key": api_key, + "base_url": base_url, + "api_version": api_version, + "api_type": api_type, + } diff --git a/src/llama_stack/providers/remote/inference/bedrock/__init__.py b/src/llama_stack/providers/remote/inference/bedrock/__init__.py new file mode 100644 index 000000000..4b0686b18 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/bedrock/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from .config import BedrockConfig + + +async def get_adapter_impl(config: BedrockConfig, _deps): + from .bedrock import BedrockInferenceAdapter + + assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}" + + impl = BedrockInferenceAdapter(config=config) + + await impl.initialize() + + return impl diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py new file mode 100644 index 000000000..a890a568e --- /dev/null +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -0,0 +1,126 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
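The `get_adapter_impl` factory above validates the config type, constructs the adapter, and initializes it before returning. A hedged usage sketch, assuming a llama-stack checkout is installed; the region value is just an example:

```python
# Assumes a llama-stack checkout is installed; region_name is an example value.
import asyncio

from llama_stack.providers.remote.inference.bedrock import get_adapter_impl
from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig


async def main() -> None:
    config = BedrockConfig(region_name="us-west-2")
    impl = await get_adapter_impl(config, None)  # second arg is the (unused) deps mapping
    print(type(impl).__name__)  # BedrockInferenceAdapter


asyncio.run(main())
```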
+ +from collections.abc import AsyncIterator, Iterable + +from openai import AuthenticationError + +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, + OpenAICompletion, + OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) + +from .config import BedrockConfig + +logger = get_logger(name=__name__, category="inference::bedrock") + + +class BedrockInferenceAdapter(OpenAIMixin): + """ + Adapter for AWS Bedrock's OpenAI-compatible API endpoints. + + Supports Llama models across regions and GPT-OSS models (us-west-2 only). + + Note: Bedrock's OpenAI-compatible endpoint does not support /v1/models + for dynamic model discovery. Models must be pre-registered in the config. + """ + + config: BedrockConfig + provider_data_api_key_field: str = "aws_bearer_token_bedrock" + + def get_base_url(self) -> str: + """Get base URL for OpenAI client.""" + return f"https://bedrock-runtime.{self.config.region_name}.amazonaws.com/openai/v1" + + async def list_provider_model_ids(self) -> Iterable[str]: + """ + Bedrock's OpenAI-compatible endpoint does not support the /v1/models endpoint. + Returns empty list since models must be pre-registered in the config. + """ + return [] + + async def check_model_availability(self, model: str) -> bool: + """ + Bedrock doesn't support dynamic model listing via /v1/models. + Always return True to accept all models registered in the config. + """ + return True + + async def openai_embeddings( + self, + params: OpenAIEmbeddingsRequestWithExtraBody, + ) -> OpenAIEmbeddingsResponse: + """Bedrock's OpenAI-compatible API does not support the /v1/embeddings endpoint.""" + raise NotImplementedError( + "Bedrock's OpenAI-compatible API does not support /v1/embeddings endpoint. " + "See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html" + ) + + async def openai_completion( + self, + params: OpenAICompletionRequestWithExtraBody, + ) -> OpenAICompletion: + """Bedrock's OpenAI-compatible API does not support the /v1/completions endpoint.""" + raise NotImplementedError( + "Bedrock's OpenAI-compatible API does not support /v1/completions endpoint. " + "Only /v1/chat/completions is supported. " + "See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html" + ) + + async def openai_chat_completion( + self, + params: OpenAIChatCompletionRequestWithExtraBody, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + """Override to enable streaming usage metrics and handle authentication errors.""" + # Enable streaming usage metrics when telemetry is active + if params.stream: + if params.stream_options is None: + params.stream_options = {"include_usage": True} + elif "include_usage" not in params.stream_options: + params.stream_options = {**params.stream_options, "include_usage": True} + + try: + logger.debug(f"Calling Bedrock OpenAI API with model={params.model}, stream={params.stream}") + result = await super().openai_chat_completion(params=params) + logger.debug(f"Bedrock API returned: {type(result).__name__ if result is not None else 'None'}") + + if result is None: + logger.error(f"Bedrock OpenAI client returned None for model={params.model}, stream={params.stream}") + raise RuntimeError( + f"Bedrock API returned no response for model '{params.model}'. 
" + "This may indicate the model is not supported or a network/API issue occurred." + ) + + return result + except AuthenticationError as e: + error_msg = str(e) + + # Check if this is a token expiration error + if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg: + logger.error(f"AWS Bedrock authentication token expired: {error_msg}") + raise ValueError( + "AWS Bedrock authentication failed: Bearer token has expired. " + "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. " + "Please refresh your token by generating a new pre-signed URL with AWS credentials. " + "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints." + ) from e + else: + logger.error(f"AWS Bedrock authentication failed: {error_msg}") + raise ValueError( + f"AWS Bedrock authentication failed: {error_msg}. " + "Please verify your API key is correct in the provider config or x-llamastack-provider-data header. " + "The API key should be a valid AWS pre-signed URL for Bedrock's OpenAI-compatible endpoint." + ) from e + except Exception as e: + logger.error(f"Unexpected error calling Bedrock API: {type(e).__name__}: {e}", exc_info=True) + raise diff --git a/src/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py new file mode 100644 index 000000000..f31db63aa --- /dev/null +++ b/src/llama_stack/providers/remote/inference/bedrock/config.py @@ -0,0 +1,32 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os + +from pydantic import BaseModel, Field + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig + + +class BedrockProviderDataValidator(BaseModel): + aws_bearer_token_bedrock: str | None = Field( + default=None, + description="API Key (Bearer token) for Amazon Bedrock", + ) + + +class BedrockConfig(RemoteInferenceProviderConfig): + region_name: str = Field( + default_factory=lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-2"), + description="AWS Region for the Bedrock Runtime endpoint", + ) + + @classmethod + def sample_run_config(cls, **kwargs): + return { + "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}", + "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}", + } diff --git a/llama_stack/providers/remote/inference/cerebras/__init__.py b/src/llama_stack/providers/remote/inference/cerebras/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/cerebras/__init__.py rename to src/llama_stack/providers/remote/inference/cerebras/__init__.py diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py similarity index 84% rename from llama_stack/providers/remote/inference/cerebras/cerebras.py rename to src/llama_stack/providers/remote/inference/cerebras/cerebras.py index daf67616b..23c27df1e 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -4,13 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from urllib.parse import urljoin - -from llama_stack.apis.inference import ( +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import CerebrasImplConfig @@ -18,8 +16,10 @@ from .config import CerebrasImplConfig class CerebrasInferenceAdapter(OpenAIMixin): config: CerebrasImplConfig + provider_data_api_key_field: str = "cerebras_api_key" + def get_base_url(self) -> str: - return urljoin(self.config.base_url, "v1") + return str(self.config.base_url) async def openai_embeddings( self, diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py new file mode 100644 index 000000000..ea88abbea --- /dev/null +++ b/src/llama_stack/providers/remote/inference/cerebras/config.py @@ -0,0 +1,37 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + +DEFAULT_BASE_URL = "https://api.cerebras.ai/v1" + + +class CerebrasProviderDataValidator(BaseModel): + cerebras_api_key: str | None = Field( + default=None, + description="API key for Cerebras models", + ) + + +@json_schema_type +class CerebrasImplConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)), + description="Base URL for the Cerebras API", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]: + return { + "base_url": DEFAULT_BASE_URL, + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/databricks/__init__.py b/src/llama_stack/providers/remote/inference/databricks/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/databricks/__init__.py rename to src/llama_stack/providers/remote/inference/databricks/__init__.py diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py new file mode 100644 index 000000000..44cb862f9 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/databricks/config.py @@ -0,0 +1,44 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
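The Cerebras change here (like the Azure one earlier) returns the configured `base_url` verbatim instead of passing it through `urljoin`. A short demonstration of why `urljoin` is easy to misuse for API base URLs:

```python
from urllib.parse import urljoin

# Relative segments resolve against the *parent* of the last path component:
print(urljoin("https://api.cerebras.ai/v1", "v1"))   # https://api.cerebras.ai/v1
print(urljoin("https://api.cerebras.ai/v1/", "v1"))  # https://api.cerebras.ai/v1/v1
# Absolute paths replace the whole path, as in the old Azure code:
print(urljoin("https://example.openai.azure.com/base", "/openai/v1"))  # https://example.openai.azure.com/openai/v1
```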
+
+from typing import Any
+
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
+
+
+class DatabricksProviderDataValidator(BaseModel):
+    databricks_api_token: str | None = Field(
+        default=None,
+        description="API token for Databricks models",
+    )
+
+
+@json_schema_type
+class DatabricksImplConfig(RemoteInferenceProviderConfig):
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)",
+    )
+    auth_credential: SecretStr | None = Field(
+        default=None,
+        alias="api_token",
+        description="The Databricks API token",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        base_url: str = "${env.DATABRICKS_HOST:=}",
+        api_token: str = "${env.DATABRICKS_TOKEN:=}",
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        return {
+            "base_url": base_url,
+            "api_token": api_token,
+        }
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
new file mode 100644
index 000000000..f2f8832f6
--- /dev/null
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -0,0 +1,54 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import Iterable
+
+from databricks.sdk import WorkspaceClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
+
+from .config import DatabricksImplConfig
+
+logger = get_logger(name=__name__, category="inference::databricks")
+
+
+class DatabricksInferenceAdapter(OpenAIMixin):
+    config: DatabricksImplConfig
+
+    provider_data_api_key_field: str = "databricks_api_token"
+
+    # source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
+        "databricks-bge-large-en": {"embedding_dimension": 1024, "context_length": 512},
+    }
+
+    def get_base_url(self) -> str:
+        return str(self.config.base_url)
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        # Resolve the API token from the config or per-request provider data
+        api_token = self._get_api_key_from_config_or_provider_data()
+        # WorkspaceClient expects the base host without the /serving-endpoints suffix
+        base_url_str = str(self.config.base_url)
+        if base_url_str.endswith("/serving-endpoints"):
+            host = base_url_str[:-18]  # Remove '/serving-endpoints'
+        else:
+            host = base_url_str
+        return [
+            endpoint.name  # type: ignore[misc]
+            for endpoint in WorkspaceClient(
+                host=host, token=api_token
+            ).serving_endpoints.list()  # TODO: this is not async
+        ]
+
+    async def openai_completion(
+        self,
+        params: OpenAICompletionRequestWithExtraBody,
+    ) -> OpenAICompletion:
+        raise NotImplementedError()
diff --git a/llama_stack/providers/remote/inference/fireworks/__init__.py b/src/llama_stack/providers/remote/inference/fireworks/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/fireworks/__init__.py
rename to src/llama_stack/providers/remote/inference/fireworks/__init__.py
diff --git 
a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py new file mode 100644 index 000000000..c59b5f270 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/fireworks/config.py @@ -0,0 +1,27 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +@json_schema_type +class FireworksImplConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.fireworks.ai/inference/v1"), + description="The URL for the Fireworks server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]: + return { + "base_url": "https://api.fireworks.ai/inference/v1", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/src/llama_stack/providers/remote/inference/fireworks/fireworks.py similarity index 94% rename from llama_stack/providers/remote/inference/fireworks/fireworks.py rename to src/llama_stack/providers/remote/inference/fireworks/fireworks.py index 7e2b73546..61ea0b1f6 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/src/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -24,4 +24,4 @@ class FireworksInferenceAdapter(OpenAIMixin): provider_data_api_key_field: str = "fireworks_api_key" def get_base_url(self) -> str: - return "https://api.fireworks.ai/inference/v1" + return str(self.config.base_url) diff --git a/llama_stack/providers/remote/inference/gemini/__init__.py b/src/llama_stack/providers/remote/inference/gemini/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/gemini/__init__.py rename to src/llama_stack/providers/remote/inference/gemini/__init__.py diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py new file mode 100644 index 000000000..46cec7d0d --- /dev/null +++ b/src/llama_stack/providers/remote/inference/gemini/config.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
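With the Fireworks change above, the base URL comes from config rather than a hard-coded constant, so a proxy or alternate endpoint can be substituted. A hedged sketch, assuming a llama-stack checkout; the proxy URL is hypothetical:

```python
# Assumes a llama-stack checkout; the proxy URL below is hypothetical.
from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig

config = FireworksImplConfig(base_url="https://fireworks-proxy.example.com/inference/v1")
print(str(config.base_url))  # the adapter's get_base_url() now returns this verbatim
```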
+ +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class GeminiProviderDataValidator(BaseModel): + gemini_api_key: str | None = Field( + default=None, + description="API key for Gemini models", + ) + + +@json_schema_type +class GeminiConfig(RemoteInferenceProviderConfig): + @classmethod + def sample_run_config(cls, api_key: str = "${env.GEMINI_API_KEY:=}", **kwargs) -> dict[str, Any]: + return { + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py similarity index 78% rename from llama_stack/providers/remote/inference/gemini/gemini.py rename to src/llama_stack/providers/remote/inference/gemini/gemini.py index 27fea8b32..f6f48cc2b 100644 --- a/llama_stack/providers/remote/inference/gemini/gemini.py +++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py @@ -4,15 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from openai import NOT_GIVEN +from typing import Any -from llama_stack.apis.inference import ( +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import GeminiConfig @@ -37,21 +37,20 @@ class GeminiInferenceAdapter(OpenAIMixin): Override embeddings method to handle Gemini's missing usage statistics. Gemini's embedding API doesn't return usage information, so we provide default values. """ - # Prepare request parameters - request_params = { + # Build request params conditionally to avoid NotGiven/Omit type mismatch + request_params: dict[str, Any] = { "model": await self._get_provider_model_id(params.model), "input": params.input, - "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN, - "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN, - "user": params.user if params.user is not None else NOT_GIVEN, } + if params.encoding_format is not None: + request_params["encoding_format"] = params.encoding_format + if params.dimensions is not None: + request_params["dimensions"] = params.dimensions + if params.user is not None: + request_params["user"] = params.user + if params.model_extra: + request_params["extra_body"] = params.model_extra - # Add extra_body if present - extra_body = params.model_extra - if extra_body: - request_params["extra_body"] = extra_body - - # Call OpenAI embeddings API with properly typed parameters response = await self.client.embeddings.create(**request_params) data = [] diff --git a/llama_stack/providers/remote/inference/groq/__init__.py b/src/llama_stack/providers/remote/inference/groq/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/groq/__init__.py rename to src/llama_stack/providers/remote/inference/groq/__init__.py diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py new file mode 100644 index 000000000..e5c29c271 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/groq/config.py @@ -0,0 +1,34 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class GroqProviderDataValidator(BaseModel): + groq_api_key: str | None = Field( + default=None, + description="API key for Groq models", + ) + + +@json_schema_type +class GroqConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.groq.com/openai/v1"), + description="The URL for the Groq AI server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]: + return { + "base_url": "https://api.groq.com/openai/v1", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/src/llama_stack/providers/remote/inference/groq/groq.py similarity index 91% rename from llama_stack/providers/remote/inference/groq/groq.py rename to src/llama_stack/providers/remote/inference/groq/groq.py index 3a4f2626d..f99de91ca 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/src/llama_stack/providers/remote/inference/groq/groq.py @@ -15,4 +15,4 @@ class GroqInferenceAdapter(OpenAIMixin): provider_data_api_key_field: str = "groq_api_key" def get_base_url(self) -> str: - return f"{self.config.url}/openai/v1" + return str(self.config.base_url) diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/llama_openai_compat/__init__.py rename to src/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py new file mode 100644 index 000000000..a0f80d969 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py @@ -0,0 +1,34 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
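The Gemini rework above builds the request dict conditionally instead of passing `NOT_GIVEN` sentinels for unset values. A standalone sketch of that pattern; the helper name is illustrative:

```python
# Illustrative helper mirroring the conditional request-params build above.
from typing import Any


def build_embedding_params(model: str, input: list[str], **optional: Any) -> dict[str, Any]:
    params: dict[str, Any] = {"model": model, "input": input}
    # Only forward parameters the caller actually set.
    params.update({key: value for key, value in optional.items() if value is not None})
    return params


print(build_embedding_params("demo-model", ["hello"], dimensions=None, user="u-1"))
# {'model': 'demo-model', 'input': ['hello'], 'user': 'u-1'}
```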
+ +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class LlamaProviderDataValidator(BaseModel): + llama_api_key: str | None = Field( + default=None, + description="API key for api.llama models", + ) + + +@json_schema_type +class LlamaCompatConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.llama.com/compat/v1/"), + description="The URL for the Llama API server", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]: + return { + "base_url": "https://api.llama.com/compat/v1/", + "api_key": api_key, + } diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py similarity index 92% rename from llama_stack/providers/remote/inference/llama_openai_compat/llama.py rename to src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index 05d6e8cc8..f29aebf36 100644 --- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -4,15 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference.inference import ( +from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack.log import get_logger -from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin logger = get_logger(name=__name__, category="inference::llama_openai_compat") @@ -31,7 +31,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin): :return: The Llama API base URL """ - return self.config.openai_compat_api_base + return str(self.config.base_url) async def openai_completion( self, diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md similarity index 91% rename from llama_stack/providers/remote/inference/nvidia/NVIDIA.md rename to src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index f1a828413..d3bdc4fb7 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -18,6 +18,7 @@ This provider enables running inference using NVIDIA NIM. Build the NVIDIA environment: ```bash +uv pip install llama-stack-client uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` @@ -181,3 +182,22 @@ vlm_response = client.chat.completions.create( print(f"VLM Response: {vlm_response.choices[0].message.content}") ``` + +### Rerank Example + +The following example shows how to rerank documents using an NVIDIA NIM. + +```python +rerank_response = client.alpha.inference.rerank( + model="nvidia/nvidia/llama-3.2-nv-rerankqa-1b-v2", + query="query", + items=[ + "item_1", + "item_2", + "item_3", + ], +) + +for i, result in enumerate(rerank_response): + print(f"{i+1}. 
[Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]") +``` diff --git a/src/llama_stack/providers/remote/inference/nvidia/__init__.py b/src/llama_stack/providers/remote/inference/nvidia/__init__.py new file mode 100644 index 000000000..b89b2a750 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/nvidia/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack_api import Inference + +from .config import NVIDIAConfig + + +async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference: + # import dynamically so `llama stack list-deps` does not fail due to missing dependencies + from .nvidia import NVIDIAInferenceAdapter + + if not isinstance(config, NVIDIAConfig): + raise RuntimeError(f"Unexpected config type: {type(config)}") + adapter = NVIDIAInferenceAdapter(config=config) + await adapter.initialize() + return adapter + + +__all__ = ["get_adapter_impl", "NVIDIAConfig"] diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py new file mode 100644 index 000000000..e1e9a0ea9 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/nvidia/config.py @@ -0,0 +1,74 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class NVIDIAProviderDataValidator(BaseModel): + nvidia_api_key: str | None = Field( + default=None, + description="API key for NVIDIA NIM models", + ) + + +@json_schema_type +class NVIDIAConfig(RemoteInferenceProviderConfig): + """ + Configuration for the NVIDIA NIM inference endpoint. + + Attributes: + url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000 + api_key (str): The access key for the hosted NIM endpoints + rerank_model_to_url (dict[str, str]): Mapping of rerank model identifiers to their API endpoints + + There are two ways to access NVIDIA NIMs - + 0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com + 1. Self-hosted: You can run NVIDIA NIMs on your own infrastructure + + By default the configuration is set to use the hosted APIs. This requires + an API key which can be obtained from https://ngc.nvidia.com/. + + By default the configuration will attempt to read the NVIDIA_API_KEY environment + variable to set the api_key. Please do not put your API key in code. + + If you are using a self-hosted NVIDIA NIM, you can set the url to the + URL of your running NVIDIA NIM and do not need to set the api_key. 
+    """
+
+    base_url: HttpUrl | None = Field(
+        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
+        description="A base url for accessing the NVIDIA NIM",
+    )
+    timeout: int = Field(
+        default=60,
+        description="Timeout for the HTTP requests",
+    )
+    rerank_model_to_url: dict[str, str] = Field(
+        default_factory=lambda: {
+            "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
+            "nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking",
+            "nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
+        },
+        description="Mapping of rerank model identifiers to their API endpoints.",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        base_url: HttpUrl | None = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}",
+        api_key: str = "${env.NVIDIA_API_KEY:=}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        return {
+            "base_url": base_url,
+            "api_key": api_key,
+        }
diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
new file mode 100644
index 000000000..5d0d52d6a
--- /dev/null
+++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -0,0 +1,173 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from collections.abc import Iterable
+
+import aiohttp
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    RerankData,
+    RerankResponse,
+)
+
+from . import NVIDIAConfig
+from .utils import _is_nvidia_hosted
+
+logger = get_logger(name=__name__, category="inference::nvidia")
+
+
+class NVIDIAInferenceAdapter(OpenAIMixin):
+    """
+    NVIDIA Inference Adapter for Llama Stack.
+    """
+
+    config: NVIDIAConfig
+
+    provider_data_api_key_field: str = "nvidia_api_key"
+
+    # source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
+        "nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
+        "nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
+        "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
+    }
+
+    async def initialize(self) -> None:
+        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...")
+
+        if _is_nvidia_hosted(self.config):
+            if not self.config.auth_credential:
+                raise RuntimeError(
+                    "API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
+                )
+
+    def get_api_key(self) -> str | None:
+        """
+        Get the API key for OpenAI mixin.
+
+        :return: The NVIDIA API key, or None if no key is configured
+        """
+        if self.config.auth_credential:
+            return self.config.auth_credential.get_secret_value()
+
+        if not _is_nvidia_hosted(self.config):
+            return "NO KEY REQUIRED"
+
+        return None
+
+    def get_base_url(self) -> str:
+        """
+        Get the base URL for OpenAI mixin. 
+ + :return: The NVIDIA API base URL + """ + return str(self.config.base_url) + + async def list_provider_model_ids(self) -> Iterable[str]: + """ + Return both dynamic model IDs and statically configured rerank model IDs. + """ + dynamic_ids: Iterable[str] = [] + try: + dynamic_ids = await super().list_provider_model_ids() + except Exception: + # If the dynamic listing fails, proceed with just configured rerank IDs + dynamic_ids = [] + + configured_rerank_ids = list(self.config.rerank_model_to_url.keys()) + return list(dict.fromkeys(list(dynamic_ids) + configured_rerank_ids)) # remove duplicates + + def construct_model_from_identifier(self, identifier: str) -> Model: + """ + Classify rerank models from config; otherwise use the base behavior. + """ + if identifier in self.config.rerank_model_to_url: + return Model( + provider_id=self.__provider_id__, # type: ignore[attr-defined] + provider_resource_id=identifier, + identifier=identifier, + model_type=ModelType.rerank, + ) + return super().construct_model_from_identifier(identifier) + + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + provider_model_id = await self._get_provider_model_id(model) + + ranking_url = self.get_base_url() + + if _is_nvidia_hosted(self.config) and provider_model_id in self.config.rerank_model_to_url: + ranking_url = self.config.rerank_model_to_url[provider_model_id] + + logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}") + + # Convert query to text format + if isinstance(query, str): + query_text = query + elif isinstance(query, OpenAIChatCompletionContentPartTextParam): + query_text = query.text + else: + raise ValueError("Query must be a string or text content part") + + # Convert items to text format + passages = [] + for item in items: + if isinstance(item, str): + passages.append({"text": item}) + elif isinstance(item, OpenAIChatCompletionContentPartTextParam): + passages.append({"text": item.text}) + else: + raise ValueError("Items must be strings or text content parts") + + payload = { + "model": provider_model_id, + "query": {"text": query_text}, + "passages": passages, + } + + headers = { + "Authorization": f"Bearer {self.get_api_key()}", + "Content-Type": "application/json", + } + + try: + async with aiohttp.ClientSession() as session: + async with session.post(ranking_url, headers=headers, json=payload) as response: + if response.status != 200: + response_text = await response.text() + raise ConnectionError( + f"NVIDIA rerank API request failed with status {response.status}: {response_text}" + ) + + result = await response.json() + rankings = result.get("rankings", []) + + # Convert to RerankData format + rerank_data = [] + for ranking in rankings: + rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"])) + + # Apply max_num_results limit + if max_num_results is not None: + rerank_data = rerank_data[:max_num_results] + + return RerankResponse(data=rerank_data) + + except aiohttp.ClientError as e: + raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e diff --git a/src/llama_stack/providers/remote/inference/nvidia/utils.py b/src/llama_stack/providers/remote/inference/nvidia/utils.py new file mode 100644 index 000000000..c138d1fc5 --- /dev/null 
+++ b/src/llama_stack/providers/remote/inference/nvidia/utils.py @@ -0,0 +1,11 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from . import NVIDIAConfig + + +def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: + return "integrate.api.nvidia.com" in str(config.base_url) diff --git a/src/llama_stack/providers/remote/inference/oci/__init__.py b/src/llama_stack/providers/remote/inference/oci/__init__.py new file mode 100644 index 000000000..b7d6125f3 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack_api import InferenceProvider + +from .config import OCIConfig + + +async def get_adapter_impl(config: OCIConfig, _deps) -> InferenceProvider: + from .oci import OCIInferenceAdapter + + adapter = OCIInferenceAdapter(config=config) + await adapter.initialize() + return adapter diff --git a/src/llama_stack/providers/remote/inference/oci/auth.py b/src/llama_stack/providers/remote/inference/oci/auth.py new file mode 100644 index 000000000..f64436eb5 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/auth.py @@ -0,0 +1,79 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import Generator, Mapping +from typing import Any, override + +import httpx +import oci +import requests +from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE + +OciAuthSigner = type[oci.signer.AbstractBaseSigner] + + +class HttpxOciAuth(httpx.Auth): + """ + Custom HTTPX authentication class that implements OCI request signing. + + This class handles the authentication flow for HTTPX requests by signing them + using the OCI Signer, which adds the necessary authentication headers for + OCI API calls. 
+ + Attributes: + signer (oci.signer.Signer): The OCI signer instance used for request signing + """ + + def __init__(self, signer: OciAuthSigner): + self.signer = signer + + @override + def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]: + # Read the request content to handle streaming requests properly + try: + content = request.content + except httpx.RequestNotRead: + # For streaming requests, we need to read the content first + content = request.read() + + req = requests.Request( + method=request.method, + url=str(request.url), + headers=dict(request.headers), + data=content, + ) + prepared_request = req.prepare() + + # Sign the request using the OCI Signer + self.signer.do_request_sign(prepared_request) # type: ignore + + # Update the original HTTPX request with the signed headers + request.headers.update(prepared_request.headers) + + yield request + + +class OciInstancePrincipalAuth(HttpxOciAuth): + def __init__(self, **kwargs: Mapping[str, Any]): + self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(**kwargs) + + +class OciUserPrincipalAuth(HttpxOciAuth): + def __init__(self, config_file: str = DEFAULT_LOCATION, profile_name: str = DEFAULT_PROFILE): + config = oci.config.from_file(config_file, profile_name) + oci.config.validate_config(config) # type: ignore + key_content = "" + with open(config["key_file"]) as f: + key_content = f.read() + + self.signer = oci.signer.Signer( + tenancy=config["tenancy"], + user=config["user"], + fingerprint=config["fingerprint"], + private_key_file_location=config.get("key_file"), + pass_phrase="none", # type: ignore + private_key_content=key_content, + ) diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py new file mode 100644 index 000000000..93cc36d76 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/config.py @@ -0,0 +1,75 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
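`HttpxOciAuth` above plugs OCI request signing into httpx's standard auth hook. A toy `httpx.Auth` subclass showing the same flow with a plain header in place of OCI signing:

```python
from collections.abc import Generator

import httpx


class HeaderAuth(httpx.Auth):
    """Toy auth hook: adds a header where HttpxOciAuth would sign the request."""

    def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]:
        request.headers["Authorization"] = "Bearer demo-token"
        yield request


client = httpx.Client(auth=HeaderAuth())
# client.get("https://example.com/")  # every request now passes through auth_flow
```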
+ +import os +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class OCIProviderDataValidator(BaseModel): + oci_auth_type: str = Field( + description="OCI authentication type (must be one of: instance_principal, config_file)", + ) + oci_region: str = Field( + description="OCI region (e.g., us-ashburn-1)", + ) + oci_compartment_id: str = Field( + description="OCI compartment ID for the Generative AI service", + ) + oci_config_file_path: str | None = Field( + default="~/.oci/config", + description="OCI config file path (required if oci_auth_type is config_file)", + ) + oci_config_profile: str | None = Field( + default="DEFAULT", + description="OCI config profile (required if oci_auth_type is config_file)", + ) + + +@json_schema_type +class OCIConfig(RemoteInferenceProviderConfig): + oci_auth_type: str = Field( + description="OCI authentication type (must be one of: instance_principal, config_file)", + default_factory=lambda: os.getenv("OCI_AUTH_TYPE", "instance_principal"), + ) + oci_region: str = Field( + default_factory=lambda: os.getenv("OCI_REGION", "us-ashburn-1"), + description="OCI region (e.g., us-ashburn-1)", + ) + oci_compartment_id: str = Field( + default_factory=lambda: os.getenv("OCI_COMPARTMENT_OCID", ""), + description="OCI compartment ID for the Generative AI service", + ) + oci_config_file_path: str = Field( + default_factory=lambda: os.getenv("OCI_CONFIG_FILE_PATH", "~/.oci/config"), + description="OCI config file path (required if oci_auth_type is config_file)", + ) + oci_config_profile: str = Field( + default_factory=lambda: os.getenv("OCI_CLI_PROFILE", "DEFAULT"), + description="OCI config profile (required if oci_auth_type is config_file)", + ) + + @classmethod + def sample_run_config( + cls, + oci_auth_type: str = "${env.OCI_AUTH_TYPE:=instance_principal}", + oci_config_file_path: str = "${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}", + oci_config_profile: str = "${env.OCI_CLI_PROFILE:=DEFAULT}", + oci_region: str = "${env.OCI_REGION:=us-ashburn-1}", + oci_compartment_id: str = "${env.OCI_COMPARTMENT_OCID:=}", + **kwargs, + ) -> dict[str, Any]: + return { + "oci_auth_type": oci_auth_type, + "oci_config_file_path": oci_config_file_path, + "oci_config_profile": oci_config_profile, + "oci_region": oci_region, + "oci_compartment_id": oci_compartment_id, + } diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py new file mode 100644 index 000000000..239443963 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/oci.py @@ -0,0 +1,140 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
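`OCIConfig` above resolves every field from environment variables via `default_factory`, so constructing it with no arguments picks up the ambient OCI settings. A hedged sketch, assuming a llama-stack checkout; the compartment OCID is a placeholder:

```python
# Assumes a llama-stack checkout; the compartment OCID below is a placeholder.
import os

from llama_stack.providers.remote.inference.oci.config import OCIConfig

os.environ["OCI_REGION"] = "eu-frankfurt-1"
os.environ["OCI_COMPARTMENT_OCID"] = "ocid1.compartment.oc1..example"
config = OCIConfig()  # each field falls back to its env-driven default_factory
print(config.oci_region, config.oci_auth_type)  # eu-frankfurt-1 instance_principal
```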
+
+
+from collections.abc import Iterable
+from typing import Any
+
+import httpx
+import oci
+from oci.generative_ai.generative_ai_client import GenerativeAiClient
+from oci.generative_ai.models import ModelCollection
+from openai._base_client import DefaultAsyncHttpxClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
+from llama_stack.providers.remote.inference.oci.config import OCIConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
+
+logger = get_logger(name=__name__, category="inference::oci")
+
+OCI_AUTH_TYPE_INSTANCE_PRINCIPAL = "instance_principal"
+OCI_AUTH_TYPE_CONFIG_FILE = "config_file"
+VALID_OCI_AUTH_TYPES = [OCI_AUTH_TYPE_INSTANCE_PRINCIPAL, OCI_AUTH_TYPE_CONFIG_FILE]
+DEFAULT_OCI_REGION = "us-ashburn-1"
+
+MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS", "CHAT"]
+
+
+class OCIInferenceAdapter(OpenAIMixin):
+    config: OCIConfig
+
+    async def initialize(self) -> None:
+        """Initialize and validate OCI configuration."""
+        if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES:
+            raise ValueError(
+                f"Invalid OCI authentication type: {self.config.oci_auth_type}. "
+                f"Valid types are one of: {VALID_OCI_AUTH_TYPES}"
+            )
+
+        if not self.config.oci_compartment_id:
+            raise ValueError("OCI_COMPARTMENT_OCID is a required parameter. Set it via the environment variable or in the config.")
+
+    def get_base_url(self) -> str:
+        region = self.config.oci_region or DEFAULT_OCI_REGION
+        return f"https://inference.generativeai.{region}.oci.oraclecloud.com/20231130/actions/v1"
+
+    def get_api_key(self) -> str | None:
+        # OCI doesn't use API keys; it uses request signing
+        return ""
+
+    def get_extra_client_params(self) -> dict[str, Any]:
+        """
+        Get extra parameters for the AsyncOpenAI client, including OCI-specific auth and headers.
+        """
+        auth = self._get_auth()
+        compartment_id = self.config.oci_compartment_id or ""
+
+        return {
+            "http_client": DefaultAsyncHttpxClient(
+                auth=auth,
+                headers={
+                    "CompartmentId": compartment_id,
+                },
+            ),
+        }
+
+    def _get_oci_signer(self) -> oci.signer.AbstractBaseSigner | None:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
+        return None
+
+    def _get_oci_config(self) -> dict:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            config = {"region": self.config.oci_region}
+        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+            config = oci.config.from_file(self.config.oci_config_file_path, self.config.oci_config_profile)
+            if not config.get("region"):
+                raise ValueError(
+                    "Region not specified in config. Please specify in config or with OCI_REGION env variable."
+                )
+
+        return config
+
+    def _get_auth(self) -> httpx.Auth:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return OciInstancePrincipalAuth()
+        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+            return OciUserPrincipalAuth(
+                config_file=self.config.oci_config_file_path, profile_name=self.config.oci_config_profile
+            )
+        else:
+            raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}")
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        List available models from OCI Generative AI service. 
+ """ + oci_config = self._get_oci_config() + oci_signer = self._get_oci_signer() + compartment_id = self.config.oci_compartment_id or "" + + if oci_signer is None: + client = GenerativeAiClient(config=oci_config) + else: + client = GenerativeAiClient(config=oci_config, signer=oci_signer) + + models: ModelCollection = client.list_models( + compartment_id=compartment_id, capability=MODEL_CAPABILITIES, lifecycle_state="ACTIVE" + ).data + + seen_models = set() + model_ids = [] + for model in models.items: + if model.time_deprecated or model.time_on_demand_retired: + continue + + if "CHAT" not in model.capabilities or "FINE_TUNE" in model.capabilities: + continue + + # Use display_name + model_type as the key to avoid conflicts + model_key = (model.display_name, ModelType.llm) + if model_key in seen_models: + continue + + seen_models.add(model_key) + model_ids.append(model.display_name) + + return model_ids + + async def openai_embeddings(self, params: OpenAIEmbeddingsRequestWithExtraBody) -> OpenAIEmbeddingsResponse: + # The constructed url is a mask that hits OCI's "chat" action, which is not supported for embeddings. + raise NotImplementedError("OCI Provider does not (currently) support embeddings") diff --git a/llama_stack/providers/remote/inference/ollama/__init__.py b/src/llama_stack/providers/remote/inference/ollama/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/ollama/__init__.py rename to src/llama_stack/providers/remote/inference/ollama/__init__.py diff --git a/src/llama_stack/providers/remote/inference/ollama/config.py b/src/llama_stack/providers/remote/inference/ollama/config.py new file mode 100644 index 000000000..60dd34fa8 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/ollama/config.py @@ -0,0 +1,27 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
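The Ollama config above declares `auth_credential` with `Field(exclude=True)`, keeping the (unused) credential out of serialized output. A standalone pydantic demo of that behavior:

```python
from pydantic import BaseModel, Field, SecretStr


class Demo(BaseModel):
    auth_credential: SecretStr | None = Field(default=None, exclude=True)
    base_url: str = "http://localhost:11434/v1"


# The excluded field never appears in dumps, even when set:
print(Demo(auth_credential=SecretStr("never-serialized")).model_dump())
# {'base_url': 'http://localhost:11434/v1'}
```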
+ +from typing import Any + +from pydantic import Field, HttpUrl, SecretStr + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig + +DEFAULT_OLLAMA_URL = "http://localhost:11434/v1" + + +class OllamaImplConfig(RemoteInferenceProviderConfig): + auth_credential: SecretStr | None = Field(default=None, exclude=True) + + base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL)) + + @classmethod + def sample_run_config( + cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs + ) -> dict[str, Any]: + return { + "base_url": base_url, + } diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py similarity index 88% rename from llama_stack/providers/remote/inference/ollama/ollama.py rename to src/llama_stack/providers/remote/inference/ollama/ollama.py index 50f36d045..e8b872384 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py @@ -9,15 +9,15 @@ import asyncio from ollama import AsyncClient as AsyncOllamaClient -from llama_stack.apis.common.errors import UnsupportedModelError -from llama_stack.apis.models import Model from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - HealthResponse, - HealthStatus, -) from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + HealthResponse, + HealthStatus, + Model, + UnsupportedModelError, +) logger = get_logger(name=__name__, category="inference::ollama") @@ -55,17 +55,23 @@ class OllamaInferenceAdapter(OpenAIMixin): # ollama client attaches itself to the current event loop (sadly?) loop = asyncio.get_running_loop() if loop not in self._clients: - self._clients[loop] = AsyncOllamaClient(host=self.config.url) + # Ollama client expects base URL without /v1 suffix + base_url_str = str(self.config.base_url) + if base_url_str.endswith("/v1"): + host = base_url_str[:-3] + else: + host = base_url_str + self._clients[loop] = AsyncOllamaClient(host=host) return self._clients[loop] def get_api_key(self): return "NO KEY REQUIRED" def get_base_url(self): - return self.config.url.rstrip("/") + "/v1" + return str(self.config.base_url) async def initialize(self) -> None: - logger.info(f"checking connectivity to Ollama at `{self.config.url}`...") + logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...") r = await self.health() if r["status"] == HealthStatus.ERROR: logger.warning( diff --git a/llama_stack/providers/remote/inference/openai/__init__.py b/src/llama_stack/providers/remote/inference/openai/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/openai/__init__.py rename to src/llama_stack/providers/remote/inference/openai/__init__.py diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py new file mode 100644 index 000000000..2057cd0d6 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/openai/config.py @@ -0,0 +1,39 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
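One subtlety in the Ollama changes above: the OpenAI-compatible path keeps the `/v1` suffix, while the native `AsyncOllamaClient` wants the bare host. A self-contained sketch of that normalization (the helper name is illustrative, not part of the diff):

```python
def split_ollama_urls(base_url: str) -> tuple[str, str]:
    """Return (openai_base_url, ollama_host) for a configured Ollama base URL."""
    openai_base = base_url.rstrip("/")
    # The native Ollama client attaches its own /api/... paths, so strip a trailing /v1.
    host = openai_base[: -len("/v1")] if openai_base.endswith("/v1") else openai_base
    return openai_base, host


assert split_ollama_urls("http://localhost:11434/v1") == (
    "http://localhost:11434/v1",
    "http://localhost:11434",
)
```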
+ +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class OpenAIProviderDataValidator(BaseModel): + openai_api_key: str | None = Field( + default=None, + description="API key for OpenAI models", + ) + + +@json_schema_type +class OpenAIConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.openai.com/v1"), + description="Base URL for OpenAI API", + ) + + @classmethod + def sample_run_config( + cls, + api_key: str = "${env.OPENAI_API_KEY:=}", + base_url: str = "${env.OPENAI_BASE_URL:=https://api.openai.com/v1}", + **kwargs, + ) -> dict[str, Any]: + return { + "api_key": api_key, + "base_url": base_url, + } diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/src/llama_stack/providers/remote/inference/openai/openai.py similarity index 96% rename from llama_stack/providers/remote/inference/openai/openai.py rename to src/llama_stack/providers/remote/inference/openai/openai.py index 52bc48f1a..2d465546a 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/src/llama_stack/providers/remote/inference/openai/openai.py @@ -35,4 +35,4 @@ class OpenAIInferenceAdapter(OpenAIMixin): Returns the OpenAI API base URL from the configuration. """ - return self.config.base_url + return str(self.config.base_url) diff --git a/src/llama_stack/providers/remote/inference/passthrough/__init__.py b/src/llama_stack/providers/remote/inference/passthrough/__init__.py new file mode 100644 index 000000000..1cc46bff1 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/passthrough/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from pydantic import BaseModel + +from .config import PassthroughImplConfig + + +class PassthroughProviderDataValidator(BaseModel): + passthrough_url: str + passthrough_api_key: str + + +async def get_adapter_impl(config: PassthroughImplConfig, _deps): + from .passthrough import PassthroughInferenceAdapter + + assert isinstance(config, PassthroughImplConfig), f"Unexpected config type: {type(config)}" + impl = PassthroughInferenceAdapter(config) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py new file mode 100644 index 000000000..f45806e79 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/passthrough/config.py @@ -0,0 +1,29 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any + +from pydantic import Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +@json_schema_type +class PassthroughImplConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=None, + description="The URL for the passthrough endpoint", + ) + + @classmethod + def sample_run_config( + cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs + ) -> dict[str, Any]: + return { + "base_url": base_url, + "api_key": api_key, + } diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py new file mode 100644 index 000000000..b0e2e74ad --- /dev/null +++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -0,0 +1,135 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import AsyncIterator + +from openai import AsyncOpenAI + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack_api import ( + Inference, + Model, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, + OpenAICompletion, + OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) + +from .config import PassthroughImplConfig + + +class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference): + def __init__(self, config: PassthroughImplConfig) -> None: + self.config = config + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + + async def unregister_model(self, model_id: str) -> None: + pass + + async def register_model(self, model: Model) -> Model: + return model + + async def list_models(self) -> list[Model]: + """List models by calling the downstream /v1/models endpoint.""" + client = self._get_openai_client() + + response = await client.models.list() + + # Convert from OpenAI format to Llama Stack Model format + models = [] + for model_data in response.data: + downstream_model_id = model_data.id + custom_metadata = getattr(model_data, "custom_metadata", {}) or {} + + # Prefix identifier with provider ID for local registry + local_identifier = f"{self.__provider_id__}/{downstream_model_id}" + + model = Model( + identifier=local_identifier, + provider_id=self.__provider_id__, + provider_resource_id=downstream_model_id, + model_type=custom_metadata.get("model_type", "llm"), + metadata=custom_metadata, + ) + models.append(model) + + return models + + async def should_refresh_models(self) -> bool: + """Passthrough should refresh models since they come from downstream dynamically.""" + return self.config.refresh_models + + def _get_openai_client(self) -> AsyncOpenAI: + """Get an AsyncOpenAI client configured for the downstream server.""" + base_url = self._get_passthrough_url() + api_key = self._get_passthrough_api_key() + + return AsyncOpenAI( + base_url=f"{base_url.rstrip('/')}/v1", + api_key=api_key, + ) + + def _get_passthrough_url(self) -> str: + """Get the passthrough URL from config or provider data.""" + if self.config.base_url is not None: + return str(self.config.base_url) + + provider_data = self.get_request_provider_data() + if provider_data is 
None: + raise ValueError( + 'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": }' + ) + return provider_data.passthrough_url + + def _get_passthrough_api_key(self) -> str: + """Get the passthrough API key from config or provider data.""" + if self.config.auth_credential is not None: + return self.config.auth_credential.get_secret_value() + + provider_data = self.get_request_provider_data() + if provider_data is None: + raise ValueError( + 'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": }' + ) + return provider_data.passthrough_api_key + + async def openai_completion( + self, + params: OpenAICompletionRequestWithExtraBody, + ) -> OpenAICompletion: + """Forward completion request to downstream using OpenAI client.""" + client = self._get_openai_client() + request_params = params.model_dump(exclude_none=True) + response = await client.completions.create(**request_params) + return response # type: ignore + + async def openai_chat_completion( + self, + params: OpenAIChatCompletionRequestWithExtraBody, + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + """Forward chat completion request to downstream using OpenAI client.""" + client = self._get_openai_client() + request_params = params.model_dump(exclude_none=True) + response = await client.chat.completions.create(**request_params) + return response # type: ignore + + async def openai_embeddings( + self, + params: OpenAIEmbeddingsRequestWithExtraBody, + ) -> OpenAIEmbeddingsResponse: + """Forward embeddings request to downstream using OpenAI client.""" + client = self._get_openai_client() + request_params = params.model_dump(exclude_none=True) + response = await client.embeddings.create(**request_params) + return response # type: ignore diff --git a/llama_stack/providers/remote/inference/runpod/__init__.py b/src/llama_stack/providers/remote/inference/runpod/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/runpod/__init__.py rename to src/llama_stack/providers/remote/inference/runpod/__init__.py diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py new file mode 100644 index 000000000..8d06f5263 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/runpod/config.py @@ -0,0 +1,39 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
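Looking back at the passthrough adapter above: when `base_url` and `auth_credential` are absent from run.yaml, the downstream coordinates come from the `X-LlamaStack-Provider-Data` request header. An illustrative client call (host, port, key, and model name are placeholders):

```python
import json

import httpx

provider_data = {
    "passthrough_url": "http://downstream.example:8000",  # placeholder downstream server
    "passthrough_api_key": "my-downstream-key",  # placeholder credential
}

# Per-request credentials for the passthrough provider, instead of static config.
response = httpx.post(
    "http://localhost:8321/v1/chat/completions",
    headers={"X-LlamaStack-Provider-Data": json.dumps(provider_data)},
    json={
        "model": "passthrough/some-model",
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
```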
+ +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl, SecretStr + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class RunpodProviderDataValidator(BaseModel): + runpod_api_token: str | None = Field( + default=None, + description="API token for RunPod models", + ) + + +@json_schema_type +class RunpodImplConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=None, + description="The URL for the Runpod model serving endpoint", + ) + auth_credential: SecretStr | None = Field( + default=None, + alias="api_token", + description="The API token", + ) + + @classmethod + def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: + return { + "base_url": "${env.RUNPOD_URL:=}", + "api_token": "${env.RUNPOD_API_TOKEN}", + } diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py similarity index 91% rename from llama_stack/providers/remote/inference/runpod/runpod.py rename to src/llama_stack/providers/remote/inference/runpod/runpod.py index db60644ca..04ad12851 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py @@ -6,12 +6,12 @@ from collections.abc import AsyncIterator -from llama_stack.apis.inference import ( +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, ) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import RunpodImplConfig @@ -24,10 +24,11 @@ class RunpodInferenceAdapter(OpenAIMixin): """ config: RunpodImplConfig + provider_data_api_key_field: str = "runpod_api_token" def get_base_url(self) -> str: """Get base URL for OpenAI client.""" - return self.config.url + return str(self.config.base_url) async def openai_chat_completion( self, diff --git a/llama_stack/providers/remote/inference/sambanova/__init__.py b/src/llama_stack/providers/remote/inference/sambanova/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/sambanova/__init__.py rename to src/llama_stack/providers/remote/inference/sambanova/__init__.py diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py new file mode 100644 index 000000000..79cda75a0 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/sambanova/config.py @@ -0,0 +1,34 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+from typing import Any
+
+from pydantic import BaseModel, Field, HttpUrl
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
+
+
+class SambaNovaProviderDataValidator(BaseModel):
+    sambanova_api_key: str | None = Field(
+        default=None,
+        description="Sambanova Cloud API key",
+    )
+
+
+@json_schema_type
+class SambaNovaImplConfig(RemoteInferenceProviderConfig):
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.sambanova.ai/v1"),
+        description="The URL for the SambaNova AI server",
+    )
+
+    @classmethod
+    def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
+        return {
+            "base_url": "https://api.sambanova.ai/v1",
+            "api_key": api_key,
+        }
diff --git a/src/llama_stack/providers/remote/inference/sambanova/sambanova.py b/src/llama_stack/providers/remote/inference/sambanova/sambanova.py
new file mode 100644
index 000000000..cb01e3a90
--- /dev/null
+++ b/src/llama_stack/providers/remote/inference/sambanova/sambanova.py
@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+from .config import SambaNovaImplConfig
+
+
+class SambaNovaInferenceAdapter(OpenAIMixin):
+    """
+    SambaNova Inference Adapter for Llama Stack.
+    """
+
+    config: SambaNovaImplConfig
+
+    provider_data_api_key_field: str = "sambanova_api_key"
+    # SambaNova does not support image downloads server-side, so perform them on the client
+    download_images: bool = True
+
+    def get_base_url(self) -> str:
+        """
+        Get the base URL for OpenAI mixin.
+
+        :return: The SambaNova base URL
+        """
+        return str(self.config.base_url)
diff --git a/llama_stack/providers/remote/inference/tgi/__init__.py b/src/llama_stack/providers/remote/inference/tgi/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/tgi/__init__.py
rename to src/llama_stack/providers/remote/inference/tgi/__init__.py
diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
new file mode 100644
index 000000000..44cb4b812
--- /dev/null
+++ b/src/llama_stack/providers/remote/inference/tgi/config.py
@@ -0,0 +1,77 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
+
+
+@json_schema_type
+class TGIImplConfig(RemoteInferenceProviderConfig):
+    auth_credential: SecretStr | None = Field(default=None, exclude=True)
+
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="The URL for the TGI serving endpoint (should include /v1 path)",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        base_url: str = "${env.TGI_URL:=}",
+        **kwargs,
+    ):
+        return {
+            "base_url": base_url,
+        }
+
+
+@json_schema_type
+class InferenceEndpointImplConfig(BaseModel):
+    endpoint_name: str = Field(
+        description="The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided.",
+    )
+    api_token: SecretStr | None = Field(
+        default=None,
+        description="Your Hugging Face user access token (will default to locally saved token if not provided)",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        endpoint_name: str = "${env.INFERENCE_ENDPOINT_NAME}",
+        api_token: str = "${env.HF_API_TOKEN}",
+        **kwargs,
+    ):
+        return {
+            "endpoint_name": endpoint_name,
+            "api_token": api_token,
+        }
+
+
+@json_schema_type
+class InferenceAPIImplConfig(BaseModel):
+    huggingface_repo: str = Field(
+        description="The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct')",
+    )
+    api_token: SecretStr | None = Field(
+        default=None,
+        description="Your Hugging Face user access token (will default to locally saved token if not provided)",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        repo: str = "${env.INFERENCE_MODEL}",
+        api_token: str = "${env.HF_API_TOKEN}",
+        **kwargs,
+    ):
+        return {
+            "huggingface_repo": repo,
+            "api_token": api_token,
+        }
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
similarity index 81%
rename from llama_stack/providers/remote/inference/tgi/tgi.py
rename to src/llama_stack/providers/remote/inference/tgi/tgi.py
index 6ae7b2544..5dc8c33f7 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -8,14 +8,14 @@
 from collections.abc import Iterable

 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
+from pydantic import HttpUrl, SecretStr

-from llama_stack.apis.inference import (
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig

@@ -23,7 +23,7 @@ log = get_logger(name=__name__, category="inference::tgi")


 class _HfAdapter(OpenAIMixin):
-    url: str
+    base_url: HttpUrl
     api_key: SecretStr

     hf_client: AsyncInferenceClient
@@ -36,7 +36,7 @@ class _HfAdapter(OpenAIMixin):
         return "NO KEY REQUIRED"

     def get_base_url(self):
-        return self.url
+        return str(self.base_url)

     async def list_provider_model_ids(self) -> Iterable[str]:
         return [self.model_id]
@@ -50,14 +50,20 @@

 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
-        if not config.url:
+        if not config.base_url:
             raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")

-        log.info(f"Initializing TGI client with url={config.url}")
-        self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference")
+        log.info(f"Initializing TGI client with url={config.base_url}")
+        # Extract base URL without /v1 for HF client initialization
+        base_url_str = str(config.base_url).rstrip("/")
+        if base_url_str.endswith("/v1"):
+            base_url_for_client = base_url_str[:-3]
+        else:
+            base_url_for_client = base_url_str
+        self.hf_client = AsyncInferenceClient(model=base_url_for_client, provider="hf-inference")

         endpoint_info = await self.hf_client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]
self.model_id = endpoint_info["model_id"] - self.url = f"{config.url.rstrip('/')}/v1" + self.base_url = config.base_url self.api_key = SecretStr("NO_KEY") diff --git a/llama_stack/providers/remote/inference/together/__init__.py b/src/llama_stack/providers/remote/inference/together/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/together/__init__.py rename to src/llama_stack/providers/remote/inference/together/__init__.py diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py new file mode 100644 index 000000000..16f0686ba --- /dev/null +++ b/src/llama_stack/providers/remote/inference/together/config.py @@ -0,0 +1,27 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +@json_schema_type +class TogetherImplConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default=HttpUrl("https://api.together.xyz/v1"), + description="The URL for the Together AI server", + ) + + @classmethod + def sample_run_config(cls, **kwargs) -> dict[str, Any]: + return { + "base_url": "https://api.together.xyz/v1", + "api_key": "${env.TOGETHER_API_KEY:=}", + } diff --git a/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py similarity index 85% rename from llama_stack/providers/remote/inference/together/together.py rename to src/llama_stack/providers/remote/inference/together/together.py index e31ebf7c5..0826dbcd2 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/src/llama_stack/providers/remote/inference/together/together.py @@ -6,19 +6,19 @@ from collections.abc import Iterable +from typing import Any, cast -from together import AsyncTogether -from together.constants import BASE_URL +from together import AsyncTogether # type: ignore[import-untyped] -from llama_stack.apis.inference import ( - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, -) -from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage -from llama_stack.apis.models import Model from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + Model, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, +) from .config import TogetherImplConfig @@ -41,7 +41,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData): provider_data_api_key_field: str = "together_api_key" def get_base_url(self): - return BASE_URL + return str(self.config.base_url) def _get_client(self) -> AsyncTogether: together_api_key = None @@ -81,10 +81,11 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData): if params.dimensions is not None: raise ValueError("Together's embeddings endpoint does not support dimensions param.") + # Cast encoding_format to match OpenAI SDK's expected Literal type response = await self.client.embeddings.create( model=await self._get_provider_model_id(params.model), input=params.input, - 
encoding_format=params.encoding_format, + encoding_format=cast(Any, params.encoding_format), ) response.model = ( @@ -97,6 +98,8 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData): logger.warning( f"Together's embedding endpoint for {params.model} did not return usage information, substituting -1s." ) - response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1) + # Cast to allow monkey-patching the response object + response.usage = cast(Any, OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)) - return response # type: ignore[no-any-return] + # Together's CreateEmbeddingResponse is compatible with OpenAIEmbeddingsResponse after monkey-patching + return cast(OpenAIEmbeddingsResponse, response) diff --git a/llama_stack/providers/remote/inference/vertexai/__init__.py b/src/llama_stack/providers/remote/inference/vertexai/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/vertexai/__init__.py rename to src/llama_stack/providers/remote/inference/vertexai/__init__.py diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py new file mode 100644 index 000000000..5891f7cd0 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/vertexai/config.py @@ -0,0 +1,48 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field, SecretStr + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class VertexAIProviderDataValidator(BaseModel): + vertex_project: str | None = Field( + default=None, + description="Google Cloud project ID for Vertex AI", + ) + vertex_location: str | None = Field( + default=None, + description="Google Cloud location for Vertex AI (e.g., us-central1)", + ) + + +@json_schema_type +class VertexAIConfig(RemoteInferenceProviderConfig): + auth_credential: SecretStr | None = Field(default=None, exclude=True) + + project: str = Field( + description="Google Cloud project ID for Vertex AI", + ) + location: str = Field( + default="us-central1", + description="Google Cloud location for Vertex AI", + ) + + @classmethod + def sample_run_config( + cls, + project: str = "${env.VERTEX_AI_PROJECT:=}", + location: str = "${env.VERTEX_AI_LOCATION:=us-central1}", + **kwargs, + ) -> dict[str, Any]: + return { + "project": project, + "location": location, + } diff --git a/llama_stack/providers/remote/inference/vertexai/vertexai.py b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py similarity index 79% rename from llama_stack/providers/remote/inference/vertexai/vertexai.py rename to src/llama_stack/providers/remote/inference/vertexai/vertexai.py index 647c8c752..7941f8c89 100644 --- a/llama_stack/providers/remote/inference/vertexai/vertexai.py +++ b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from collections.abc import Iterable

 import google.auth.transport.requests
 from google.auth import default
@@ -42,3 +43,12 @@
         Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
         """
         return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        VertexAI doesn't currently offer a way to query the list of available models in Google's Model Garden,
+        so for now we return a hardcoded list of the available models.
+
+        :return: An iterable of model IDs
+        """
+        return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"]
diff --git a/llama_stack/providers/remote/inference/vllm/__init__.py b/src/llama_stack/providers/remote/inference/vllm/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vllm/__init__.py
rename to src/llama_stack/providers/remote/inference/vllm/__init__.py
diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py
new file mode 100644
index 000000000..db6c74431
--- /dev/null
+++ b/src/llama_stack/providers/remote/inference/vllm/config.py
@@ -0,0 +1,59 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from pydantic import Field, HttpUrl, SecretStr, field_validator
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
+
+
+@json_schema_type
+class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="The URL for the vLLM model serving endpoint",
+    )
+    max_tokens: int = Field(
+        default=4096,
+        description="Maximum number of tokens to generate.",
+    )
+    auth_credential: SecretStr | None = Field(
+        default=None,
+        alias="api_token",
+        description="The API token",
+    )
+    tls_verify: bool | str = Field(
+        default=True,
+        description="Whether to verify TLS certificates.
Can be a boolean or a path to a CA certificate file.", + ) + + @field_validator("tls_verify") + @classmethod + def validate_tls_verify(cls, v): + if isinstance(v, str): + # Otherwise, treat it as a cert path + cert_path = Path(v).expanduser().resolve() + if not cert_path.exists(): + raise ValueError(f"TLS certificate file does not exist: {v}") + if not cert_path.is_file(): + raise ValueError(f"TLS certificate path is not a file: {v}") + return v + return v + + @classmethod + def sample_run_config( + cls, + base_url: str = "${env.VLLM_URL:=}", + **kwargs, + ): + return { + "base_url": base_url, + "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", + "api_token": "${env.VLLM_API_TOKEN:=fake}", + "tls_verify": "${env.VLLM_TLS_VERIFY:=true}", + } diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py similarity index 92% rename from llama_stack/providers/remote/inference/vllm/vllm.py rename to src/llama_stack/providers/remote/inference/vllm/vllm.py index 74a18f3de..6664ca36b 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -7,22 +7,18 @@ from collections.abc import AsyncIterator from urllib.parse import urljoin import httpx -from openai.types.chat.chat_completion_chunk import ( - ChatCompletionChunk as OpenAIChatCompletionChunk, -) from pydantic import ConfigDict -from llama_stack.apis.inference import ( +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin +from llama_stack_api import ( + HealthResponse, + HealthStatus, OpenAIChatCompletion, + OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, ToolChoice, ) -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - HealthResponse, - HealthStatus, -) -from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import VLLMInferenceAdapterConfig @@ -43,12 +39,12 @@ class VLLMInferenceAdapter(OpenAIMixin): def get_base_url(self) -> str: """Get the base URL from config.""" - if not self.config.url: + if not self.config.base_url: raise ValueError("No base URL configured") - return self.config.url + return str(self.config.base_url) async def initialize(self) -> None: - if not self.config.url: + if not self.config.base_url: raise ValueError( "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM." ) diff --git a/llama_stack/providers/remote/inference/watsonx/__init__.py b/src/llama_stack/providers/remote/inference/watsonx/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/watsonx/__init__.py rename to src/llama_stack/providers/remote/inference/watsonx/__init__.py diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py new file mode 100644 index 000000000..be2b2c0ab --- /dev/null +++ b/src/llama_stack/providers/remote/inference/watsonx/config.py @@ -0,0 +1,45 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
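A note on the vLLM `tls_verify` field above: it mirrors the shape of httpx's `verify` argument, which accepts a boolean or a path to a CA bundle, so the validated value can be handed to the HTTP client as-is. A minimal sketch (the client construction here is illustrative; the real wiring lives in the mixin):

```python
import httpx


def make_http_client(tls_verify: bool | str) -> httpx.AsyncClient:
    # httpx accepts verify=True/False or a CA bundle path, matching
    # the shape that VLLMInferenceAdapterConfig.tls_verify validates.
    return httpx.AsyncClient(verify=tls_verify)


make_http_client(True)  # verify against system CAs (the default)
make_http_client(False)  # "${env.VLLM_TLS_VERIFY:=true}" overridden to false
make_http_client("/etc/ssl/certs/private-ca.pem")  # custom CA bundle path
```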
+ +import os +from typing import Any + +from pydantic import BaseModel, Field, HttpUrl + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack_api import json_schema_type + + +class WatsonXProviderDataValidator(BaseModel): + watsonx_project_id: str | None = Field( + default=None, + description="IBM WatsonX project ID", + ) + watsonx_api_key: str | None = None + + +@json_schema_type +class WatsonXConfig(RemoteInferenceProviderConfig): + base_url: HttpUrl | None = Field( + default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), + description="A base url for accessing the watsonx.ai", + ) + project_id: str | None = Field( + default=None, + description="The watsonx.ai project ID", + ) + timeout: int = Field( + default=60, + description="Timeout for the HTTP requests", + ) + + @classmethod + def sample_run_config(cls, **kwargs) -> dict[str, Any]: + return { + "base_url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", + "api_key": "${env.WATSONX_API_KEY:=}", + "project_id": "${env.WATSONX_PROJECT_ID:=}", + } diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py similarity index 92% rename from llama_stack/providers/remote/inference/watsonx/watsonx.py rename to src/llama_stack/providers/remote/inference/watsonx/watsonx.py index 2c051719b..2fcda370a 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -10,7 +10,13 @@ from typing import Any import litellm import requests -from llama_stack.apis.inference.inference import ( +from llama_stack.log import get_logger +from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin +from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params +from llama_stack_api import ( + Model, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -20,13 +26,6 @@ from llama_stack.apis.inference.inference import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack.apis.models import Model -from llama_stack.apis.models.models import ModelType -from llama_stack.log import get_logger -from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig -from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin -from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params -from llama_stack.providers.utils.telemetry.tracing import get_current_span logger = get_logger(name=__name__, category="providers::remote::watsonx") @@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): # Add usage tracking for streaming when telemetry is active stream_options = params.stream_options - if params.stream and get_current_span() is not None: + if params.stream: if stream_options is None: stream_options = {"include_usage": True} elif "include_usage" not in stream_options: @@ -238,8 +237,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): ) # Convert response to OpenAI format - from llama_stack.apis.inference import OpenAIEmbeddingUsage from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response + from llama_stack_api import OpenAIEmbeddingUsage 
data = b64_encode_openai_embeddings_response(response.data, params.encoding_format) @@ -255,7 +254,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): ) def get_base_url(self) -> str: - return self.config.url + return str(self.config.base_url) # Copied from OpenAIMixin async def check_model_availability(self, model: str) -> bool: @@ -283,8 +282,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): # ... provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}" if "embedding" in functions: - embedding_dimension = model_spec["model_limits"]["embedding_dimension"] - context_length = model_spec["model_limits"]["max_sequence_length"] + embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0) + context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0) embedding_metadata = { "embedding_dimension": embedding_dimension, "context_length": context_length, @@ -306,10 +305,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): metadata={}, model_type=ModelType.llm, ) - # In theory, I guess it is possible that a model could be both an embedding model and a text chat model. - # In that case, the cache will record the generator Model object, and the list which we return will have - # both the generator Model object and the text chat Model object. That's fine because the cache is - # only used for check_model_availability() anyway. self._model_cache[provider_resource_id] = model models.append(model) return models @@ -320,7 +315,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): """ Retrieves foundation model specifications from the watsonx.ai API. """ - url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25" + url = f"{str(self.config.base_url)}/ml/v1/foundation_model_specs?version=2023-10-25" headers = { # Note that there is no authorization header. Listing models does not require authentication. "Content-Type": "application/json", diff --git a/llama_stack/providers/inline/telemetry/__init__.py b/src/llama_stack/providers/remote/post_training/__init__.py similarity index 100% rename from llama_stack/providers/inline/telemetry/__init__.py rename to src/llama_stack/providers/remote/post_training/__init__.py diff --git a/src/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md new file mode 100644 index 000000000..f998f44ba --- /dev/null +++ b/src/llama_stack/providers/remote/post_training/nvidia/README.md @@ -0,0 +1,151 @@ +# NVIDIA Post-Training Provider for LlamaStack + +This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service. 
+ +## Features + +- Supervised fine-tuning of Llama models +- LoRA fine-tuning support +- Job management and status tracking + +## Getting Started + +### Prerequisites + +- LlamaStack with NVIDIA configuration +- Access to Hosted NVIDIA NeMo Customizer service +- Dataset registered in the Hosted NVIDIA NeMo Customizer service +- Base model downloaded and available in the Hosted NVIDIA NeMo Customizer service + +### Setup + +Build the NVIDIA environment: + +```bash +uv pip install llama-stack-client +uv run llama stack list-deps nvidia | xargs -L1 uv pip install +``` + +### Basic Usage using the LlamaStack Python Client + +### Create Customization Job + +#### Initialize the client + +```python +import os + +os.environ["NVIDIA_API_KEY"] = "your-api-key" +os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" +os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" +os.environ["NVIDIA_PROJECT_ID"] = "test-project" +os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1" + +from llama_stack.core.library_client import LlamaStackAsLibraryClient + +client = LlamaStackAsLibraryClient("nvidia") +client.initialize() +``` + +#### Configure fine-tuning parameters + +```python +from llama_stack_client.types.post_training_supervised_fine_tune_params import ( + TrainingConfig, + TrainingConfigDataConfig, + TrainingConfigOptimizerConfig, +) +from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig +``` + +#### Set up LoRA configuration + +```python +algorithm_config = LoraFinetuningConfig(type="LoRA", adapter_dim=16) +``` + +#### Configure training data + +```python +data_config = TrainingConfigDataConfig( + dataset_id="your-dataset-id", # Use client.datasets.list() to see available datasets + batch_size=16, +) +``` + +#### Configure optimizer + +```python +optimizer_config = TrainingConfigOptimizerConfig( + lr=0.0001, +) +``` + +#### Set up training configuration + +```python +training_config = TrainingConfig( + n_epochs=2, + data_config=data_config, + optimizer_config=optimizer_config, +) +``` + +#### Start fine-tuning job + +```python +training_job = client.post_training.supervised_fine_tune( + job_uuid="unique-job-id", + model="meta-llama/Llama-3.1-8B-Instruct", + checkpoint_dir="", + algorithm_config=algorithm_config, + training_config=training_config, + logger_config={}, + hyperparam_search_config={}, +) +``` + +### List all jobs + +```python +jobs = client.post_training.job.list() +``` + +### Check job status + +```python +job_status = client.post_training.job.status(job_uuid="your-job-id") +``` + +### Cancel a job + +```python +client.post_training.job.cancel(job_uuid="your-job-id") +``` + +### Inference with the fine-tuned model + +#### 1. Register the model + +```python +from llama_stack_api.models import Model, ModelType + +client.models.register( + model_id="test-example-model@v1", + provider_id="nvidia", + provider_model_id="test-example-model@v1", + model_type=ModelType.llm, +) +``` + +#### 2. 
Inference with the fine-tuned model

```python
response = client.completions.create(
    prompt="Complete the sentence using one word: Roses are red, violets are ",
    stream=False,
    model="test-example-model@v1",
    max_tokens=50,
)
print(response.choices[0].text)
```
diff --git a/llama_stack/providers/remote/post_training/nvidia/__init__.py b/src/llama_stack/providers/remote/post_training/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/__init__.py
rename to src/llama_stack/providers/remote/post_training/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/src/llama_stack/providers/remote/post_training/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/config.py
rename to src/llama_stack/providers/remote/post_training/nvidia/config.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/models.py b/src/llama_stack/providers/remote/post_training/nvidia/models.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/models.py
rename to src/llama_stack/providers/remote/post_training/nvidia/models.py
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
new file mode 100644
index 000000000..830a9f747
--- /dev/null
+++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
@@ -0,0 +1,430 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import warnings
+from datetime import datetime, timezone
+from typing import Any, Literal
+
+import aiohttp
+from pydantic import BaseModel, ConfigDict
+
+from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
+from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+from llama_stack_api import (
+    AlgorithmConfig,
+    DPOAlignmentConfig,
+    JobStatus,
+    PostTrainingJob,
+    PostTrainingJobArtifactsResponse,
+    PostTrainingJobStatusResponse,
+    TrainingConfig,
+)
+
+from .models import _MODEL_ENTRIES
+
+# Map API status to JobStatus enum
+STATUS_MAPPING = {
+    "running": JobStatus.in_progress.value,
+    "completed": JobStatus.completed.value,
+    "failed": JobStatus.failed.value,
+    "cancelled": JobStatus.cancelled.value,
+    "pending": JobStatus.scheduled.value,
+    "unknown": JobStatus.scheduled.value,
+}
+
+
+class NvidiaPostTrainingJob(PostTrainingJob):
+    """Parse the response from the Customizer API.
+    Inherits job_uuid from PostTrainingJob.
+    Adds status, created_at, updated_at parameters.
+    Passes through all other parameters from the data field in the response.
+ """ + + model_config = ConfigDict(extra="allow") + status: JobStatus + created_at: datetime + updated_at: datetime + + +class ListNvidiaPostTrainingJobs(BaseModel): + data: list[NvidiaPostTrainingJob] + + +class NvidiaPostTrainingJobStatusResponse(PostTrainingJobStatusResponse): + model_config = ConfigDict(extra="allow") + + +class NvidiaPostTrainingAdapter(ModelRegistryHelper): + def __init__(self, config: NvidiaPostTrainingConfig): + self.config = config + self.headers = {} + if config.api_key: + self.headers["Authorization"] = f"Bearer {config.api_key}" + + self.timeout = aiohttp.ClientTimeout(total=config.timeout) + # TODO: filter by available models based on /config endpoint + ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES) + self.session = None + + self.customizer_url = config.customizer_url + if not self.customizer_url: + warnings.warn("Customizer URL is not set, using default value: http://nemo.test", stacklevel=2) + self.customizer_url = "http://nemo.test" + + async def _get_session(self) -> aiohttp.ClientSession: + if self.session is None or self.session.closed: + self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout) + return self.session + + async def _make_request( + self, + method: str, + path: str, + headers: dict[str, Any] | None = None, + params: dict[str, Any] | None = None, + json: dict[str, Any] | None = None, + **kwargs, + ) -> dict[str, Any]: + """Helper method to make HTTP requests to the Customizer API.""" + url = f"{self.customizer_url}{path}" + request_headers = self.headers.copy() + + if headers: + request_headers.update(headers) + + # Add content-type header for JSON requests + if json and "Content-Type" not in request_headers: + request_headers["Content-Type"] = "application/json" + + session = await self._get_session() + for _ in range(self.config.max_retries): + async with session.request(method, url, params=params, json=json, **kwargs) as response: + if response.status >= 400: + error_data = await response.json() + raise Exception(f"API request failed: {error_data}") + return await response.json() + + async def get_training_jobs( + self, + page: int | None = 1, + page_size: int | None = 10, + sort: Literal["created_at", "-created_at"] | None = "created_at", + ) -> ListNvidiaPostTrainingJobs: + """Get all customization jobs. + Updated the base class return type from ListPostTrainingJobsResponse to ListNvidiaPostTrainingJobs. + + Returns a ListNvidiaPostTrainingJobs object with the following fields: + - data: List[NvidiaPostTrainingJob] - List of NvidiaPostTrainingJob objects + + ToDo: Support for schema input for filtering. 
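+
+        Example (illustrative) response shape this method parses; the field values are
+        placeholders, and any extra fields are passed through onto the job object:
+            {
+                "data": [
+                    {
+                        "id": "cust-abc123",
+                        "status": "running",
+                        "created_at": "2025-01-01T00:00:00",
+                        "updated_at": "2025-01-01T00:10:00"
+                    }
+                ]
+            }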
+        """
+        params = {"page": page, "page_size": page_size, "sort": sort}
+
+        response = await self._make_request("GET", "/v1/customization/jobs", params=params)
+
+        jobs = []
+        for job in response.get("data", []):
+            job_id = job.pop("id")
+            job_status = job.pop("status", "scheduled").lower()
+            mapped_status = STATUS_MAPPING.get(job_status, "scheduled")
+
+            # Convert string timestamps to datetime objects
+            created_at = (
+                datetime.fromisoformat(job.pop("created_at"))
+                if "created_at" in job
+                else datetime.now(tz=timezone.utc)
+            )
+            updated_at = (
+                datetime.fromisoformat(job.pop("updated_at"))
+                if "updated_at" in job
+                else datetime.now(tz=timezone.utc)
+            )
+
+            # Create NvidiaPostTrainingJob instance
+            jobs.append(
+                NvidiaPostTrainingJob(
+                    job_uuid=job_id,
+                    status=JobStatus(mapped_status),
+                    created_at=created_at,
+                    updated_at=updated_at,
+                    **job,
+                )
+            )
+
+        return ListNvidiaPostTrainingJobs(data=jobs)
+
+    async def get_training_job_status(self, job_uuid: str) -> NvidiaPostTrainingJobStatusResponse:
+        """Get the status of a customization job.
+        Updated the base class return type from PostTrainingJobStatusResponse to NvidiaPostTrainingJobStatusResponse.
+
+        Returns a NvidiaPostTrainingJobStatusResponse object with the following fields:
+        - job_uuid: str - Unique identifier for the job
+        - status: JobStatus - Current status of the job (in_progress, completed, failed, cancelled, scheduled)
+        - created_at: datetime - The time when the job was created
+        - updated_at: datetime - The last time the job status was updated
+
+        Additional fields that may be included:
+        - steps_completed: Optional[int] - Number of training steps completed
+        - epochs_completed: Optional[int] - Number of epochs completed
+        - percentage_done: Optional[float] - Percentage of training completed (0-100)
+        - best_epoch: Optional[int] - The epoch with the best performance
+        - train_loss: Optional[float] - Training loss of the best checkpoint
+        - val_loss: Optional[float] - Validation loss of the best checkpoint
+        - metrics: Optional[Dict] - Additional training metrics
+        - status_logs: Optional[List] - Detailed logs of status changes
+        """
+        response = await self._make_request(
+            "GET",
+            f"/v1/customization/jobs/{job_uuid}/status",
+            params={"job_id": job_uuid},
+        )
+
+        api_status = response.pop("status").lower()
+        mapped_status = STATUS_MAPPING.get(api_status, "scheduled")
+
+        return NvidiaPostTrainingJobStatusResponse(
+            status=JobStatus(mapped_status),
+            job_uuid=job_uuid,
+            started_at=datetime.fromisoformat(response.pop("created_at")),
+            updated_at=datetime.fromisoformat(response.pop("updated_at")),
+            **response,
+        )
+
+    async def cancel_training_job(self, job_uuid: str) -> None:
+        await self._make_request(
+            method="POST", path=f"/v1/customization/jobs/{job_uuid}/cancel", params={"job_id": job_uuid}
+        )
+
+    async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
+        raise NotImplementedError("Job artifacts are not implemented yet")
+
+    async def get_post_training_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
+        raise NotImplementedError("Job artifacts are not implemented yet")
+
+    async def supervised_fine_tune(
+        self,
+        job_uuid: str,
+        training_config: dict[str, Any],
+        hyperparam_search_config: dict[str, Any],
+        logger_config: dict[str, Any],
+        model: str,
+        checkpoint_dir: str | None,
+        algorithm_config: AlgorithmConfig | None = None,
+    ) -> NvidiaPostTrainingJob:
+        """
+        Fine-tunes a model on a dataset.
+        Currently only supports LoRA fine-tuning in a standalone Docker container.
+        Assumptions:
+        - the NeMo microservice is running and its endpoint is set in config.customizer_url
+        - the dataset is registered separately in the NeMo datastore
+        - the model checkpoint is downloaded as per NeMo Customizer requirements
+
+        Parameters:
+            training_config: TrainingConfig - Configuration for training
+            model: str - NeMo Customizer configuration name
+            algorithm_config: Optional[AlgorithmConfig] - Algorithm-specific configuration
+            checkpoint_dir: Optional[str] - Directory containing model checkpoints, currently ignored
+            job_uuid: str - Unique identifier for the job, currently ignored
+            hyperparam_search_config: Dict[str, Any] - Configuration for hyperparameter search, currently ignored
+            logger_config: Dict[str, Any] - Configuration for logging, currently ignored
+
+        Environment Variables:
+            - NVIDIA_API_KEY: str - API key for the NVIDIA API
+                Default: None
+            - NVIDIA_DATASET_NAMESPACE: str - Namespace of the dataset
+                Default: "default"
+            - NVIDIA_CUSTOMIZER_URL: str - URL of the NeMo Customizer API
+                Default: "http://nemo.test"
+            - NVIDIA_PROJECT_ID: str - ID of the project
+                Default: "test-project"
+            - NVIDIA_OUTPUT_MODEL_DIR: str - Directory to save the output model
+                Default: "test-example-model@v1"
+
+        Supported models:
+            - meta/llama-3.1-8b-instruct
+            - meta/llama-3.2-1b-instruct
+
+        Supported algorithm configs:
+            - LoRA, SFT
+
+        Supported Parameters:
+            - TrainingConfig:
+                - n_epochs: int - Number of epochs to train
+                    Default: 50
+                - data_config: DataConfig - Configuration for the dataset
+                - optimizer_config: OptimizerConfig - Configuration for the optimizer
+                - dtype: str - Data type for training
+                    not supported (users are informed via warnings)
+                - efficiency_config: EfficiencyConfig - Configuration for efficiency
+                    not supported
+                - max_steps_per_epoch: int - Maximum number of steps per epoch
+                    Default: 1000
+                ## NeMo Customizer specific parameters
+                - log_every_n_steps: int - Log every n steps
+                    Default: None
+                - val_check_interval: float - Validation check interval
+                    Default: 0.25
+                - sequence_packing_enabled: bool - Sequence packing enabled
+                    Default: False
+                ## NeMo Customizer specific SFT parameters
+                - hidden_dropout: float - Hidden dropout
+                    Default: None (0.0-1.0)
+                - attention_dropout: float - Attention dropout
+                    Default: None (0.0-1.0)
+                - ffn_dropout: float - FFN dropout
+                    Default: None (0.0-1.0)
+
+            - DataConfig:
+                - dataset_id: str - Dataset ID
+                - batch_size: int - Batch size
+                    Default: 8
+
+            - OptimizerConfig:
+                - lr: float - Learning rate
+                    Default: 0.0001
+                ## NeMo Customizer specific parameter
+                - weight_decay: float - Weight decay
+                    Default: 0.01
+
+            - LoRA config:
+                ## NeMo Customizer specific LoRA parameters
+                - alpha: int - Scaling factor for the LoRA update
+                    Default: 16
+        Note:
+            - checkpoint_dir, hyperparam_search_config, logger_config are not supported (users are informed via warnings)
+            - Some parameters from TrainingConfig, DataConfig, OptimizerConfig are not supported (users are informed via warnings)
+
+        The user is informed about unsupported parameters via warnings.
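+
+        Example (illustrative) Customizer payload assembled from these inputs, using
+        the defaults documented above; the dataset name is a placeholder:
+            {
+                "config": "meta/llama-3.1-8b-instruct",
+                "dataset": {"name": "<dataset_id>", "namespace": "default"},
+                "hyperparameters": {
+                    "training_type": "sft",
+                    "finetuning_type": "lora",
+                    "epochs": 2,
+                    "batch_size": 16,
+                    "learning_rate": 0.0001,
+                    "lora": {"alpha": 16}
+                },
+                "project": "test-project",
+                "output_model": "test-example-model@v1"
+            }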
+ """ + + # Check for unsupported method parameters + unsupported_method_params = [] + if checkpoint_dir: + unsupported_method_params.append(f"checkpoint_dir={checkpoint_dir}") + if hyperparam_search_config: + unsupported_method_params.append("hyperparam_search_config") + if logger_config: + unsupported_method_params.append("logger_config") + + if unsupported_method_params: + warnings.warn( + f"Parameters: {', '.join(unsupported_method_params)} are not supported and will be ignored", + stacklevel=2, + ) + + # Define all supported parameters + supported_params = { + "training_config": { + "n_epochs", + "data_config", + "optimizer_config", + "log_every_n_steps", + "val_check_interval", + "sequence_packing_enabled", + "hidden_dropout", + "attention_dropout", + "ffn_dropout", + }, + "data_config": {"dataset_id", "batch_size"}, + "optimizer_config": {"lr", "weight_decay"}, + "lora_config": {"type", "alpha"}, + } + + # Validate all parameters at once + warn_unsupported_params(training_config, supported_params["training_config"], "TrainingConfig") + warn_unsupported_params(training_config["data_config"], supported_params["data_config"], "DataConfig") + warn_unsupported_params( + training_config["optimizer_config"], supported_params["optimizer_config"], "OptimizerConfig" + ) + + output_model = self.config.output_model_dir + + # Prepare base job configuration + job_config = { + "config": model, + "dataset": { + "name": training_config["data_config"]["dataset_id"], + "namespace": self.config.dataset_namespace, + }, + "hyperparameters": { + "training_type": "sft", + "finetuning_type": "lora", + **{ + k: v + for k, v in { + "epochs": training_config.get("n_epochs"), + "batch_size": training_config["data_config"].get("batch_size"), + "learning_rate": training_config["optimizer_config"].get("lr"), + "weight_decay": training_config["optimizer_config"].get("weight_decay"), + "log_every_n_steps": training_config.get("log_every_n_steps"), + "val_check_interval": training_config.get("val_check_interval"), + "sequence_packing_enabled": training_config.get("sequence_packing_enabled"), + }.items() + if v is not None + }, + }, + "project": self.config.project_id, + # TODO: ignored ownership, add it later + # "ownership": {"created_by": self.config.user_id, "access_policies": self.config.access_policies}, + "output_model": output_model, + } + + # Handle SFT-specific optional parameters + job_config["hyperparameters"]["sft"] = { + k: v + for k, v in { + "ffn_dropout": training_config.get("ffn_dropout"), + "hidden_dropout": training_config.get("hidden_dropout"), + "attention_dropout": training_config.get("attention_dropout"), + }.items() + if v is not None + } + + # Remove the sft dictionary if it's empty + if not job_config["hyperparameters"]["sft"]: + job_config["hyperparameters"].pop("sft") + + # Handle LoRA-specific configuration + if algorithm_config: + if algorithm_config.type == "LoRA": + warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config") + job_config["hyperparameters"]["lora"] = { + k: v for k, v in {"alpha": algorithm_config.alpha}.items() if v is not None + } + else: + raise NotImplementedError(f"Unsupported algorithm config: {algorithm_config}") + + # Create the customization job + response = await self._make_request( + method="POST", + path="/v1/customization/jobs", + headers={"Accept": "application/json"}, + json=job_config, + ) + + job_uuid = response["id"] + response.pop("status") + created_at = datetime.fromisoformat(response.pop("created_at")) + updated_at = 
datetime.fromisoformat(response.pop("updated_at")) + + return NvidiaPostTrainingJob( + job_uuid=job_uuid, status=JobStatus.in_progress, created_at=created_at, updated_at=updated_at, **response + ) + + async def preference_optimize( + self, + job_uuid: str, + finetuned_model: str, + algorithm_config: DPOAlignmentConfig, + training_config: TrainingConfig, + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + ) -> PostTrainingJob: + """Optimize a model based on preference data.""" + raise NotImplementedError("Preference optimization is not implemented yet") + + async def get_training_job_container_logs(self, job_uuid: str) -> PostTrainingJobStatusResponse: + raise NotImplementedError("Job logs are not implemented yet") diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py new file mode 100644 index 000000000..bd40dacb4 --- /dev/null +++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -0,0 +1,63 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import warnings +from typing import Any + +from pydantic import BaseModel + +from llama_stack.log import get_logger +from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig + +from .config import NvidiaPostTrainingConfig + +logger = get_logger(name=__name__, category="post_training::nvidia") + + +def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None: + keys = set(config_dict.__annotations__.keys()) if isinstance(config_dict, BaseModel) else config_dict.keys() + unsupported_params = [k for k in keys if k not in supported_keys] + if unsupported_params: + warnings.warn( + f"Parameters: {unsupported_params} in `{config_name}` not supported and will be ignored.", stacklevel=2 + ) + + +def validate_training_params( + training_config: dict[str, Any], supported_keys: set[str], config_name: str = "TrainingConfig" +) -> None: + """ + Validates training parameters against supported keys. + + Args: + training_config: Dictionary containing training configuration parameters + supported_keys: Set of supported parameter keys + config_name: Name of the configuration for warning messages + """ + sft_lora_fields = set(SFTLoRADefaultConfig.__annotations__.keys()) + + # A key is unsupported when it is neither an explicitly supported key nor an + # SFT LoRA field; this covers both keys unknown to either config and + # TrainingConfig-only fields without an SFT LoRA counterpart. + known_fields = supported_keys.union(sft_lora_fields) + unsupported_params = [key for key in training_config if isinstance(key, str) and key not in known_fields] + + if unsupported_params: + warnings.warn( + f"Parameters: {unsupported_params} in `{config_name}` are not supported and will be ignored.", stacklevel=2 + ) + + +# TODO: implement health checks verifying that the Customizer endpoints are enabled +async def _get_health(url: str) -> tuple[bool, bool]: ... + + +async def check_health(config: NvidiaPostTrainingConfig) -> None: ...
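For context on how this endpoint is driven end to end, here is a minimal sketch of a `supervised_fine_tune` call that matches the dict-style access the implementation uses (`training_config["data_config"]`, etc.). The adapter handle, the dataset ID, and the LoRA config object are illustrative assumptions, not part of this patch:

```python
# Sketch only: `adapter` stands in for an initialized NVIDIA post-training
# provider, and `lora` for an AlgorithmConfig variant exposing the two fields
# the adapter actually reads: `.type == "LoRA"` and `.alpha`.
async def launch_lora_job(adapter, lora):
    training_config = {
        "n_epochs": 50,
        "data_config": {"dataset_id": "my-dataset", "batch_size": 8},  # hypothetical dataset
        "optimizer_config": {"lr": 0.0001, "weight_decay": 0.01},
    }
    job = await adapter.supervised_fine_tune(
        job_uuid="",  # currently ignored by this provider
        model="meta/llama-3.1-8b-instruct",
        checkpoint_dir="",  # currently ignored; a warning is emitted if set
        algorithm_config=lora,
        training_config=training_config,
        hyperparam_search_config={},
        logger_config={},
    )
    return job.job_uuid, job.status  # Customizer job handle, status in_progress
```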
diff --git a/llama_stack/providers/inline/tool_runtime/__init__.py b/src/llama_stack/providers/remote/safety/__init__.py similarity index 100% rename from llama_stack/providers/inline/tool_runtime/__init__.py rename to src/llama_stack/providers/remote/safety/__init__.py diff --git a/llama_stack/providers/remote/safety/bedrock/__init__.py b/src/llama_stack/providers/remote/safety/bedrock/__init__.py similarity index 100% rename from llama_stack/providers/remote/safety/bedrock/__init__.py rename to src/llama_stack/providers/remote/safety/bedrock/__init__.py diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py new file mode 100644 index 000000000..c321f759b --- /dev/null +++ b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -0,0 +1,111 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +from typing import Any + +from llama_stack.log import get_logger +from llama_stack.providers.utils.bedrock.client import create_bedrock_client +from llama_stack_api import ( + OpenAIMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ShieldsProtocolPrivate, + ViolationLevel, +) + +from .config import BedrockSafetyConfig + +logger = get_logger(name=__name__, category="safety::bedrock") + + +class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate): + def __init__(self, config: BedrockSafetyConfig) -> None: + self.config = config + self.registered_shields = [] + + async def initialize(self) -> None: + try: + self.bedrock_runtime_client = create_bedrock_client(self.config) + self.bedrock_client = create_bedrock_client(self.config, "bedrock") + except Exception as e: + raise RuntimeError("Error initializing BedrockSafetyAdapter") from e + + async def shutdown(self) -> None: + pass + + async def register_shield(self, shield: Shield) -> None: + response = self.bedrock_client.list_guardrails( + guardrailIdentifier=shield.provider_resource_id, + ) + if ( + not response["guardrails"] + or len(response["guardrails"]) == 0 + or response["guardrails"][0]["version"] != shield.params["guardrailVersion"] + ): + raise ValueError( + f"Shield {shield.provider_resource_id} with version {shield.params['guardrailVersion']} not found in Bedrock" + ) + + async def unregister_shield(self, identifier: str) -> None: + pass + + async def run_shield( + self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None + ) -> RunShieldResponse: + shield = await self.shield_store.get_shield(shield_id) + if not shield: + raise ValueError(f"Shield {shield_id} not found") + + """ + This is the implementation for the Bedrock guardrails. The input to the guardrails service must be of this format: + ```content = [ + { + "text": { + "text": "Is the AB503 Product a better investment than the S&P 500?" + } + } + ]``` + Incoming messages contain content and role.
For now we extract the content and + default the "qualifiers" to ["query"]. + """ + + shield_params = shield.params + logger.debug(f"run_shield::{shield_params}::messages={messages}") + + # - convert the messages into the format Bedrock expects + content_messages = [] + for message in messages: + content_messages.append({"text": {"text": message.content}}) + logger.debug(f"run_shield::final:messages::{json.dumps(content_messages, indent=2)}:") + + response = self.bedrock_runtime_client.apply_guardrail( + guardrailIdentifier=shield.provider_resource_id, + guardrailVersion=shield_params["guardrailVersion"], + source="OUTPUT", # or 'INPUT' depending on your use case + content=content_messages, + ) + if response["action"] == "GUARDRAIL_INTERVENED": + user_message = "" + metadata = {} + for output in response["outputs"]: + # the guardrails API returns a list; this implementation keeps only the last value + user_message = output["text"] + for assessment in response["assessments"]: + # the guardrails API returns a list; this implementation keeps only the last value + metadata = dict(assessment) + + return RunShieldResponse( + violation=SafetyViolation( + user_message=user_message, + violation_level=ViolationLevel.ERROR, + metadata=metadata, + ) + ) + + return RunShieldResponse() diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py new file mode 100644 index 000000000..0b1f2581a --- /dev/null +++ b/src/llama_stack/providers/remote/safety/bedrock/config.py @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig +from llama_stack_api import json_schema_type + + +@json_schema_type +class BedrockSafetyConfig(BedrockBaseConfig): + pass diff --git a/src/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md new file mode 100644 index 000000000..f3ec0f1e0 --- /dev/null +++ b/src/llama_stack/providers/remote/safety/nvidia/README.md @@ -0,0 +1,78 @@ +# NVIDIA Safety Provider for LlamaStack + +This provider enables safety checks and guardrails for LLM interactions using NVIDIA's NeMo Guardrails service.
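For context while reading the README that follows: the provider ultimately POSTs to the Guardrails microservice's `/v1/guardrail/checks` endpoint with the payload built in `nvidia.py` later in this patch. A minimal raw-request sketch, abridged to the essential fields; the service URL and config ID are illustrative assumptions:

```python
import requests

# Mirrors (in abridged form) the request_data payload assembled by
# NeMoGuardrails.run() in nvidia.py below; URL and config_id are placeholders.
resp = requests.post(
    "http://localhost:7331/v1/guardrail/checks",
    headers={"Accept": "application/json"},
    json={
        "model": "safety-model-id",
        "messages": [{"role": "user", "content": "Your message to check"}],
        "temperature": 1.0,
        "guardrails": {"config_id": "self-check"},
    },
)
resp.raise_for_status()
print(resp.json()["status"])  # "blocked" when a rail intervenes
```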
+ +## Features + +- Run safety checks for messages + +## Getting Started + +### Prerequisites + +- LlamaStack with NVIDIA configuration +- Access to NVIDIA NeMo Guardrails service +- A deployed NIM for the model used for safety checks + +### Setup + +Build the NVIDIA environment: + +```bash +uv pip install llama-stack-client +uv run llama stack list-deps nvidia | xargs -L1 uv pip install +``` + +### Basic Usage with the LlamaStack Python Client + +#### Initialize the client + +```python +import os + +os.environ["NVIDIA_API_KEY"] = "your-api-key" +os.environ["NVIDIA_GUARDRAILS_URL"] = "http://guardrails.test" + +from llama_stack.core.library_client import LlamaStackAsLibraryClient + +client = LlamaStackAsLibraryClient("nvidia") +client.initialize() +``` + +#### Create a safety shield + +```python +from llama_stack_api.safety import Shield +from llama_stack_api.inference import Message + +# Create a safety shield +shield = Shield( + shield_id="your-shield-id", + provider_resource_id="safety-model-id", # The model to use for safety checks + description="Safety checks for content moderation", +) + +# Register the shield +await client.safety.register_shield(shield) +``` + +#### Run safety checks + +```python +# Messages to check +messages = [Message(role="user", content="Your message to check")] + +# Run safety check +response = await client.safety.run_shield( + shield_id="your-shield-id", + messages=messages, +) + +# Check for violations +if response.violation: + print(f"Safety violation detected: {response.violation.user_message}") + print(f"Violation level: {response.violation.violation_level}") + print(f"Metadata: {response.violation.metadata}") +else: + print("No safety violations detected") +``` diff --git a/llama_stack/providers/remote/safety/nvidia/__init__.py b/src/llama_stack/providers/remote/safety/nvidia/__init__.py similarity index 100% rename from llama_stack/providers/remote/safety/nvidia/__init__.py rename to src/llama_stack/providers/remote/safety/nvidia/__init__.py diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py new file mode 100644 index 000000000..f11de5feb --- /dev/null +++ b/src/llama_stack/providers/remote/safety/nvidia/config.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +import os +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack_api import json_schema_type + + +@json_schema_type +class NVIDIASafetyConfig(BaseModel): + """ + Configuration for the NVIDIA Guardrail microservice endpoint. + + Attributes: + guardrails_service_url (str): A base URL for accessing the NVIDIA guardrail endpoint, e.g. 
http://0.0.0.0:7331 + config_id (str | None): The ID of the guardrails configuration to use from the configuration store + (https://developer.nvidia.com/docs/nemo-microservices/guardrails/source/guides/configuration-store-guide.html) + + """ + + guardrails_service_url: str = Field( + default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331"), + description="The URL for accessing the Guardrails service", + ) + config_id: str | None = Field( + default_factory=lambda: os.getenv("NVIDIA_GUARDRAILS_CONFIG_ID", "self-check"), + description="Guardrails configuration ID to use from the Guardrails configuration store", + ) + + @classmethod + def sample_run_config(cls, **kwargs) -> dict[str, Any]: + return { + "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}", + "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}", + } diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py new file mode 100644 index 000000000..43ff45cc9 --- /dev/null +++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -0,0 +1,167 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +import requests + +from llama_stack.log import get_logger +from llama_stack_api import ( + ModerationObject, + OpenAIMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ShieldsProtocolPrivate, + ViolationLevel, +) + +from .config import NVIDIASafetyConfig + +logger = get_logger(name=__name__, category="safety::nvidia") + + +class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): + def __init__(self, config: NVIDIASafetyConfig) -> None: + """ + Initialize the NVIDIASafetyAdapter with a given safety configuration. + + Args: + config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID. + """ + self.config = config + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + + async def register_shield(self, shield: Shield) -> None: + if not shield.provider_resource_id: + raise ValueError("Shield model not provided.") + + async def unregister_shield(self, identifier: str) -> None: + pass + + async def run_shield( + self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None + ) -> RunShieldResponse: + """ + Run a safety shield check against the provided messages. + + Args: + shield_id (str): The unique identifier for the shield to be used. + messages (list[OpenAIMessageParam]): A list of messages representing the conversation history. + params (dict[str, Any] | None): Additional parameters for the shield check. + + Returns: + RunShieldResponse: The response containing safety violation details if any. + + Raises: + ValueError: If the shield with the provided shield_id is not found. 
+ """ + shield = await self.shield_store.get_shield(shield_id) + if not shield: + raise ValueError(f"Shield {shield_id} not found") + + self.shield = NeMoGuardrails(self.config, shield.shield_id) + return await self.shield.run(messages) + + async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject: + raise NotImplementedError("NVIDIA safety provider currently does not implement run_moderation") + + +class NeMoGuardrails: + """ + A class that encapsulates NVIDIA's guardrails safety logic. + + Sends messages to the guardrails service and interprets the response to determine + if a safety violation has occurred. + """ + + def __init__( + self, + config: NVIDIASafetyConfig, + model: str, + threshold: float = 0.9, + temperature: float = 1.0, + ): + """ + Initialize a NeMoGuardrails instance with the provided parameters. + + Args: + config (NVIDIASafetyConfig): The safety configuration containing the config ID and guardrails URL. + model (str): The identifier or name of the model to be used for safety checks. + threshold (float, optional): The threshold for flagging violations. Defaults to 0.9. + temperature (float, optional): The temperature setting for the underlying model. Must be greater than 0. Defaults to 1.0. + + Raises: + ValueError: If temperature is less than or equal to 0. + AssertionError: If config_id is not provided in the configuration. + """ + self.config_id = config.config_id + self.model = model + assert self.config_id is not None, "Must provide config id" + if temperature <= 0: + raise ValueError("Temperature must be greater than 0") + + self.temperature = temperature + self.threshold = threshold + self.guardrails_service_url = config.guardrails_service_url + + async def _guardrails_post(self, path: str, data: Any | None): + """Helper for making POST requests to the guardrails service.""" + headers = { + "Accept": "application/json", + } + response = requests.post(url=f"{self.guardrails_service_url}{path}", headers=headers, json=data) + response.raise_for_status() + return response.json() + + async def run(self, messages: list[OpenAIMessageParam]) -> RunShieldResponse: + """ + Queries the /v1/guardrails/checks endpoint of the NeMo guardrails deployed API. + + Args: + messages (List[Message]): A list of Message objects to be checked for safety violations. + + Returns: + RunShieldResponse: If the response indicates a violation ("blocked" status), returns a + RunShieldResponse with a SafetyViolation; otherwise, returns a RunShieldResponse with violation set to None. + + Raises: + requests.HTTPError: If the POST request fails. + """ + request_data = { + "model": self.model, + "messages": [{"role": message.role, "content": message.content} for message in messages], + "temperature": self.temperature, + "top_p": 1, + "frequency_penalty": 0, + "presence_penalty": 0, + "max_tokens": 160, + "stream": False, + "guardrails": { + "config_id": self.config_id, + }, + } + response = await self._guardrails_post(path="/v1/guardrail/checks", data=request_data) + + if response["status"] == "blocked": + user_message = "Sorry I cannot do this." 
+ metadata = response["rails_status"] + + return RunShieldResponse( + violation=SafetyViolation( + user_message=user_message, + violation_level=ViolationLevel.ERROR, + metadata=metadata, + ) + ) + + return RunShieldResponse(violation=None) diff --git a/llama_stack/providers/remote/safety/sambanova/__init__.py b/src/llama_stack/providers/remote/safety/sambanova/__init__.py similarity index 100% rename from llama_stack/providers/remote/safety/sambanova/__init__.py rename to src/llama_stack/providers/remote/safety/sambanova/__init__.py diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py new file mode 100644 index 000000000..bfb42d88a --- /dev/null +++ b/src/llama_stack/providers/remote/safety/sambanova/config.py @@ -0,0 +1,37 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field, SecretStr + +from llama_stack_api import json_schema_type + + +class SambaNovaProviderDataValidator(BaseModel): + sambanova_api_key: str | None = Field( + default=None, + description="SambaNova Cloud API key", + ) + + +@json_schema_type +class SambaNovaSafetyConfig(BaseModel): + url: str = Field( + default="https://api.sambanova.ai/v1", + description="The URL for the SambaNova AI server", + ) + api_key: SecretStr | None = Field( + default=None, + description="The SambaNova Cloud API key", + ) + + @classmethod + def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: + return { + "url": "https://api.sambanova.ai/v1", + "api_key": api_key, + } diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py new file mode 100644 index 000000000..c11cb544d --- /dev/null +++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -0,0 +1,98 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +import litellm +import requests + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack_api import ( + OpenAIMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ShieldsProtocolPrivate, + ViolationLevel, +) + +from .config import SambaNovaSafetyConfig + +logger = get_logger(name=__name__, category="safety::sambanova") + +CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"
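The SambaNova adapter defined next infers violations from the guard model's text reply. It appears to assume a Llama Guard-style convention: the model answers either `safe` or `unsafe` with a category code on the last line. A hypothetical helper isolating that parsing, not part of this patch:

```python
# Hypothetical helper: isolates the reply-parsing convention that
# run_shield() below applies inline on the litellm completion text.
def parse_guard_reply(reply: str) -> tuple[bool, str | None]:
    if "unsafe" in reply.lower():
        # The category code (e.g. "S1") is expected on the last line.
        return True, reply.split("\n")[-1]
    return False, None


assert parse_guard_reply("safe") == (False, None)
assert parse_guard_reply("unsafe\nS1") == (True, "S1")
```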
+ + +class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProviderData): + def __init__(self, config: SambaNovaSafetyConfig) -> None: + self.config = config + self.environment_available_models = [] + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + + def _get_api_key(self) -> str: + config_api_key = self.config.api_key if self.config.api_key else None + if config_api_key: + return config_api_key.get_secret_value() + else: + provider_data = self.get_request_provider_data() + if provider_data is None or not provider_data.sambanova_api_key: + raise ValueError( + 'Pass the SambaNova API key in the header X-LlamaStack-Provider-Data as {"sambanova_api_key": "<your-api-key>"}' + ) + return provider_data.sambanova_api_key + + async def register_shield(self, shield: Shield) -> None: + list_models_url = self.config.url + "/models" + if len(self.environment_available_models) == 0: + try: + response = requests.get(list_models_url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Request to {list_models_url} failed") from e + self.environment_available_models = [model.get("id") for model in response.json().get("data", [])] + if ( + "guard" not in shield.provider_resource_id.lower() + or shield.provider_resource_id.split("sambanova/")[-1] not in self.environment_available_models + ): + logger.warning(f"Shield {shield.provider_resource_id} not available in {list_models_url}") + + async def unregister_shield(self, identifier: str) -> None: + pass + + async def run_shield( + self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None + ) -> RunShieldResponse: + shield = await self.shield_store.get_shield(shield_id) + if not shield: + raise ValueError(f"Shield {shield_id} not found") + + shield_params = shield.params + logger.debug(f"run_shield::{shield_params}::messages={messages}") + + response = litellm.completion(model=shield.provider_resource_id, messages=messages, api_key=self._get_api_key()) + shield_message = response.choices[0].message.content + + if "unsafe" in shield_message.lower(): + user_message = CANNED_RESPONSE_TEXT + violation_type = shield_message.split("\n")[-1] + metadata = {"violation_type": violation_type} + + return RunShieldResponse( + violation=SafetyViolation( + user_message=user_message, + violation_level=ViolationLevel.ERROR, + metadata=metadata, + ) + ) + + return RunShieldResponse() diff --git a/llama_stack/providers/inline/vector_io/__init__.py b/src/llama_stack/providers/remote/tool_runtime/__init__.py similarity index 100% rename from llama_stack/providers/inline/vector_io/__init__.py rename to src/llama_stack/providers/remote/tool_runtime/__init__.py diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/bing_search/__init__.py rename to src/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py similarity index 90% rename from llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py rename to src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index 9a98964b7..77c5a3bf7 100644 --- a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ 
b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -9,16 +9,16 @@ from typing import Any import httpx -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BingSearchToolConfig @@ -49,7 +49,10 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq return provider_data.bing_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() headers = { "Ocp-Apim-Subscription-Key": api_key, diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/config.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/config.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/bing_search/config.py rename to src/llama_stack/providers/remote/tool_runtime/bing_search/config.py diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/brave_search/__init__.py rename to src/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py similarity index 93% rename from llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py rename to src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index 02e5b5c69..1f49671cf 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -8,17 +8,17 @@ from typing import Any import httpx -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.models.llama.datatypes import BuiltinTool +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.models.llama.datatypes import BuiltinTool -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BraveSearchToolConfig @@ -48,7 +48,10 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe return provider_data.brave_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: 
URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() url = "https://api.search.brave.com/res/v1/web/search" headers = { diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/config.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/brave_search/config.py rename to src/llama_stack/providers/remote/tool_runtime/brave_search/config.py diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py rename to src/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py new file mode 100644 index 000000000..9acabfc34 --- /dev/null +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py @@ -0,0 +1,26 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel + + +class MCPProviderDataValidator(BaseModel): + """ + Validator for MCP provider-specific data passed via request headers. + + Phase 1: Support old header-based authentication for backward compatibility. + In Phase 2, this will be deprecated in favor of the authorization parameter. + """ + + mcp_headers: dict[str, dict[str, str]] | None = None # Map of URI -> headers dict + + +class MCPProviderConfig(BaseModel): + @classmethod + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: + return {} diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py new file mode 100644 index 000000000..97b044dbf --- /dev/null +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -0,0 +1,115 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
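The MCP runtime implemented in this file enforces a split between authentication and other headers: per-endpoint extras travel via `mcp_headers` in provider data, while bearer credentials must use the dedicated `authorization` parameter (passing `Authorization` inside `mcp_headers` raises `ValueError`). A caller-side sketch of that contract, with the endpoint, header, and token values invented for illustration:

```python
# Hypothetical caller-side view of the contract enforced below.
provider_data = {
    "mcp_headers": {
        # Non-auth extras keyed by endpoint URI are fine...
        "http://localhost:8000/sse": {"X-Trace-Id": "abc123"},
        # ...but {"Authorization": "Bearer ..."} here would be rejected.
    }
}


async def call_tool(impl, token: str):
    # Credentials go through the dedicated parameter instead.
    return await impl.invoke_tool(
        tool_name="my_tool",
        kwargs={"query": "hello"},
        authorization=token,
    )
```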
+ +from typing import Any +from urllib.parse import urlparse + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools +from llama_stack_api import ( + URL, + Api, + ListToolDefsResponse, + ToolGroup, + ToolGroupsProtocolPrivate, + ToolInvocationResult, + ToolRuntime, +) + +from .config import MCPProviderConfig + +logger = get_logger(__name__, category="tools") + + +class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRequestProviderData): + def __init__(self, config: MCPProviderConfig, _deps: dict[Api, Any]): + self.config = config + + async def initialize(self): + pass + + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: + pass + + async def unregister_toolgroup(self, toolgroup_id: str) -> None: + return + + async def list_runtime_tools( + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, + ) -> ListToolDefsResponse: + # TODO: this endpoint could instead be resolved from the registered tool group + if mcp_endpoint is None: + raise ValueError("mcp_endpoint is required") + + # Get other headers from provider data (but NOT authorization) + provider_headers = await self.get_headers_from_request(mcp_endpoint.uri) + + return await list_mcp_tools(endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=authorization) + + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: + tool = await self.tool_store.get_tool(tool_name) + if tool.metadata is None or tool.metadata.get("endpoint") is None: + raise ValueError(f"Tool {tool_name} does not have metadata") + endpoint = tool.metadata.get("endpoint") + if urlparse(endpoint).scheme not in ("http", "https"): + raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL") + + # Get other headers from provider data (but NOT authorization) + provider_headers = await self.get_headers_from_request(endpoint) + + return await invoke_mcp_tool( + endpoint=endpoint, + tool_name=tool_name, + kwargs=kwargs, + headers=provider_headers, + authorization=authorization, + ) + + async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]: + """ + Extract headers from request provider data, excluding authorization. + + Authorization must be provided via the dedicated authorization parameter. + If Authorization is found in mcp_headers, raise an error to guide users to the correct approach. + + Args: + mcp_endpoint_uri: The MCP endpoint URI to match against provider data + + Returns: + dict[str, str]: Headers dictionary (without Authorization) + + Raises: + ValueError: If Authorization header is found in mcp_headers + """ + + def canonicalize_uri(uri: str) -> str: + return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}" + + headers = {} + + provider_data = self.get_request_provider_data() + if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers: + for uri, values in provider_data.mcp_headers.items(): + if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri): + continue + + # Reject Authorization in mcp_headers - must use authorization parameter + for key in values.keys(): + if key.lower() == "authorization": + raise ValueError( + "Authorization cannot be provided via mcp_headers in provider_data. " + "Please use the dedicated 'authorization' parameter instead. 
" + "Example: tool_runtime.invoke_tool(..., authorization='your-token')" + ) + headers[key] = values[key] + + return headers diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/config.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/tavily_search/config.py rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/config.py diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py similarity index 88% rename from llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index ca629fced..e12b41885 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -9,16 +9,16 @@ from typing import Any import httpx -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import TavilySearchToolConfig @@ -48,7 +48,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR return provider_data.tavily_search_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -69,7 +72,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() async with httpx.AsyncClient() as client: response = await client.post( diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py 
b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py similarity index 93% rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index 410e34195..68f0ebaef 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -9,16 +9,16 @@ from typing import Any import httpx -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import WolframAlphaToolConfig @@ -49,7 +49,10 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR return provider_data.wolfram_alpha_api_key async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None + self, + tool_group_id: str | None = None, + mcp_endpoint: URL | None = None, + authorization: str | None = None, ) -> ListToolDefsResponse: return ListToolDefsResponse( data=[ @@ -70,7 +73,9 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR ] ) - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: + async def invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ) -> ToolInvocationResult: api_key = self._get_api_key() params = { "input": kwargs["query"], diff --git a/llama_stack/providers/registry/__init__.py b/src/llama_stack/providers/remote/vector_io/__init__.py similarity index 100% rename from llama_stack/providers/registry/__init__.py rename to src/llama_stack/providers/remote/vector_io/__init__.py diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py new file mode 100644 index 000000000..d774ea643 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack_api import Api, ProviderSpec + +from .config import ChromaVectorIOConfig + + +async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): + from .chroma import ChromaVectorIOAdapter + + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py similarity index 91% rename from llama_stack/providers/remote/vector_io/chroma/chroma.py rename to src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 0da459eba..ade1b2dc0 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -12,19 +12,23 @@ from urllib.parse import urlparse import chromadb from numpy.typing import NDArray -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator - +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoresProtocolPrivate, +) +from llama_stack_api.internal.kvstore import KVStore from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -147,7 +151,6 @@ class ChromaIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a single chunk from the Chroma collection by its ID.""" ids = [f"{chunk.document_id}:{chunk.chunk_id}" for chunk in chunks_for_deletion] @@ -232,7 +235,6 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) - self.vector_store_table = self.kvstore if isinstance(self.config, RemoteChromaVectorIOConfig): log.info(f"Connecting to Chroma server at: {self.config.url}") @@ -270,20 +272,20 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc await self.cache[vector_store_id].index.delete() del self.cache[vector_store_id] - async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_store_index(vector_db_id) + async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_store_id) if index is None: - raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") + raise ValueError(f"Vector DB {vector_store_id} not found 
in Chroma") await index.insert_chunks(chunks) async def query_chunks( - self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None + self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_store_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_store_id) if index is None: - raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") + raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") return await index.query_chunks(query, params) @@ -291,9 +293,16 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc if vector_store_id in self.cache: return self.cache[vector_store_id] - vector_store = await self.vector_store_table.get_vector_store(vector_store_id) - if not vector_store: + # Try to load from kvstore + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.") + + key = f"{VECTOR_DBS_PREFIX}{vector_store_id}" + vector_store_data = await self.kvstore.get(key) + if not vector_store_data: raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack") + + vector_store = VectorStore.model_validate_json(vector_store_data) collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py new file mode 100644 index 000000000..648d641ad --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class ChromaVectorIOConfig(BaseModel): + url: str | None + persistence: KVStoreReference = Field(description="Config for KV store backend") + + @classmethod + def sample_run_config(cls, __distro_dir__: str, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]: + return { + "url": url, + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::chroma_remote", + ).model_dump(exclude_none=True), + } diff --git a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py new file mode 100644 index 000000000..1b703d486 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack_api import Api, ProviderSpec + +from .config import MilvusVectorIOConfig + + +async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]): + from .milvus import MilvusVectorIOAdapter + + assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py new file mode 100644 index 000000000..4b9d6a566 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class MilvusVectorIOConfig(BaseModel): + uri: str = Field(description="The URI of the Milvus server") + token: str | None = Field(description="The token of the Milvus server") + consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") + persistence: KVStoreReference = Field(description="Config for KV store backend") + + # This configuration allows additional fields to be passed through to the underlying Milvus client. + # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. + model_config = ConfigDict(extra="allow") + + @classmethod + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: + return { + "uri": "${env.MILVUS_ENDPOINT}", + "token": "${env.MILVUS_TOKEN}", + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::milvus_remote", + ).model_dump(exclude_none=True), + } diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py similarity index 93% rename from llama_stack/providers/remote/vector_io/milvus/milvus.py rename to src/llama_stack/providers/remote/vector_io/milvus/milvus.py index cccf13816..044d678fa 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -11,16 +11,9 @@ from typing import Any from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import 
OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_WEIGHTED, @@ -29,6 +22,18 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) +from llama_stack_api.internal.kvstore import KVStore from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig @@ -328,13 +333,16 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc if vector_store_id in self.cache: return self.cache[vector_store_id] - if self.vector_store_table is None: - raise VectorStoreNotFoundError(vector_store_id) - - vector_store = await self.vector_store_table.get_vector_store(vector_store_id) - if not vector_store: + # Try to load from kvstore + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.") + + key = f"{VECTOR_DBS_PREFIX}{vector_store_id}" + vector_store_data = await self.kvstore.get(key) + if not vector_store_data: raise VectorStoreNotFoundError(vector_store_id) + vector_store = VectorStore.model_validate_json(vector_store_data) index = VectorStoreWithIndex( vector_store=vector_store, index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), @@ -348,19 +356,19 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc await self.cache[vector_store_id].index.delete() del self.cache[vector_store_id] - async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_store_index(vector_db_id) + async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_store_id) if not index: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) await index.insert_chunks(chunks) async def query_chunks( - self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None + self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_store_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_store_id) if not index: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py new file mode 100644 index 000000000..36018fd95 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
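Unlike Chroma and Milvus, the PGVector adapter diffed below also eagerly rehydrates every persisted vector store during `initialize()` via a key-range scan. A condensed sketch of that preload loop, with `VECTOR_DBS_PREFIX`, the index types, and the adapter attributes assumed from the modules in this patch:

```python
# Condensed from PGVectorVectorIOAdapter.initialize() below; `adapter` is
# assumed to expose kvstore, conn, inference_api, and cache, and
# PGVectorIndex / VectorStoreWithIndex / VectorStore come from this patch.
async def preload_vector_stores(adapter) -> None:
    start_key = VECTOR_DBS_PREFIX
    end_key = f"{VECTOR_DBS_PREFIX}\xff"  # "\xff" caps the key-range scan
    for raw in await adapter.kvstore.values_in_range(start_key, end_key):
        vector_store = VectorStore.model_validate_json(raw)
        index = PGVectorIndex(
            vector_store=vector_store,
            dimension=vector_store.embedding_dimension,
            conn=adapter.conn,
            kvstore=adapter.kvstore,
        )
        await index.initialize()
        adapter.cache[vector_store.identifier] = VectorStoreWithIndex(
            vector_store, index=index, inference_api=adapter.inference_api
        )
```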
+ +from llama_stack_api import Api, ProviderSpec + +from .config import PGVectorVectorIOConfig + + +async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): + from .pgvector import PGVectorVectorIOAdapter + + impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py new file mode 100644 index 000000000..87d40a883 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -0,0 +1,47 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class PGVectorVectorIOConfig(BaseModel): + host: str | None = Field(default="localhost") + port: int | None = Field(default=5432) + db: str | None = Field(default="postgres") + user: str | None = Field(default="postgres") + password: str | None = Field(default="mysecretpassword") + persistence: KVStoreReference | None = Field( + description="Config for KV store backend (SQLite only for now)", default=None + ) + + @classmethod + def sample_run_config( + cls, + __distro_dir__: str, + host: str = "${env.PGVECTOR_HOST:=localhost}", + port: int = "${env.PGVECTOR_PORT:=5432}", + db: str = "${env.PGVECTOR_DB}", + user: str = "${env.PGVECTOR_USER}", + password: str = "${env.PGVECTOR_PASSWORD}", + **kwargs: Any, + ) -> dict[str, Any]: + return { + "host": host, + "port": port, + "db": db, + "user": user, + "password": password, + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::pgvector", + ).model_dump(exclude_none=True), + } diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py similarity index 88% rename from llama_stack/providers/remote/vector_io/pgvector/pgvector.py rename to src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index f28bd3cd9..5c86fb08d 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -13,19 +13,24 @@ from psycopg2 import sql from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from 
llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) +from llama_stack_api.internal.kvstore import KVStore from .config import PGVectorVectorIOConfig @@ -368,6 +373,22 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt log.exception("Could not connect to PGVector database server") raise RuntimeError("Could not connect to PGVector database server") from e + # Load existing vector stores from KV store into cache + start_key = VECTOR_DBS_PREFIX + end_key = f"{VECTOR_DBS_PREFIX}\xff" + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + pgvector_index = PGVectorIndex( + vector_store=vector_store, + dimension=vector_store.embedding_dimension, + conn=self.conn, + kvstore=self.kvstore, + ) + await pgvector_index.initialize() + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + self.cache[vector_store.identifier] = index + async def shutdown(self) -> None: if self.conn is not None: self.conn.close() @@ -377,7 +398,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt async def register_vector_store(self, vector_store: VectorStore) -> None: # Persist vector DB metadata in the KV store - assert self.kvstore is not None + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.") + + # Save to kvstore for persistence + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) + # Upsert model metadata in Postgres upsert_models(self.conn, [(vector_store.identifier, vector_store)]) @@ -396,30 +423,34 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt del self.cache[vector_store_id] # Delete vector DB metadata from KV store - assert self.kvstore is not None + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. 
Call initialize() before unregistering vector stores.") await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}") - async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_store_index(vector_db_id) + async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_store_id) await index.insert_chunks(chunks) async def query_chunks( - self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None + self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_store_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_store_id) return await index.query_chunks(query, params) async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: if vector_store_id in self.cache: return self.cache[vector_store_id] - if self.vector_store_table is None: - raise VectorStoreNotFoundError(vector_store_id) - - vector_store = await self.vector_store_table.get_vector_store(vector_store_id) - if not vector_store: + # Try to load from kvstore + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.") + + key = f"{VECTOR_DBS_PREFIX}{vector_store_id}" + vector_store_data = await self.kvstore.get(key) + if not vector_store_data: raise VectorStoreNotFoundError(vector_store_id) + vector_store = VectorStore.model_validate_json(vector_store_data) index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py new file mode 100644 index 000000000..b5b02fe59 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack_api import Api, ProviderSpec + +from .config import QdrantVectorIOConfig + + +async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): + from .qdrant import QdrantVectorIOAdapter + + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py new file mode 100644 index 000000000..e0a3fe207 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -0,0 +1,37 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
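[note] The PGVector initialize() hunk above also warms the cache eagerly: it range-scans every key under the prefix and rebuilds an index per stored record. A hedged sketch of that warm-up, assuming the KVStore exposes values_in_range(start, end) as the diff shows; json.loads stands in for VectorStore.model_validate_json:

import json

VECTOR_DBS_PREFIX = "vector_dbs:"

async def warm_cache(kvstore, cache: dict) -> None:
    start_key = VECTOR_DBS_PREFIX
    end_key = f"{VECTOR_DBS_PREFIX}\xff"  # \xff sorts after any printable suffix, closing the range
    for raw in await kvstore.values_in_range(start_key, end_key):
        record = json.loads(raw)  # the adapter uses VectorStore.model_validate_json here
        cache[record["identifier"]] = record  # real code wraps this in VectorStoreWithIndex

[/note]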
+ +from typing import Any + +from pydantic import BaseModel + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class QdrantVectorIOConfig(BaseModel): + location: str | None = None + url: str | None = None + port: int | None = 6333 + grpc_port: int = 6334 + prefer_grpc: bool = False + https: bool | None = None + api_key: str | None = None + prefix: str | None = None + timeout: int | None = None + host: str | None = None + persistence: KVStoreReference + + @classmethod + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: + return { + "api_key": "${env.QDRANT_API_KEY:=}", + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::qdrant_remote", + ).model_dump(exclude_none=True), + } diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py similarity index 88% rename from llama_stack/providers/remote/vector_io/qdrant/qdrant.py rename to src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 93d0894a6..4dd78d834 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -13,23 +13,24 @@ from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, - VectorStoreChunkingStrategy, - VectorStoreFileObject, -) -from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreChunkingStrategy, + VectorStoreFileObject, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig @@ -183,7 +184,8 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc await super().shutdown() async def register_vector_store(self, vector_store: VectorStore) -> None: - assert self.kvstore is not None + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.") key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" await self.kvstore.set(key=key, value=vector_store.model_dump_json()) @@ -200,20 +202,24 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc await self.cache[vector_store_id].index.delete() del self.cache[vector_store_id] - assert self.kvstore is not None + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. 
Call initialize() before using vector stores.") await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: if vector_store_id in self.cache: return self.cache[vector_store_id] - if self.vector_store_table is None: - raise ValueError(f"Vector DB not found {vector_store_id}") + # Try to load from kvstore + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.") - vector_store = await self.vector_store_table.get_vector_store(vector_store_id) - if not vector_store: + key = f"{VECTOR_DBS_PREFIX}{vector_store_id}" + vector_store_data = await self.kvstore.get(key) + if not vector_store_data: raise VectorStoreNotFoundError(vector_store_id) + vector_store = VectorStore.model_validate_json(vector_store_data) index = VectorStoreWithIndex( vector_store=vector_store, index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), @@ -222,19 +228,19 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc self.cache[vector_store_id] = index return index - async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_store_index(vector_db_id) + async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_store_id) if not index: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) await index.insert_chunks(chunks) async def query_chunks( - self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None + self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_store_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_store_id) if not index: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) return await index.query_chunks(query, params) diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py new file mode 100644 index 000000000..47546d459 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack_api import Api, ProviderSpec + +from .config import WeaviateVectorIOConfig + + +async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): + from .weaviate import WeaviateVectorIOAdapter + + impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py new file mode 100644 index 000000000..75d1b7c51 --- /dev/null +++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -0,0 +1,32 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type + + +@json_schema_type +class WeaviateVectorIOConfig(BaseModel): + weaviate_api_key: str | None = Field(description="The API key for the Weaviate instance", default=None) + weaviate_cluster_url: str | None = Field(description="The URL of the Weaviate cluster", default="localhost:8080") + persistence: KVStoreReference | None = Field( + description="Config for KV store backend (SQLite only for now)", default=None + ) + + @classmethod + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: + return { + "weaviate_api_key": None, + "weaviate_cluster_url": "${env.WEAVIATE_CLUSTER_URL:=localhost:8080}", + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::weaviate", + ).model_dump(exclude_none=True), + } diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py similarity index 93% rename from llama_stack/providers/remote/vector_io/weaviate/weaviate.py rename to src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 66922aa3f..c15d5f468 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -12,17 +12,9 @@ from numpy.typing import NDArray from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, @@ -31,6 +23,18 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) +from llama_stack_api.internal.kvstore import KVStore from .config import WeaviateVectorIOConfig @@ -346,13 +350,16 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv if vector_store_id in self.cache: return self.cache[vector_store_id] - if self.vector_store_table is None: - raise VectorStoreNotFoundError(vector_store_id) - - vector_store = await self.vector_store_table.get_vector_store(vector_store_id) - if not vector_store: + # Try to load from kvstore + if self.kvstore is None: + raise RuntimeError("KVStore not initialized. 
Call initialize() before using vector stores.") + + key = f"{VECTOR_DBS_PREFIX}{vector_store_id}" + vector_store_data = await self.kvstore.get(key) + if not vector_store_data: raise VectorStoreNotFoundError(vector_store_id) + vector_store = VectorStore.model_validate_json(vector_store_data) client = self._get_client() sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) if not client.collections.exists(sanitized_collection_name): @@ -366,19 +373,19 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv self.cache[vector_store_id] = index return index - async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_store_index(vector_db_id) + async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: + index = await self._get_and_cache_vector_store_index(vector_store_id) if not index: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) await index.insert_chunks(chunks) async def query_chunks( - self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None + self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_store_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_store_id) if not index: - raise VectorStoreNotFoundError(vector_db_id) + raise VectorStoreNotFoundError(vector_store_id) return await index.query_chunks(query, params) diff --git a/llama_stack/providers/remote/__init__.py b/src/llama_stack/providers/utils/__init__.py similarity index 100% rename from llama_stack/providers/remote/__init__.py rename to src/llama_stack/providers/utils/__init__.py diff --git a/llama_stack/providers/remote/agents/__init__.py b/src/llama_stack/providers/utils/bedrock/__init__.py similarity index 100% rename from llama_stack/providers/remote/agents/__init__.py rename to src/llama_stack/providers/utils/bedrock/__init__.py diff --git a/llama_stack/providers/utils/bedrock/client.py b/src/llama_stack/providers/utils/bedrock/client.py similarity index 100% rename from llama_stack/providers/utils/bedrock/client.py rename to src/llama_stack/providers/utils/bedrock/client.py diff --git a/llama_stack/providers/utils/bedrock/config.py b/src/llama_stack/providers/utils/bedrock/config.py similarity index 100% rename from llama_stack/providers/utils/bedrock/config.py rename to src/llama_stack/providers/utils/bedrock/config.py diff --git a/llama_stack/providers/utils/bedrock/refreshable_boto_session.py b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py similarity index 100% rename from llama_stack/providers/utils/bedrock/refreshable_boto_session.py rename to src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py diff --git a/llama_stack/providers/remote/datasetio/__init__.py b/src/llama_stack/providers/utils/common/__init__.py similarity index 100% rename from llama_stack/providers/remote/datasetio/__init__.py rename to src/llama_stack/providers/utils/common/__init__.py diff --git a/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py similarity index 96% rename from llama_stack/providers/utils/common/data_schema_validator.py rename to 
src/llama_stack/providers/utils/common/data_schema_validator.py index b0305104f..c9a3b0920 100644 --- a/llama_stack/providers/utils/common/data_schema_validator.py +++ b/src/llama_stack/providers/utils/common/data_schema_validator.py @@ -7,12 +7,8 @@ from enum import Enum from typing import Any -from llama_stack.apis.common.type_system import ( - ChatCompletionInputType, - CompletionInputType, - StringType, -) from llama_stack.core.datatypes import Api +from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType class ColumnName(Enum): diff --git a/llama_stack/providers/remote/eval/__init__.py b/src/llama_stack/providers/utils/datasetio/__init__.py similarity index 100% rename from llama_stack/providers/remote/eval/__init__.py rename to src/llama_stack/providers/utils/datasetio/__init__.py diff --git a/llama_stack/providers/utils/datasetio/url_utils.py b/src/llama_stack/providers/utils/datasetio/url_utils.py similarity index 100% rename from llama_stack/providers/utils/datasetio/url_utils.py rename to src/llama_stack/providers/utils/datasetio/url_utils.py diff --git a/llama_stack/providers/remote/inference/__init__.py b/src/llama_stack/providers/utils/files/__init__.py similarity index 100% rename from llama_stack/providers/remote/inference/__init__.py rename to src/llama_stack/providers/utils/files/__init__.py diff --git a/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py similarity index 97% rename from llama_stack/providers/utils/files/form_data.py rename to src/llama_stack/providers/utils/files/form_data.py index 3d8fb6d85..3fac14f38 100644 --- a/llama_stack/providers/utils/files/form_data.py +++ b/src/llama_stack/providers/utils/files/form_data.py @@ -9,7 +9,7 @@ import json from fastapi import Request from pydantic import BaseModel, ValidationError -from llama_stack.apis.files import ExpiresAfter +from llama_stack_api import ExpiresAfter async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None: diff --git a/llama_stack/providers/utils/inference/__init__.py b/src/llama_stack/providers/utils/inference/__init__.py similarity index 100% rename from llama_stack/providers/utils/inference/__init__.py rename to src/llama_stack/providers/utils/inference/__init__.py diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/src/llama_stack/providers/utils/inference/embedding_mixin.py similarity index 95% rename from llama_stack/providers/utils/inference/embedding_mixin.py rename to src/llama_stack/providers/utils/inference/embedding_mixin.py index c959b9c19..f7e5c711b 100644 --- a/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/src/llama_stack/providers/utils/inference/embedding_mixin.py @@ -17,7 +17,7 @@ from llama_stack.log import get_logger if TYPE_CHECKING: from sentence_transformers import SentenceTransformer -from llama_stack.apis.inference import ( +from llama_stack_api import ( ModelStore, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, @@ -46,8 +46,7 @@ class SentenceTransformerEmbeddingMixin: raise ValueError("Empty list not supported") # Get the model and generate embeddings - model_obj = await self.model_store.get_model(params.model) - embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id) + embedding_model = await self._load_sentence_transformer_model(params.model) embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False) # Convert 
embeddings to the requested format diff --git a/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py similarity index 85% rename from llama_stack/providers/utils/inference/inference_store.py rename to src/llama_stack/providers/utils/inference/inference_store.py index 8e20bca6b..a8a0cace4 100644 --- a/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -8,20 +8,19 @@ from typing import Any from sqlalchemy.exc import IntegrityError -from llama_stack.apis.inference import ( +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl +from llama_stack.log import get_logger +from llama_stack_api import ( ListOpenAIChatCompletionResponse, OpenAIChatCompletion, OpenAICompletionWithInputMessages, OpenAIMessageParam, Order, ) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType -from llama_stack.log import get_logger - -from ..sqlstore.api import ColumnDefinition, ColumnType -from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType logger = get_logger(name=__name__, category="inference") @@ -35,6 +34,7 @@ class InferenceStore: self.reference = reference self.sql_store = None self.policy = policy + self.enable_write_queue = True # Async write queue and worker control self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None @@ -47,14 +47,13 @@ class InferenceStore: base_store = sqlstore_impl(self.reference) self.sql_store = AuthorizedSqlStore(base_store, self.policy) - # Disable write queue for SQLite to avoid concurrency issues - backend_name = self.reference.backend - backend_config = _SQLSTORE_BACKENDS.get(backend_name) - if backend_config is None: - raise ValueError( - f"Unregistered SQL backend '{backend_name}'. 
Registered backends: {sorted(_SQLSTORE_BACKENDS)}" - ) - self.enable_write_queue = backend_config.type != StorageBackendType.SQL_SQLITE + # Disable write queue for SQLite since WAL mode handles concurrency + # Keep it enabled for other backends (like Postgres) for performance + backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend) + if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE: + self.enable_write_queue = False + logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)") + await self.sql_store.create_table( "chat_completions", { @@ -66,13 +65,6 @@ class InferenceStore: }, ) - if self.enable_write_queue: - self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) - for _ in range(self._num_writers): - self._worker_tasks.append(asyncio.create_task(self._worker_loop())) - else: - logger.info("Write queue disabled for SQLite to avoid concurrency issues") - async def shutdown(self) -> None: if not self._worker_tasks: return @@ -93,10 +85,29 @@ class InferenceStore: if self.enable_write_queue and self._queue is not None: await self._queue.join() + async def _ensure_workers_started(self) -> None: + """Ensure the async write queue workers run on the current loop.""" + if not self.enable_write_queue: + return + + if self._queue is None: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + logger.debug( + f"Inference store write queue created with max size {self._max_write_queue_size} " + f"and {self._num_writers} writers" + ) + + if not self._worker_tasks: + loop = asyncio.get_running_loop() + for _ in range(self._num_writers): + task = loop.create_task(self._worker_loop()) + self._worker_tasks.append(task) + async def store_chat_completion( self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam] ) -> None: if self.enable_write_queue: + await self._ensure_workers_started() if self._queue is None: raise ValueError("Inference store is not initialized") try: diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py similarity index 79% rename from llama_stack/providers/utils/inference/litellm_openai_mixin.py rename to src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 42b89f897..47c68ff0a 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -10,10 +10,14 @@ from collections.abc import AsyncIterator import litellm -from llama_stack.apis.inference import ( - ChatCompletionRequest, +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry +from llama_stack.providers.utils.inference.openai_compat import ( + prepare_openai_completion_params, +) +from llama_stack_api import ( InferenceProvider, - JsonSchemaResponseFormat, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -23,16 +27,6 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, - ToolChoice, -) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry -from llama_stack.providers.utils.inference.openai_compat 
import ( - convert_message_to_openai_dict_new, - convert_tooldef_to_openai_tool, - get_sampling_options, - prepare_openai_completion_params, ) logger = get_logger(name=__name__, category="providers::utils") @@ -127,59 +121,13 @@ class LiteLLMOpenAIMixin( return schema - async def _get_params(self, request: ChatCompletionRequest) -> dict: - input_dict = {} - - input_dict["messages"] = [ - await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages - ] - if fmt := request.response_format: - if not isinstance(fmt, JsonSchemaResponseFormat): - raise ValueError( - f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported." - ) - - fmt = fmt.json_schema - name = fmt["title"] - del fmt["title"] - fmt["additionalProperties"] = False - - # Apply additionalProperties: False recursively to all objects - fmt = self._add_additional_properties_recursive(fmt) - - input_dict["response_format"] = { - "type": "json_schema", - "json_schema": { - "name": name, - "schema": fmt, - "strict": self.json_schema_strict, - }, - } - if request.tools: - input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools] - if request.tool_config.tool_choice: - input_dict["tool_choice"] = ( - request.tool_config.tool_choice.value - if isinstance(request.tool_config.tool_choice, ToolChoice) - else request.tool_config.tool_choice - ) - - return { - "model": request.model, - "api_key": self.get_api_key(), - "api_base": self.api_base, - **input_dict, - "stream": request.stream, - **get_sampling_options(request.sampling_params), - } - def get_api_key(self) -> str: provider_data = self.get_request_provider_data() key_field = self.provider_data_api_key_field - if provider_data and getattr(provider_data, key_field, None): - api_key = getattr(provider_data, key_field) - else: - api_key = self.api_key_from_config + if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)): + return str(api_key) # type: ignore[no-any-return] # getattr returns Any, can't narrow without runtime type inspection + + api_key = self.api_key_from_config if not api_key: raise ValueError( "API key is not set. 
Please provide a valid API key in the " @@ -192,7 +140,13 @@ class LiteLLMOpenAIMixin( self, params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: + if not self.model_store: + raise ValueError("Model store is not initialized") + model_obj = await self.model_store.get_model(params.model) + if model_obj.provider_resource_id is None: + raise ValueError(f"Model {params.model} has no provider_resource_id") + provider_resource_id = model_obj.provider_resource_id # Convert input to list if it's a string input_list = [params.input] if isinstance(params.input, str) else params.input @@ -200,7 +154,7 @@ class LiteLLMOpenAIMixin( # Call litellm embedding function # litellm.drop_params = True response = litellm.embedding( - model=self.get_litellm_model_name(model_obj.provider_resource_id), + model=self.get_litellm_model_name(provider_resource_id), input=input_list, api_key=self.get_api_key(), api_base=self.api_base, @@ -217,7 +171,7 @@ class LiteLLMOpenAIMixin( return OpenAIEmbeddingsResponse( data=data, - model=model_obj.provider_resource_id, + model=provider_resource_id, usage=usage, ) @@ -225,10 +179,16 @@ class LiteLLMOpenAIMixin( self, params: OpenAICompletionRequestWithExtraBody, ) -> OpenAICompletion: + if not self.model_store: + raise ValueError("Model store is not initialized") + model_obj = await self.model_store.get_model(params.model) + if model_obj.provider_resource_id is None: + raise ValueError(f"Model {params.model} has no provider_resource_id") + provider_resource_id = model_obj.provider_resource_id request_params = await prepare_openai_completion_params( - model=self.get_litellm_model_name(model_obj.provider_resource_id), + model=self.get_litellm_model_name(provider_resource_id), prompt=params.prompt, best_of=params.best_of, echo=params.echo, @@ -249,26 +209,32 @@ class LiteLLMOpenAIMixin( api_key=self.get_api_key(), api_base=self.api_base, ) - return await litellm.atext_completion(**request_params) + # LiteLLM returns compatible type but mypy can't verify external library + return await litellm.atext_completion(**request_params) # type: ignore[no-any-return] # external lib lacks type stubs async def openai_chat_completion( self, params: OpenAIChatCompletionRequestWithExtraBody, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: # Add usage tracking for streaming when telemetry is active - from llama_stack.providers.utils.telemetry.tracing import get_current_span stream_options = params.stream_options - if params.stream and get_current_span() is not None: + if params.stream: if stream_options is None: stream_options = {"include_usage": True} elif "include_usage" not in stream_options: stream_options = {**stream_options, "include_usage": True} + if not self.model_store: + raise ValueError("Model store is not initialized") + model_obj = await self.model_store.get_model(params.model) + if model_obj.provider_resource_id is None: + raise ValueError(f"Model {params.model} has no provider_resource_id") + provider_resource_id = model_obj.provider_resource_id request_params = await prepare_openai_completion_params( - model=self.get_litellm_model_name(model_obj.provider_resource_id), + model=self.get_litellm_model_name(provider_resource_id), messages=params.messages, frequency_penalty=params.frequency_penalty, function_call=params.function_call, @@ -294,7 +260,8 @@ class LiteLLMOpenAIMixin( api_key=self.get_api_key(), api_base=self.api_base, ) - return await litellm.acompletion(**request_params) + # LiteLLM returns compatible type but mypy can't verify 
external library + return await litellm.acompletion(**request_params) # type: ignore[no-any-return] # external lib lacks type stubs async def check_model_availability(self, model: str) -> bool: """ diff --git a/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py similarity index 96% rename from llama_stack/providers/utils/inference/model_registry.py rename to src/llama_stack/providers/utils/inference/model_registry.py index d60d00f87..42b54497f 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/src/llama_stack/providers/utils/inference/model_registry.py @@ -8,19 +8,17 @@ from typing import Any from pydantic import BaseModel, Field, SecretStr -from llama_stack.apis.common.errors import UnsupportedModelError -from llama_stack.apis.models import ModelType from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) +from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError logger = get_logger(name=__name__, category="providers::utils") class RemoteInferenceProviderConfig(BaseModel): - allowed_models: list[str] | None = Field( # TODO: make this non-optional and give a list() default + allowed_models: list[str] | None = Field( default=None, description="List of models that should be registered with the model registry. If None, all models are allowed.", ) diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py new file mode 100644 index 000000000..3ce7d361d --- /dev/null +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -0,0 +1,237 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
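[note] The openai_chat_completion hunk above now requests usage chunks on every streaming call (the old version gated this on an active telemetry span) while preserving caller-supplied stream options. A runnable sketch of that injection logic, illustrative only:

def with_usage(stream: bool, stream_options: dict | None) -> dict | None:
    if not stream:
        return stream_options
    if stream_options is None:
        return {"include_usage": True}
    if "include_usage" not in stream_options:
        return {**stream_options, "include_usage": True}  # copy, don't mutate the caller's dict
    return stream_options

print(with_usage(True, None))                      # {'include_usage': True}
print(with_usage(True, {"include_usage": False}))  # caller's explicit choice wins

[/note]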
+from typing import ( + Any, +) + +from openai.types.chat import ( + ChatCompletionMessageToolCall, +) +from pydantic import BaseModel + +from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import ( + BuiltinTool, + StopReason, + ToolCall, + ToolDefinition, +) + +logger = get_logger(name=__name__, category="providers::utils") + + +class OpenAICompatCompletionChoiceDelta(BaseModel): + content: str + + +class OpenAICompatLogprobs(BaseModel): + text_offset: list[int] | None = None + + token_logprobs: list[float] | None = None + + tokens: list[str] | None = None + + top_logprobs: list[dict[str, float]] | None = None + + +class OpenAICompatCompletionChoice(BaseModel): + finish_reason: str | None = None + text: str | None = None + delta: OpenAICompatCompletionChoiceDelta | None = None + logprobs: OpenAICompatLogprobs | None = None + + +class OpenAICompatCompletionResponse(BaseModel): + choices: list[OpenAICompatCompletionChoice] + + +def text_from_choice(choice) -> str: + if hasattr(choice, "delta") and choice.delta: + return choice.delta.content # type: ignore[no-any-return] # external OpenAI types lack precise annotations + + if hasattr(choice, "message"): + return choice.message.content # type: ignore[no-any-return] # external OpenAI types lack precise annotations + + return choice.text # type: ignore[no-any-return] # external OpenAI types lack precise annotations + + +def get_stop_reason(finish_reason: str) -> StopReason: + if finish_reason in ["stop", "eos"]: + return StopReason.end_of_turn + elif finish_reason == "eom": + return StopReason.end_of_message + elif finish_reason == "length": + return StopReason.out_of_tokens + + return StopReason.out_of_tokens + + +class UnparseableToolCall(BaseModel): + """ + A ToolCall with arguments that are not valid JSON. + Mirrors the ToolCall schema, but with arguments as a string. + """ + + call_id: str = "" + tool_name: str = "" + arguments: str = "" + + +def convert_tool_call( + tool_call: ChatCompletionMessageToolCall, +) -> ToolCall | UnparseableToolCall: + """ + Convert a ChatCompletionMessageToolCall tool call to either a + ToolCall or UnparseableToolCall. Returns an UnparseableToolCall + if the tool call is not valid ToolCall. + """ + try: + valid_tool_call = ToolCall( + call_id=tool_call.id, + tool_name=tool_call.function.name, + arguments=tool_call.function.arguments, + ) + except Exception: + return UnparseableToolCall( + call_id=tool_call.id or "", + tool_name=tool_call.function.name or "", + arguments=tool_call.function.arguments or "", + ) + + return valid_tool_call + + +PYTHON_TYPE_TO_LITELLM_TYPE = { + "int": "integer", + "float": "number", + "bool": "boolean", + "str": "string", +} + + +def to_openai_param_type(param_type: str) -> dict: + """ + Convert Python type hints to OpenAI parameter type format. 
+ + Examples: + 'str' -> {'type': 'string'} + 'int' -> {'type': 'integer'} + 'list[str]' -> {'type': 'array', 'items': {'type': 'string'}} + 'list[int]' -> {'type': 'array', 'items': {'type': 'integer'}} + """ + # Handle basic types first + basic_types = { + "str": "string", + "int": "integer", + "float": "number", + "bool": "boolean", + } + + if param_type in basic_types: + return {"type": basic_types[param_type]} + + # Handle list/array types + if param_type.startswith("list[") and param_type.endswith("]"): + inner_type = param_type[5:-1] + if inner_type in basic_types: + return { + "type": "array", + "items": {"type": basic_types.get(inner_type, inner_type)}, + } + + return {"type": param_type} + + +def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict: + """ + Convert a ToolDefinition to an OpenAI API-compatible dictionary. + + ToolDefinition: + tool_name: str | BuiltinTool + description: Optional[str] + input_schema: Optional[Dict[str, Any]] # JSON Schema + output_schema: Optional[Dict[str, Any]] # JSON Schema (not used by OpenAI) + + OpenAI spec - + + { + "type": "function", + "function": { + "name": tool_name, + "description": description, + "parameters": {}, + }, + } + + NOTE: OpenAI does not support output_schema, so it is dropped here. + """ + out = { + "type": "function", + "function": {}, + } + function = out["function"] + + if isinstance(tool.tool_name, BuiltinTool): + function["name"] = tool.tool_name.value # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str] + else: + function["name"] = tool.tool_name # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str] + + if tool.description: + function["description"] = tool.description # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str] + + if tool.input_schema: + # Pass through the entire JSON Schema as-is + function["parameters"] = tool.input_schema # type: ignore[index] # dict value inferred as Any but mypy sees Collection[str] + + # NOTE: OpenAI does not support output_schema, so we drop it here + # It's stored in LlamaStack for validation and other provider usage + + return out + + +async def prepare_openai_completion_params(**params): + async def _prepare_value(value: Any) -> Any: + new_value = value + if isinstance(value, list): + new_value = [await _prepare_value(v) for v in value] + elif isinstance(value, dict): + new_value = {k: await _prepare_value(v) for k, v in value.items()} + elif isinstance(value, BaseModel): + new_value = value.model_dump(exclude_none=True) + return new_value + + completion_params = {} + for k, v in params.items(): + if v is not None: + completion_params[k] = await _prepare_value(v) + return completion_params + + +def prepare_openai_embeddings_params( + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, +): + if model is None: + raise ValueError("Model must be provided for embeddings") + + input_list = [input] if isinstance(input, str) else input + + params: dict[str, Any] = { + "model": model, + "input": input_list, + } + + if encoding_format is not None: + params["encoding_format"] = encoding_format + if dimensions is not None: + params["dimensions"] = dimensions + if user is not None: + params["user"] = user + + return params diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py similarity index 85% rename from 
llama_stack/providers/utils/inference/openai_mixin.py rename to src/llama_stack/providers/utils/inference/openai_mixin.py index a9ccc8091..30511a341 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -10,11 +10,17 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterable from typing import Any -from openai import NOT_GIVEN, AsyncOpenAI +from openai import AsyncOpenAI from pydantic import BaseModel, ConfigDict -from llama_stack.apis.inference import ( +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params +from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content +from llama_stack_api import ( Model, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -26,12 +32,6 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, ) -from llama_stack.apis.models import ModelType -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params -from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content logger = get_logger(name=__name__, category="providers::utils") @@ -48,6 +48,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): - overwrite_completion_id: If True, overwrites the 'id' field in OpenAI responses - download_images: If True, downloads images and converts to base64 for providers that require it - embedding_model_metadata: A dictionary mapping model IDs to their embedding metadata + - construct_model_from_identifier: Method to construct a Model instance corresponding to the given identifier - provider_data_api_key_field: Optional field name in provider data to look for API key - list_provider_model_ids: Method to list available models from the provider - get_extra_client_params: Method to provide extra parameters to the AsyncOpenAI client @@ -82,9 +83,6 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): # This is set in list_models() and used in check_model_availability() _model_cache: dict[str, Model] = {} - # List of allowed models for this provider, if empty all models allowed - allowed_models: list[str] = [] - # Optional field name in provider data to look for API key, which takes precedence provider_data_api_key_field: str | None = None @@ -121,6 +119,30 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ return {} + def construct_model_from_identifier(self, identifier: str) -> Model: + """ + Construct a Model instance corresponding to the given identifier + + Child classes can override this to customize model typing/metadata. 
+ + :param identifier: The provider's model identifier + :return: A Model instance + """ + if metadata := self.embedding_model_metadata.get(identifier): + return Model( + provider_id=self.__provider_id__, # type: ignore[attr-defined] + provider_resource_id=identifier, + identifier=identifier, + model_type=ModelType.embedding, + metadata=metadata, + ) + return Model( + provider_id=self.__provider_id__, # type: ignore[attr-defined] + provider_resource_id=identifier, + identifier=identifier, + model_type=ModelType.llm, + ) + async def list_provider_model_ids(self) -> Iterable[str]: """ List available models from the provider. @@ -191,6 +213,19 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): return api_key + def _validate_model_allowed(self, provider_model_id: str) -> None: + """ + Validate that the model is in the allowed_models list if configured. + + :param provider_model_id: The provider-specific model ID to validate + :raises ValueError: If the model is not in the allowed_models list + """ + if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models: + raise ValueError( + f"Model '{provider_model_id}' is not in the allowed models list. " + f"Allowed models: {self.config.allowed_models}" + ) + async def _get_provider_model_id(self, model: str) -> str: """ Get the provider-specific model ID from the model store. @@ -201,8 +236,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): :param model: The registered model name/identifier :return: The provider-specific model ID (e.g., "gpt-4") """ - # Look up the registered model to get the provider-specific model ID # self.model_store is injected by the distribution system at runtime + if not await self.model_store.has_model(model): # type: ignore[attr-defined] + return model + + # Look up the registered model to get the provider-specific model ID model_obj: Model = await self.model_store.get_model(model) # type: ignore[attr-defined] # provider_resource_id is str | None, but we expect it to be str for OpenAI calls if model_obj.provider_resource_id is None: @@ -234,8 +272,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): Direct OpenAI completion API call. """ # TODO: fix openai_completion to return type compatible with OpenAI's API response + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + completion_kwargs = await prepare_openai_completion_params( - model=await self._get_provider_model_id(params.model), + model=provider_model_id, prompt=params.prompt, best_of=params.best_of, echo=params.echo, @@ -267,6 +308,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ Direct OpenAI chat completion API call. """ + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + messages = params.messages if self.download_images: @@ -288,7 +332,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): messages = [await _localize_image_url(m) for m in messages] request_params = await prepare_openai_completion_params( - model=await self._get_provider_model_id(params.model), + model=provider_model_id, messages=messages, frequency_penalty=params.frequency_penalty, function_call=params.function_call, @@ -326,21 +370,24 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ Direct OpenAI embeddings API call. 
""" - # Prepare request parameters - request_params = { - "model": await self._get_provider_model_id(params.model), + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + + # Build request params conditionally to avoid NotGiven/Omit type mismatch + # The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven + request_params: dict[str, Any] = { + "model": provider_model_id, "input": params.input, - "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN, - "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN, - "user": params.user if params.user is not None else NOT_GIVEN, } + if params.encoding_format is not None: + request_params["encoding_format"] = params.encoding_format + if params.dimensions is not None: + request_params["dimensions"] = params.dimensions + if params.user is not None: + request_params["user"] = params.user + if params.model_extra: + request_params["extra_body"] = params.model_extra - # Add extra_body if present - extra_body = params.model_extra - if extra_body: - request_params["extra_body"] = extra_body - - # Call OpenAI embeddings API with properly typed parameters response = await self.client.embeddings.create(**request_params) data = [] @@ -413,24 +460,10 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): for provider_model_id in provider_models_ids: if not isinstance(provider_model_id, str): raise ValueError(f"Model ID {provider_model_id} from list_provider_model_ids() is not a string") - if self.allowed_models and provider_model_id not in self.allowed_models: + if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models: logger.info(f"Skipping model {provider_model_id} as it is not in the allowed models list") continue - if metadata := self.embedding_model_metadata.get(provider_model_id): - model = Model( - provider_id=self.__provider_id__, # type: ignore[attr-defined] - provider_resource_id=provider_model_id, - identifier=provider_model_id, - model_type=ModelType.embedding, - metadata=metadata, - ) - else: - model = Model( - provider_id=self.__provider_id__, # type: ignore[attr-defined] - provider_resource_id=provider_model_id, - identifier=provider_model_id, - model_type=ModelType.llm, - ) + model = self.construct_model_from_identifier(provider_model_id) self._model_cache[provider_model_id] = model return list(self._model_cache.values()) diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py new file mode 100644 index 000000000..6272c9eed --- /dev/null +++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py @@ -0,0 +1,280 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import asyncio +import base64 +import io +import json +import re +from typing import Any + +import httpx +from PIL import Image as PIL_Image + +from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import ( + RawContent, + RawContentItem, + RawMediaItem, + RawMessage, + RawTextItem, + StopReason, + ToolCall, + ToolDefinition, + ToolPromptFormat, +) +from llama_stack.models.llama.llama3.chat_format import ChatFormat +from llama_stack.models.llama.llama3.tokenizer import Tokenizer +from llama_stack.models.llama.sku_list import resolve_model +from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal +from llama_stack_api import ( + CompletionRequest, + ImageContentItem, + InterleavedContent, + InterleavedContentItem, + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIFile, + OpenAIMessageParam, + OpenAISystemMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, + ResponseFormat, + ResponseFormatType, + TextContentItem, + ToolChoice, +) + +log = get_logger(name=__name__, category="providers::utils") + + +class CompletionRequestWithRawContent(CompletionRequest): + content: RawContent + + +def decode_assistant_message(content: str, stop_reason: StopReason) -> RawMessage: + formatter = ChatFormat(Tokenizer.get_instance()) + return formatter.decode_assistant_message_from_content(content, stop_reason) + + +def interleaved_content_as_str( + content: Any, + sep: str = " ", +) -> str: + if content is None: + return "" + + def _process(c) -> str: + if isinstance(c, str): + return c + elif isinstance(c, TextContentItem) or isinstance(c, OpenAIChatCompletionContentPartTextParam): + return c.text + elif isinstance(c, ImageContentItem) or isinstance(c, OpenAIChatCompletionContentPartImageParam): + return "" + elif isinstance(c, OpenAIFile): + return "" + else: + raise ValueError(f"Unsupported content type: {type(c)}") + + if isinstance(content, list): + return sep.join(_process(c) for c in content) + else: + return _process(content) + + +async def interleaved_content_convert_to_raw( + content: InterleavedContent, +) -> RawContent: + """Download content from URLs / files etc. 
so plain bytes can be sent to the model""" + + async def _localize_single(c: str | InterleavedContentItem) -> str | RawContentItem: + if isinstance(c, str): + return RawTextItem(text=c) + elif isinstance(c, TextContentItem): + return RawTextItem(text=c.text) + elif isinstance(c, ImageContentItem): + image = c.image + if image.url: + # Load image bytes from URL + if image.url.uri.startswith("data"): + match = re.match(r"data:image/(\w+);base64,(.+)", image.url.uri) + if not match: + raise ValueError(f"Invalid data URL format, {image.url.uri[:40]}...") + _, image_data = match.groups() + data = base64.b64decode(image_data) + elif image.url.uri.startswith("file://"): + path = image.url.uri[len("file://") :] + with open(path, "rb") as f: + data = f.read() # type: ignore + elif image.url.uri.startswith("http"): + async with httpx.AsyncClient() as client: + response = await client.get(image.url.uri) + data = response.content + else: + raise ValueError("Unsupported URL type") + elif image.data: + # data is a base64 encoded string, decode it to bytes for RawMediaItem + data = base64.b64decode(image.data) + else: + raise ValueError("No data or URL provided") + + return RawMediaItem(data=data) + else: + raise ValueError(f"Unsupported content type: {type(c)}") + + if isinstance(content, list): + return await asyncio.gather(*(_localize_single(c) for c in content)) + else: + return await _localize_single(content) + + +async def convert_openai_message_to_raw_message(message: OpenAIMessageParam) -> RawMessage: + """Convert OpenAI message format to RawMessage format used by Llama formatters.""" + if isinstance(message, OpenAIUserMessageParam): + content = await interleaved_content_convert_to_raw(message.content) # type: ignore[arg-type] + return RawMessage(role="user", content=content) + elif isinstance(message, OpenAISystemMessageParam): + content = await interleaved_content_convert_to_raw(message.content) # type: ignore[arg-type] + return RawMessage(role="system", content=content) + elif isinstance(message, OpenAIAssistantMessageParam): + content = await interleaved_content_convert_to_raw(message.content or "") # type: ignore[arg-type] + tool_calls = [] + if message.tool_calls: + for tc in message.tool_calls: + if tc.function: + tool_calls.append( + ToolCall( + call_id=tc.id or "", + tool_name=tc.function.name or "", + arguments=tc.function.arguments or "{}", + ) + ) + return RawMessage(role="assistant", content=content, tool_calls=tool_calls) + elif isinstance(message, OpenAIToolMessageParam): + content = await interleaved_content_convert_to_raw(message.content) # type: ignore[arg-type] + return RawMessage(role="tool", content=content) + else: + # Handle OpenAIDeveloperMessageParam if needed + raise ValueError(f"Unsupported message type: {type(message)}") + + +def content_has_media(content: InterleavedContent): + def _has_media_content(c): + return isinstance(c, ImageContentItem) + + if isinstance(content, list): + return any(_has_media_content(c) for c in content) + else: + return _has_media_content(content) + + +async def localize_image_content(uri: str) -> tuple[bytes, str] | None: + if uri.startswith("http"): + async with httpx.AsyncClient() as client: + r = await client.get(uri) + content = r.content + content_type = r.headers.get("content-type") + if content_type: + format = content_type.split("/")[-1] + else: + format = "png" + + return content, format + elif uri.startswith("data"): + # data:image/{format};base64,{data} + match = re.match(r"data:image/(\w+);base64,(.+)", uri) + if not match: + 
raise ValueError(f"Invalid data URL format, {uri[:40]}...") + fmt, image_data = match.groups() + content = base64.b64decode(image_data) + return content, fmt + else: + return None + + +async def convert_image_content_to_url( + media: ImageContentItem, download: bool = False, include_format: bool = True +) -> str: + image = media.image + if image.url and (not download or image.url.uri.startswith("data")): + return image.url.uri + + if image.data: + # data is a base64 encoded string, decode it to bytes first + # TODO(mf): do this more efficiently, decode less + content = base64.b64decode(image.data) + pil_image = PIL_Image.open(io.BytesIO(content)) + format = pil_image.format + else: + localize_result = await localize_image_content(image.url.uri) + if localize_result is None: + raise ValueError(f"Failed to localize image content from {image.url.uri}") + content, format = localize_result + + if include_format: + return f"data:image/{format};base64," + base64.b64encode(content).decode("utf-8") + else: + return base64.b64encode(content).decode("utf-8") + + +def augment_content_with_response_format_prompt(response_format, content): + if fmt_prompt := response_format_prompt(response_format): + if isinstance(content, list): + return content + [TextContentItem(text=fmt_prompt)] + elif isinstance(content, str): + return [TextContentItem(text=content), TextContentItem(text=fmt_prompt)] + else: + return [content, TextContentItem(text=fmt_prompt)] + + return content + + +def response_format_prompt(fmt: ResponseFormat | None): + if not fmt: + return None + + if fmt.type == ResponseFormatType.json_schema.value: + return f"Please respond in JSON format with the schema: {json.dumps(fmt.json_schema)}" + elif fmt.type == ResponseFormatType.grammar.value: + raise NotImplementedError("Grammar response format not supported yet") + else: + raise ValueError(f"Unknown response format {fmt.type}") + + +def _get_tool_choice_prompt(tool_choice: ToolChoice | str, tools: list[ToolDefinition]) -> str: + if tool_choice == ToolChoice.auto: + return "" + elif tool_choice == ToolChoice.required: + return "You MUST use one of the provided functions/tools to answer the user query." + elif tool_choice == ToolChoice.none: + # tools are already not passed in + return "" + else: + # specific tool + return f"You MUST use the tool `{tool_choice}` to answer the user query." 
+ + +def get_default_tool_prompt_format(model: str) -> ToolPromptFormat: + llama_model = resolve_model(model) + if llama_model is None: + log.warning(f"Could not resolve model {model}, defaulting to json tool prompt format") + return ToolPromptFormat.json + + if llama_model.model_family == ModelFamily.llama3_1 or ( + llama_model.model_family == ModelFamily.llama3_2 and is_multimodal(llama_model.core_model_id) + ): + # llama3.1 and llama3.2 multimodal models follow the same tool prompt format + return ToolPromptFormat.json + elif llama_model.model_family in ( + ModelFamily.llama3_2, + ModelFamily.llama3_3, + ModelFamily.llama4, + ): + # llama3.2 and llama3.3 models follow the same tool prompt format + return ToolPromptFormat.python_list + else: + return ToolPromptFormat.json diff --git a/llama_stack/providers/remote/post_training/__init__.py b/src/llama_stack/providers/utils/memory/__init__.py similarity index 100% rename from llama_stack/providers/remote/post_training/__init__.py rename to src/llama_stack/providers/utils/memory/__init__.py diff --git a/llama_stack/providers/utils/memory/file_utils.py b/src/llama_stack/providers/utils/memory/file_utils.py similarity index 92% rename from llama_stack/providers/utils/memory/file_utils.py rename to src/llama_stack/providers/utils/memory/file_utils.py index 4c40056f3..6786293c6 100644 --- a/llama_stack/providers/utils/memory/file_utils.py +++ b/src/llama_stack/providers/utils/memory/file_utils.py @@ -8,7 +8,7 @@ import base64 import mimetypes import os -from llama_stack.apis.common.content_types import URL +from llama_stack_api import URL def data_url_from_file(file_path: str) -> URL: diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py similarity index 94% rename from llama_stack/providers/utils/memory/openai_vector_store_mixin.py rename to src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 8f9fb9fb4..bbfd60e25 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -15,21 +15,30 @@ from typing import Annotated, Any from fastapi import Body from pydantic import TypeAdapter -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.vector_io import ( +from llama_stack.core.id_generation import generate_object_id +from llama_stack.log import get_logger +from llama_stack.providers.utils.memory.vector_store import ( + ChunkForDeletion, + content_from_data_and_mime_type, + make_overlapped_chunks, +) +from llama_stack_api import ( Chunk, + Files, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, + OpenAIFileObject, QueryChunksResponse, SearchRankingOptions, + VectorStore, VectorStoreChunkingStrategy, VectorStoreChunkingStrategyAuto, VectorStoreChunkingStrategyStatic, + VectorStoreChunkingStrategyStaticConfig, VectorStoreContent, VectorStoreDeleteResponse, VectorStoreFileBatchObject, - VectorStoreFileContentsResponse, + VectorStoreFileContentResponse, VectorStoreFileCounts, VectorStoreFileDeleteResponse, VectorStoreFileLastError, @@ -38,19 +47,12 @@ from llama_stack.apis.vector_io import ( VectorStoreFileStatus, VectorStoreListFilesResponse, VectorStoreListResponse, + VectorStoreNotFoundError, VectorStoreObject, VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) -from 
llama_stack.apis.vector_stores import VectorStore -from llama_stack.core.id_generation import generate_object_id -from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - content_from_data_and_mime_type, - make_overlapped_chunks, -) +from llama_stack_api.internal.kvstore import KVStore EMBEDDING_DIMENSION = 768 @@ -333,7 +335,7 @@ class OpenAIVectorStoreMixin(ABC): @abstractmethod async def insert_chunks( self, - vector_db_id: str, + vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None, ) -> None: @@ -342,7 +344,7 @@ class OpenAIVectorStoreMixin(ABC): @abstractmethod async def query_chunks( - self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None + self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None ) -> QueryChunksResponse: """Query chunks from a vector database (provider-specific implementation).""" pass @@ -414,6 +416,10 @@ class OpenAIVectorStoreMixin(ABC): in_progress=0, total=0, ) + if not params.chunking_strategy or params.chunking_strategy.type == "auto": + chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig()) + else: + chunking_strategy = params.chunking_strategy store_info: dict[str, Any] = { "id": vector_store_id, "object": "vector_store", @@ -426,7 +432,7 @@ class OpenAIVectorStoreMixin(ABC): "expires_at": None, "last_active_at": created_at, "file_ids": [], - "chunking_strategy": params.chunking_strategy, + "chunking_strategy": chunking_strategy.model_dump(), } # Add provider information to metadata if provided @@ -609,7 +615,7 @@ class OpenAIVectorStoreMixin(ABC): # TODO: Add support for ranking_options.ranker response = await self.query_chunks( - vector_db_id=vector_store_id, + vector_store_id=vector_store_id, query=search_query, params=params, ) @@ -637,7 +643,7 @@ class OpenAIVectorStoreMixin(ABC): break return VectorStoreSearchResponsePage( - search_query=search_query, + search_query=query if isinstance(query, list) else [query], data=data, has_more=False, # For simplicity, we don't implement pagination here next_page=None, @@ -647,7 +653,7 @@ class OpenAIVectorStoreMixin(ABC): logger.error(f"Error searching vector store {vector_store_id}: {e}") # Return empty results on error return VectorStoreSearchResponsePage( - search_query=search_query, + search_query=query if isinstance(query, list) else [query], data=[], has_more=False, next_page=None, @@ -699,34 +705,35 @@ class OpenAIVectorStoreMixin(ABC): # Unknown filter type, default to no match raise ValueError(f"Unsupported filter type: {filter_type}") - def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]: - # content is InterleavedContent + def _chunk_to_vector_store_content( + self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False + ) -> list[VectorStoreContent]: + def extract_fields() -> dict: + """Extract embedding and metadata fields from chunk based on include flags.""" + return { + "embedding": chunk.embedding if include_embeddings else None, + "chunk_metadata": chunk.chunk_metadata if include_metadata else None, + "metadata": chunk.metadata if include_metadata else None, + } + + fields = extract_fields() + if isinstance(chunk.content, str): - content = [ - VectorStoreContent( - type="text", - text=chunk.content, - ) - ] + content_item = VectorStoreContent(type="text", text=chunk.content, **fields) + content = 
[content_item] elif isinstance(chunk.content, list): # TODO: Add support for other types of content - content = [ - VectorStoreContent( - type="text", - text=item.text, - ) - for item in chunk.content - if item.type == "text" - ] + content = [] + for item in chunk.content: + if item.type == "text": + content_item = VectorStoreContent(type="text", text=item.text, **fields) + content.append(content_item) else: if chunk.content.type != "text": raise ValueError(f"Unsupported content type: {chunk.content.type}") - content = [ - VectorStoreContent( - type="text", - text=chunk.content.text, - ) - ] + + content_item = VectorStoreContent(type="text", text=chunk.content.text, **fields) + content = [content_item] return content async def openai_attach_file_to_vector_store( @@ -803,7 +810,7 @@ class OpenAIVectorStoreMixin(ABC): ) else: await self.insert_chunks( - vector_db_id=vector_store_id, + vector_store_id=vector_store_id, chunks=chunks, ) vector_store_file_object.status = "completed" @@ -815,13 +822,12 @@ class OpenAIVectorStoreMixin(ABC): message=str(e), ) - # Create OpenAI vector store file metadata + # Save vector store file to persistent storage AFTER insert_chunks + # so that chunks include the embeddings that were generated file_info = vector_store_file_object.model_dump(exclude={"last_error"}) file_info["filename"] = file_response.filename if file_response else "" - # Save vector store file to persistent storage (provider-specific) dict_chunks = [c.model_dump() for c in chunks] - # This should be updated to include chunk_id await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks) # Update file_ids and file_counts in vector store metadata @@ -886,8 +892,8 @@ class OpenAIVectorStoreMixin(ABC): # Determine pagination info has_more = len(file_objects) > limit - first_id = file_objects[0].id if file_objects else None - last_id = file_objects[-1].id if file_objects else None + first_id = limited_files[0].id if file_objects else None + last_id = limited_files[-1].id if file_objects else None return VectorStoreListFilesResponse( data=limited_files, @@ -916,22 +922,27 @@ class OpenAIVectorStoreMixin(ABC): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + include_embeddings: bool | None = False, + include_metadata: bool | None = False, + ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) - file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) + # Parameters are already provided directly + # include_embeddings and include_metadata are now function parameters + dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) chunks = [Chunk.model_validate(c) for c in dict_chunks] content = [] for chunk in chunks: - content.extend(self._chunk_to_vector_store_content(chunk)) - return VectorStoreFileContentsResponse( - file_id=file_id, - filename=file_info.get("filename", ""), - attributes=file_info.get("attributes", {}), - content=content, + content.extend( + self._chunk_to_vector_store_content( + chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False + ) + ) + return VectorStoreFileContentResponse( + data=content, ) async def openai_update_vector_store_file( diff --git a/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py similarity index 97% rename 
from llama_stack/providers/utils/memory/vector_store.py rename to src/llama_stack/providers/utils/memory/vector_store.py index 6c8746e92..b6a671ddb 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -17,21 +17,23 @@ import numpy as np from numpy.typing import NDArray from pydantic import BaseModel -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, -) -from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id +from llama_stack_api import ( + URL, + Api, + Chunk, + ChunkMetadata, + InterleavedContent, + OpenAIEmbeddingsRequestWithExtraBody, + QueryChunksResponse, + RAGDocument, + VectorStore, +) log = get_logger(name=__name__, category="providers::utils") @@ -196,6 +198,7 @@ def make_overlapped_chunks( chunks.append( Chunk( content=chunk, + chunk_id=chunk_id, metadata=chunk_metadata, chunk_metadata=backend_chunk_metadata, ) diff --git a/llama_stack/providers/utils/pagination.py b/src/llama_stack/providers/utils/pagination.py similarity index 95% rename from llama_stack/providers/utils/pagination.py rename to src/llama_stack/providers/utils/pagination.py index 033022491..d1d9e36c5 100644 --- a/llama_stack/providers/utils/pagination.py +++ b/src/llama_stack/providers/utils/pagination.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack_api import PaginatedResponse def paginate_records( diff --git a/llama_stack/providers/remote/safety/__init__.py b/src/llama_stack/providers/utils/responses/__init__.py similarity index 100% rename from llama_stack/providers/remote/safety/__init__.py rename to src/llama_stack/providers/utils/responses/__init__.py diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py new file mode 100644 index 000000000..0401db206 --- /dev/null +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -0,0 +1,282 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
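For context on the `make_overlapped_chunks` hunk above (the diff only shows the `Chunk` construction gaining an explicit `chunk_id`; the windowing itself is unchanged context): overlapped chunking slides a fixed-size window over the token stream with a stride of window minus overlap. A rough sketch of that pattern, with a hypothetical helper name and the tokenizer/metadata handling omitted:

```python
def overlapped_windows(tokens: list[int], window: int, overlap: int) -> list[list[int]]:
    # Assumption: window > overlap >= 0; each window shares `overlap` items with its predecessor.
    step = window - overlap
    return [tokens[i : i + window] for i in range(0, len(tokens), step)]


print(overlapped_windows(list(range(10)), window=4, overlap=2))
# [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7], [6, 7, 8, 9], [8, 9]]
```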
+ +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl +from llama_stack.log import get_logger +from llama_stack_api import ( + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + OpenAIDeleteResponseObject, + OpenAIMessageParam, + OpenAIResponseInput, + OpenAIResponseObject, + OpenAIResponseObjectWithInput, + Order, +) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType + +logger = get_logger(name=__name__, category="openai_responses") + + +class _OpenAIResponseObjectWithInputAndMessages(OpenAIResponseObjectWithInput): + """Internal class for storing responses with chat completion messages. + + This extends the public OpenAIResponseObjectWithInput with messages field + for internal storage. The messages field is not exposed in the public API. + + The messages field is optional for backward compatibility with responses + stored before this feature was added. + """ + + messages: list[OpenAIMessageParam] | None = None + + +class ResponsesStore: + def __init__( + self, + reference: ResponsesStoreReference | SqlStoreReference, + policy: list[AccessRule], + ): + if isinstance(reference, ResponsesStoreReference): + self.reference = reference + else: + self.reference = ResponsesStoreReference(**reference.model_dump()) + + self.policy = policy + self.sql_store = None + + async def initialize(self): + """Create the necessary tables if they don't exist.""" + base_store = sqlstore_impl(self.reference) + self.sql_store = AuthorizedSqlStore(base_store, self.policy) + + await self.sql_store.create_table( + "openai_responses", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "created_at": ColumnType.INTEGER, + "response_object": ColumnType.JSON, + "model": ColumnType.STRING, + }, + ) + + await self.sql_store.create_table( + "conversation_messages", + { + "conversation_id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "messages": ColumnType.JSON, + }, + ) + + async def shutdown(self) -> None: + return + + async def flush(self) -> None: + """Maintained for compatibility; no-op now that writes are synchronous.""" + return + + async def store_response_object( + self, + response_object: OpenAIResponseObject, + input: list[OpenAIResponseInput], + messages: list[OpenAIMessageParam], + ) -> None: + await self._write_response_object(response_object, input, messages) + + async def _write_response_object( + self, + response_object: OpenAIResponseObject, + input: list[OpenAIResponseInput], + messages: list[OpenAIMessageParam], + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + + data = response_object.model_dump() + data["input"] = [input_item.model_dump() for input_item in input] + data["messages"] = [msg.model_dump() for msg in messages] + + await self.sql_store.insert( + "openai_responses", + { + "id": data["id"], + "created_at": data["created_at"], + "model": data["model"], + "response_object": data, + }, + ) + + async def list_responses( + self, + after: str | None = None, + limit: int | None = 50, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseObject: + """ + List responses from the database. + + :param after: The ID of the last response to return. + :param limit: The maximum number of responses to return. 
+ :param model: The model to filter by. + :param order: The order to sort the responses by. + """ + if not self.sql_store: + raise ValueError("Responses store is not initialized") + + if not order: + order = Order.desc + + where_conditions = {} + if model: + where_conditions["model"] = model + + paginated_result = await self.sql_store.fetch_all( + table="openai_responses", + where=where_conditions if where_conditions else None, + order_by=[("created_at", order.value)], + cursor=("id", after) if after else None, + limit=limit, + ) + + data = [OpenAIResponseObjectWithInput(**row["response_object"]) for row in paginated_result.data] + return ListOpenAIResponseObject( + data=data, + has_more=paginated_result.has_more, + first_id=data[0].id if data else "", + last_id=data[-1].id if data else "", + ) + + async def get_response_object(self, response_id: str) -> _OpenAIResponseObjectWithInputAndMessages: + """ + Get a response object with automatic access control checking. + """ + if not self.sql_store: + raise ValueError("Responses store is not initialized") + + row = await self.sql_store.fetch_one( + "openai_responses", + where={"id": response_id}, + ) + + if not row: + # SecureSqlStore will return None if record doesn't exist OR access is denied + # This provides security by not revealing whether the record exists + raise ValueError(f"Response with id {response_id} not found") from None + + return _OpenAIResponseObjectWithInputAndMessages(**row["response_object"]) + + async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject: + if not self.sql_store: + raise ValueError("Responses store is not initialized") + + row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}) + if not row: + raise ValueError(f"Response with id {response_id} not found") + await self.sql_store.delete("openai_responses", where={"id": response_id}) + return OpenAIDeleteResponseObject(id=response_id) + + async def list_response_input_items( + self, + response_id: str, + after: str | None = None, + before: str | None = None, + include: list[str] | None = None, + limit: int | None = 20, + order: Order | None = Order.desc, + ) -> ListOpenAIResponseInputItem: + """ + List input items for a given response. + + :param response_id: The ID of the response to retrieve input items for. + :param after: An item ID to list items after, used for pagination. + :param before: An item ID to list items before, used for pagination. + :param include: Additional fields to include in the response. + :param limit: A limit on the number of objects to be returned. + :param order: The order to return the input items in. 
+ """ + if include: + raise NotImplementedError("Include is not supported yet") + if before and after: + raise ValueError("Cannot specify both 'before' and 'after' parameters") + + response_with_input_and_messages = await self.get_response_object(response_id) + items = response_with_input_and_messages.input + + if order == Order.desc: + items = list(reversed(items)) + + start_index = 0 + end_index = len(items) + + if after or before: + for i, item in enumerate(items): + item_id = getattr(item, "id", None) + if after and item_id == after: + start_index = i + 1 + if before and item_id == before: + end_index = i + break + + if after and start_index == 0: + raise ValueError(f"Input item with id '{after}' not found for response '{response_id}'") + if before and end_index == len(items): + raise ValueError(f"Input item with id '{before}' not found for response '{response_id}'") + + items = items[start_index:end_index] + + # Apply limit + if limit is not None: + items = items[:limit] + + return ListOpenAIResponseInputItem(data=items) + + async def store_conversation_messages(self, conversation_id: str, messages: list[OpenAIMessageParam]) -> None: + """Store messages for a conversation. + + :param conversation_id: The conversation identifier. + :param messages: List of OpenAI message parameters to store. + """ + if not self.sql_store: + raise ValueError("Responses store is not initialized") + + # Serialize messages to dict format for JSON storage + messages_data = [msg.model_dump() for msg in messages] + + await self.sql_store.upsert( + table="conversation_messages", + data={"conversation_id": conversation_id, "messages": messages_data}, + conflict_columns=["conversation_id"], + update_columns=["messages"], + ) + + logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}") + + async def get_conversation_messages(self, conversation_id: str) -> list[OpenAIMessageParam] | None: + """Get stored messages for a conversation. + + :param conversation_id: The conversation identifier. + :returns: List of OpenAI message parameters, or None if no messages stored. 
+ """ + if not self.sql_store: + raise ValueError("Responses store is not initialized") + + record = await self.sql_store.fetch_one( + table="conversation_messages", + where={"conversation_id": conversation_id}, + ) + + if record is None: + return None + + # Deserialize messages from JSON storage + from pydantic import TypeAdapter + + adapter = TypeAdapter(list[OpenAIMessageParam]) + return adapter.validate_python(record["messages"]) diff --git a/llama_stack/providers/utils/scheduler.py b/src/llama_stack/providers/utils/scheduler.py similarity index 100% rename from llama_stack/providers/utils/scheduler.py rename to src/llama_stack/providers/utils/scheduler.py diff --git a/llama_stack/providers/remote/tool_runtime/__init__.py b/src/llama_stack/providers/utils/scoring/__init__.py similarity index 100% rename from llama_stack/providers/remote/tool_runtime/__init__.py rename to src/llama_stack/providers/utils/scoring/__init__.py diff --git a/llama_stack/providers/utils/scoring/aggregation_utils.py b/src/llama_stack/providers/utils/scoring/aggregation_utils.py similarity index 95% rename from llama_stack/providers/utils/scoring/aggregation_utils.py rename to src/llama_stack/providers/utils/scoring/aggregation_utils.py index cff9a112f..aa6fe7248 100644 --- a/llama_stack/providers/utils/scoring/aggregation_utils.py +++ b/src/llama_stack/providers/utils/scoring/aggregation_utils.py @@ -6,8 +6,7 @@ import statistics from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import AggregationFunctionType +from llama_stack_api import AggregationFunctionType, ScoringResultRow def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: diff --git a/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py similarity index 96% rename from llama_stack/providers/utils/scoring/base_scoring_fn.py rename to src/llama_stack/providers/utils/scoring/base_scoring_fn.py index 2fae177b7..f372db8b5 100644 --- a/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -6,9 +6,8 @@ from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFn from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics +from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow class BaseScoringFn(ABC): diff --git a/llama_stack/providers/utils/scoring/basic_scoring_utils.py b/src/llama_stack/providers/utils/scoring/basic_scoring_utils.py similarity index 100% rename from llama_stack/providers/utils/scoring/basic_scoring_utils.py rename to src/llama_stack/providers/utils/scoring/basic_scoring_utils.py diff --git a/llama_stack/providers/remote/vector_io/__init__.py b/src/llama_stack/providers/utils/tools/__init__.py similarity index 100% rename from llama_stack/providers/remote/vector_io/__init__.py rename to src/llama_stack/providers/utils/tools/__init__.py diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py new file mode 100644 index 000000000..05cdfa73b --- /dev/null +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -0,0 +1,230 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager +from enum import Enum +from typing import Any, cast + +import httpx +from mcp import ClientSession, McpError +from mcp import types as mcp_types +from mcp.client.sse import sse_client +from mcp.client.streamable_http import streamablehttp_client + +from llama_stack.core.datatypes import AuthenticationRequiredError +from llama_stack.log import get_logger +from llama_stack.providers.utils.tools.ttl_dict import TTLDict +from llama_stack_api import ( + ImageContentItem, + InterleavedContentItem, + ListToolDefsResponse, + TextContentItem, + ToolDef, + ToolInvocationResult, + _URLOrData, +) + +logger = get_logger(__name__, category="tools") + + +def prepare_mcp_headers(base_headers: dict[str, str] | None, authorization: str | None) -> dict[str, str]: + """ + Prepare headers for MCP requests with authorization support. + + Args: + base_headers: Base headers dictionary (can be None) + authorization: OAuth access token (without "Bearer " prefix) + + Returns: + Headers dictionary with Authorization header if token provided + + Raises: + ValueError: If Authorization header is specified in the headers dict (security risk) + """ + headers = dict(base_headers or {}) + + # Security check: reject any Authorization header in the headers dict + # Users must use the authorization parameter instead to avoid security risks + existing_keys_lower = {k.lower() for k in headers.keys()} + if "authorization" in existing_keys_lower: + raise ValueError( + "For security reasons, Authorization header cannot be passed via 'headers'. " + "Please use the 'authorization' parameter instead." + ) + + # Add Authorization header if token provided + if authorization: + # OAuth access token - add "Bearer " prefix + headers["Authorization"] = f"Bearer {authorization}" + + return headers + + +protocol_cache = TTLDict(ttl_seconds=3600) + + +class MCPProtol(Enum): + UNKNOWN = 0 + STREAMABLE_HTTP = 1 + SSE = 2 + + +@asynccontextmanager +async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerator[ClientSession, Any]: + # we use a ttl'd dict to cache the happy path protocol for each endpoint + # but, we always fall back to trying the other protocol if we cannot initialize the session + connection_strategies = [MCPProtol.STREAMABLE_HTTP, MCPProtol.SSE] + mcp_protocol = protocol_cache.get(endpoint, default=MCPProtol.UNKNOWN) + if mcp_protocol == MCPProtol.SSE: + connection_strategies = [MCPProtol.SSE, MCPProtol.STREAMABLE_HTTP] + + for i, strategy in enumerate(connection_strategies): + try: + client = streamablehttp_client + if strategy == MCPProtol.SSE: + # sse_client and streamablehttp_client have different signatures, but both + # are called the same way here, so we cast to Any to avoid type errors + client = cast(Any, sse_client) + + async with client(endpoint, headers=headers) as client_streams: + async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session: + await session.initialize() + protocol_cache[endpoint] = strategy + yield session + return + except* httpx.HTTPStatusError as eg: + for exc in eg.exceptions: + # mypy does not currently narrow the type of `eg.exceptions` based on the `except*` filter, + # so we explicitly cast each item to httpx.HTTPStatusError. 
This is safe because + # `except* httpx.HTTPStatusError` guarantees all exceptions in `eg.exceptions` are of that type. + err = cast(httpx.HTTPStatusError, exc) + if err.response.status_code == 401: + raise AuthenticationRequiredError(exc) from exc + if i == len(connection_strategies) - 1: + raise + except* httpx.ConnectError as eg: + # Connection refused, server down, network unreachable + if i == len(connection_strategies) - 1: + error_msg = f"Failed to connect to MCP server at {endpoint}: Connection refused" + logger.error(f"MCP connection error: {error_msg}") + raise ConnectionError(error_msg) from eg + else: + logger.warning( + f"failed to connect to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}" + ) + except* httpx.TimeoutException as eg: + # Request timeout, server too slow + if i == len(connection_strategies) - 1: + error_msg = f"MCP server at {endpoint} timed out" + logger.error(f"MCP timeout error: {error_msg}") + raise TimeoutError(error_msg) from eg + else: + logger.warning( + f"MCP server at {endpoint} timed out via {strategy.name}, falling back to {connection_strategies[i + 1].name}" + ) + except* httpx.RequestError as eg: + # DNS resolution failures, network errors, invalid URLs + if i == len(connection_strategies) - 1: + # Get the first exception's message for the error string + exc_msg = str(eg.exceptions[0]) if eg.exceptions else "Unknown error" + error_msg = f"Network error connecting to MCP server at {endpoint}: {exc_msg}" + logger.error(f"MCP network error: {error_msg}") + raise ConnectionError(error_msg) from eg + else: + logger.warning( + f"network error connecting to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}" + ) + except* McpError: + if i < len(connection_strategies) - 1: + logger.warning( + f"failed to connect via {strategy.name}, falling back to {connection_strategies[i + 1].name}" + ) + else: + raise + + +async def list_mcp_tools( + endpoint: str, + headers: dict[str, str] | None = None, + authorization: str | None = None, +) -> ListToolDefsResponse: + """List tools available from an MCP server. + + Args: + endpoint: MCP server endpoint URL + headers: Optional base headers to include + authorization: Optional OAuth access token (just the token, not "Bearer ") + + Returns: + List of tool definitions from the MCP server + + Raises: + ValueError: If Authorization is found in the headers parameter + """ + # Prepare headers with authorization handling + final_headers = prepare_mcp_headers(headers, authorization) + + tools = [] + async with client_wrapper(endpoint, final_headers) as session: + tools_result = await session.list_tools() + for tool in tools_result.tools: + tools.append( + ToolDef( + name=tool.name, + description=tool.description, + input_schema=tool.inputSchema, + output_schema=getattr(tool, "outputSchema", None), + metadata={ + "endpoint": endpoint, + }, + ) + ) + return ListToolDefsResponse(data=tools) + + +async def invoke_mcp_tool( + endpoint: str, + tool_name: str, + kwargs: dict[str, Any], + headers: dict[str, str] | None = None, + authorization: str | None = None, +) -> ToolInvocationResult: + """Invoke an MCP tool with the given arguments. 
+ + Args: + endpoint: MCP server endpoint URL + tool_name: Name of the tool to invoke + kwargs: Tool invocation arguments + headers: Optional base headers to include + authorization: Optional OAuth access token (just the token, not "Bearer ") + + Returns: + Tool invocation result with content and error information + + Raises: + ValueError: If Authorization header is found in the headers parameter + """ + # Prepare headers with authorization handling + final_headers = prepare_mcp_headers(headers, authorization) + + async with client_wrapper(endpoint, final_headers) as session: + result = await session.call_tool(tool_name, kwargs) + + content: list[InterleavedContentItem] = [] + for item in result.content: + if isinstance(item, mcp_types.TextContent): + content.append(TextContentItem(text=item.text)) + elif isinstance(item, mcp_types.ImageContent): + content.append(ImageContentItem(image=_URLOrData(data=item.data))) + elif isinstance(item, mcp_types.EmbeddedResource): + logger.warning(f"EmbeddedResource is not supported: {item}") + else: + raise ValueError(f"Unknown content type: {type(item)}") + return ToolInvocationResult( + content=content, + error_code=1 if result.isError else 0, + ) diff --git a/llama_stack/providers/utils/tools/ttl_dict.py b/src/llama_stack/providers/utils/tools/ttl_dict.py similarity index 100% rename from llama_stack/providers/utils/tools/ttl_dict.py rename to src/llama_stack/providers/utils/tools/ttl_dict.py diff --git a/llama_stack/providers/utils/__init__.py b/src/llama_stack/providers/utils/vector_io/__init__.py similarity index 100% rename from llama_stack/providers/utils/__init__.py rename to src/llama_stack/providers/utils/vector_io/__init__.py diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/src/llama_stack/providers/utils/vector_io/vector_utils.py similarity index 100% rename from llama_stack/providers/utils/vector_io/vector_utils.py rename to src/llama_stack/providers/utils/vector_io/vector_utils.py diff --git a/llama_stack/providers/utils/bedrock/__init__.py b/src/llama_stack/telemetry/__init__.py similarity index 100% rename from llama_stack/providers/utils/bedrock/__init__.py rename to src/llama_stack/telemetry/__init__.py diff --git a/src/llama_stack/telemetry/constants.py b/src/llama_stack/telemetry/constants.py new file mode 100644 index 000000000..1d3db0742 --- /dev/null +++ b/src/llama_stack/telemetry/constants.py @@ -0,0 +1,27 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +This file contains constants used for naming data captured for telemetry. + +This is used to ensure that the data captured for telemetry is consistent and can be used to +identify and correlate data. If custom telemetry data is added to llama stack, please add +constants for it here. 
+""" + +llama_stack_prefix = "llama_stack" + +# Safety Attributes +RUN_SHIELD_OPERATION_NAME = "run_shield" + +SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request" +SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id" +SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages" + +SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response" +SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata" +SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level" +SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message" diff --git a/src/llama_stack/telemetry/helpers.py b/src/llama_stack/telemetry/helpers.py new file mode 100644 index 000000000..2ae13c9c5 --- /dev/null +++ b/src/llama_stack/telemetry/helpers.py @@ -0,0 +1,43 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json + +from opentelemetry import trace + +from llama_stack_api import OpenAIMessageParam, RunShieldResponse + +from .constants import ( + RUN_SHIELD_OPERATION_NAME, + SAFETY_REQUEST_MESSAGES_ATTRIBUTE, + SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, + SAFETY_RESPONSE_METADATA_ATTRIBUTE, + SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, + SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, +) + + +def safety_span_name(shield_id: str) -> str: + return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}" + + +# TODO: Consider using Wrapt to automatically instrument code +# This is the industry standard way to package automatically instrumentation in python. +def safety_request_span_attributes( + shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse +) -> None: + span = trace.get_current_span() + span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id) + messages_json = json.dumps([msg.model_dump() for msg in messages]) + span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json) + + if response.violation: + if response.violation.metadata: + metadata_json = json.dumps(response.violation.metadata) + span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json) + if response.violation.user_message: + span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message) + span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value) diff --git a/llama_stack/providers/utils/common/__init__.py b/src/llama_stack/testing/__init__.py similarity index 100% rename from llama_stack/providers/utils/common/__init__.py rename to src/llama_stack/testing/__init__.py diff --git a/llama_stack/testing/api_recorder.py b/src/llama_stack/testing/api_recorder.py similarity index 95% rename from llama_stack/testing/api_recorder.py rename to src/llama_stack/testing/api_recorder.py index 9e272ca3a..a7ad582f3 100644 --- a/llama_stack/testing/api_recorder.py +++ b/src/llama_stack/testing/api_recorder.py @@ -40,10 +40,12 @@ from openai.types.completion_choice import CompletionChoice from llama_stack.core.testing_context import get_test_context, is_debug_mode # update the "finish_reason" field, since its type definition is wrong (no None is accepted) -CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None +CompletionChoice.model_fields["finish_reason"].annotation = cast( + type[Any] | None, Literal["stop", "length", "content_filter"] | 
None +) CompletionChoice.model_rebuild() -REPO_ROOT = Path(__file__).parent.parent.parent +REPO_ROOT = Path(__file__).parent.parent.parent.parent DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common" @@ -154,7 +156,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any], } # Include test_id for isolation, except for shared infrastructure endpoints - if parsed.path not in ("/api/tags", "/v1/models"): + if parsed.path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"): normalized["test_id"] = test_id normalized_json = json.dumps(normalized, sort_keys=True) @@ -428,7 +430,7 @@ class ResponseStorage: # For model-list endpoints, include digest in filename to distinguish different model sets endpoint = request.get("endpoint") - if endpoint in ("/api/tags", "/v1/models"): + if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"): digest = _model_identifiers_digest(endpoint, response) response_file = f"models-{request_hash}-{digest}.json" @@ -552,13 +554,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str: Supported endpoints: - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ] - '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ] + - '/v1/openai/v1/models' (OpenAI): response body is: [ { id: ... }, ... ] Returns a list of unique identifiers or None if structure doesn't match. """ if "models" in response["body"]: # ollama items = response["body"]["models"] else: - # openai + # openai or openai-style endpoints items = response["body"] idents = [m.model if endpoint == "/api/tags" else m.id for m in items] return sorted(set(idents)) @@ -579,7 +582,7 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) seen: dict[str, dict[str, Any]] = {} for rec in records: body = rec["response"]["body"] - if endpoint == "/v1/models": + if endpoint in ("/v1/models", "/v1/openai/v1/models"): for m in body: key = m.id seen[key] = m @@ -597,19 +600,23 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) if endpoint == "/api/tags": from ollama import ListResponse - body = ListResponse(models=ordered) + # Both cast(Any, ...) and type: ignore are needed here: + # - cast(Any, ...) 
attempts to bypass type checking on the argument + # - type: ignore is still needed because mypy checks the call site independently + # and reports arg-type mismatch even after casting + body = ListResponse(models=cast(Any, ordered)) # type: ignore[arg-type] return {"request": canonical_req, "response": {"body": body, "is_streaming": False}} async def _patched_tool_invoke_method( - original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any] + original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None ): """Patched version of tool runtime invoke_tool method for recording/replay.""" global _current_mode, _current_storage if _current_mode == APIRecordingMode.LIVE or _current_storage is None: # Normal operation - return await original_method(self, tool_name, kwargs) + return await original_method(self, tool_name, kwargs, authorization=authorization) request_hash = normalize_tool_request(provider_name, tool_name, kwargs) @@ -627,7 +634,7 @@ async def _patched_tool_invoke_method( if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING): # Make the tool call and record it - result = await original_method(self, tool_name, kwargs) + result = await original_method(self, tool_name, kwargs, authorization=authorization) request_data = { "test_id": get_test_context(), @@ -659,7 +666,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint logger.info(f" Test context: {get_test_context()}") if mode == APIRecordingMode.LIVE or storage is None: - if endpoint == "/v1/models": + if endpoint in ("/v1/models", "/v1/openai/v1/models"): return original_method(self, *args, **kwargs) else: return await original_method(self, *args, **kwargs) @@ -693,7 +700,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint recording = None if mode == APIRecordingMode.REPLAY or mode == APIRecordingMode.RECORD_IF_MISSING: # Special handling for model-list endpoints: merge all recordings with this hash - if endpoint in ("/api/tags", "/v1/models"): + if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"): records = storage._model_list_responses(request_hash) recording = _combine_model_list_responses(endpoint, records) else: @@ -733,13 +740,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint ) if mode == APIRecordingMode.RECORD or (mode == APIRecordingMode.RECORD_IF_MISSING and not recording): - if endpoint == "/v1/models": + if endpoint in ("/v1/models", "/v1/openai/v1/models"): response = original_method(self, *args, **kwargs) else: response = await original_method(self, *args, **kwargs) # we want to store the result of the iterator, not the iterator itself - if endpoint == "/v1/models": + if endpoint in ("/v1/models", "/v1/openai/v1/models"): response = [m async for m in response] request_data = { @@ -878,9 +885,11 @@ def patch_inference_clients(): OllamaAsyncClient.list = patched_ollama_list # Create patched methods for tool runtimes - async def patched_tavily_invoke_tool(self, tool_name: str, kwargs: dict[str, Any]): + async def patched_tavily_invoke_tool( + self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None + ): return await _patched_tool_invoke_method( - _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs + _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs, authorization=authorization ) # Apply tool runtime patches diff --git 
a/src/llama_stack_api/README.md b/src/llama_stack_api/README.md
new file mode 100644
index 000000000..9bf1d2726
--- /dev/null
+++ b/src/llama_stack_api/README.md
@@ -0,0 +1,103 @@
+# llama-stack-api
+
+API and Provider specifications for Llama Stack - a lightweight package with protocol definitions and provider specs.
+
+## Overview
+
+`llama-stack-api` is a minimal-dependency package that contains:
+
+- **API Protocol Definitions**: Type-safe protocol definitions for all Llama Stack APIs (inference, agents, safety, etc.)
+- **Provider Specifications**: Provider spec definitions for building custom providers
+- **Data Types**: Shared data types and models used across the Llama Stack ecosystem
+- **Type Utilities**: Strong typing utilities and schema validation
+
+## What This Package Does NOT Include
+
+- Server implementation (see `llama-stack` package)
+- Provider implementations (see `llama-stack` package)
+- CLI tools (see `llama-stack` package)
+- Runtime orchestration (see `llama-stack` package)
+
+## Use Cases
+
+This package is designed for:
+
+1. **Third-party Provider Developers**: Build custom providers without depending on the full Llama Stack server
+2. **Client Library Authors**: Use type definitions without server dependencies
+3. **Documentation Generation**: Generate API docs from protocol definitions
+4. **Type Checking**: Validate implementations against the official specs
+
+## Installation
+
+```bash
+pip install llama-stack-api
+```
+
+Or with uv:
+
+```bash
+uv pip install llama-stack-api
+```
+
+## Dependencies
+
+Minimal dependencies:
+- `pydantic>=2.11.9` - For data validation and serialization
+- `jsonschema` - For JSON schema utilities
+
+## Versioning
+
+This package follows semantic versioning independently from the main `llama-stack` package:
+
+- **Patch versions** (0.1.x): Documentation, internal improvements
+- **Minor versions** (0.x.0): New APIs, backward-compatible changes
+- **Major versions** (x.0.0): Breaking changes to existing APIs
+
+Current version: **0.4.0.dev0**
+
+## Usage Example
+
+```python
+# All public symbols are re-exported at the top level; submodule imports are unsupported.
+from llama_stack_api import Api, Inference, InlineProviderSpec, ProviderSpec
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
+
+
+# Use protocol definitions for type checking
+class MyInferenceProvider(Inference):
+    async def openai_chat_completion(self, params: OpenAIChatCompletionRequestWithExtraBody):
+        # Your implementation
+        pass
+
+
+# Define provider specifications
+my_provider_spec = InlineProviderSpec(
+    api=Api.inference,
+    provider_type="inline::my-provider",
+    pip_packages=["my-dependencies"],
+    module="my_package.providers.inference",
+    config_class="my_package.providers.inference.MyConfig",
+)
+```
+
+## Relationship to llama-stack
+
+The main `llama-stack` package depends on `llama-stack-api` and provides:
+- Full server implementation
+- Built-in provider implementations
+- CLI tools for running and managing stacks
+- Runtime provider resolution and orchestration
+
+## Contributing
+
+See the main [Llama Stack repository](https://github.com/llamastack/llama-stack) for contribution guidelines.
+
+## License
+
+MIT License - see LICENSE file for details.
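To make the README's "Type Checking" use case concrete: annotating against the protocol types is enough for mypy/pyright to validate a third-party implementation without the server package installed. A minimal sketch, with a hypothetical registration helper:

```python
from llama_stack_api import Inference


def register_inference_backend(impl: Inference) -> None:
    # mypy/pyright will flag implementations that do not satisfy the Inference protocol.
    ...
```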
+ +## Links + +- [Main Llama Stack Repository](https://github.com/llamastack/llama-stack) +- [Documentation](https://llamastack.ai/) +- [Client Library](https://pypi.org/project/llama-stack-client/) diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py new file mode 100644 index 000000000..b6fe2fd23 --- /dev/null +++ b/src/llama_stack_api/__init__.py @@ -0,0 +1,861 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Llama Stack API Specifications + +This package contains the API definitions, data types, and protocol specifications +for Llama Stack. It is designed to be a lightweight dependency for external providers +and clients that need to interact with Llama Stack APIs without requiring the full +server implementation. + +All imports from this package MUST use the form: + from llama_stack_api import + +Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT supported +and considered a code smell. All exported symbols are explicitly listed in __all__. +""" + +__version__ = "0.4.0.dev0" + +# Import submodules for those who need them +from . import common # noqa: F401 + +# Import all public API symbols +from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec +from .batches import Batches, BatchObject, ListBatchesResponse +from .benchmarks import ( + Benchmark, + BenchmarkInput, + Benchmarks, + CommonBenchmarkFields, + ListBenchmarksResponse, +) + +# Import commonly used types from common submodule +from .common.content_types import ( + URL, + ImageContentItem, + InterleavedContent, + InterleavedContentItem, + TextContentItem, + _URLOrData, +) +from .common.errors import ( + ConflictError, + DatasetNotFoundError, + InvalidConversationIdError, + ModelNotFoundError, + ModelTypeError, + ResourceNotFoundError, + TokenValidationError, + ToolGroupNotFoundError, + UnsupportedModelError, + VectorStoreNotFoundError, +) +from .common.job_types import Job, JobStatus +from .common.responses import Order, PaginatedResponse +from .common.training_types import Checkpoint, PostTrainingMetric +from .common.type_system import ( + ChatCompletionInputType, + CompletionInputType, + NumberType, + ParamType, + StringType, +) +from .conversations import ( + Conversation, + ConversationDeletedResource, + ConversationItem, + ConversationItemCreateRequest, + ConversationItemDeletedResource, + ConversationItemInclude, + ConversationItemList, + ConversationMessage, + Conversations, + Metadata, +) +from .datasetio import DatasetIO, DatasetStore +from .datasets import ( + CommonDatasetFields, + Dataset, + DatasetInput, + DatasetPurpose, + Datasets, + DatasetType, + DataSource, + ListDatasetsResponse, + RowsDataSource, + URIDataSource, +) +from .datatypes import ( + Api, + BenchmarksProtocolPrivate, + DatasetsProtocolPrivate, + DynamicApiMeta, + Error, + ExternalApiSpec, + HealthResponse, + HealthStatus, + InlineProviderSpec, + ModelsProtocolPrivate, + ProviderSpec, + RemoteProviderConfig, + RemoteProviderSpec, + RoutingTable, + ScoringFunctionsProtocolPrivate, + ShieldsProtocolPrivate, + ToolGroupsProtocolPrivate, + VectorStoresProtocolPrivate, +) +from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate +from .files import ( + ExpiresAfter, + Files, + ListOpenAIFileResponse, + OpenAIFileDeleteResponse, + OpenAIFileObject, + OpenAIFilePurpose, +) +from .inference import 
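The import rule stated in the module docstring above deserves a concrete do/don't; all of the names below are re-exported by this `__init__.py` (illustrative, not part of the patch):

```python
# Supported: every public symbol is available from the package root.
from llama_stack_api import Agents, Benchmark, ListBenchmarksResponse

# Not supported (a "code smell" per the module docstring):
# from llama_stack_api.benchmarks import Benchmark
```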
( + Bf16QuantizationConfig, + ChatCompletionResponseEventType, + CompletionRequest, + EmbeddingsResponse, + EmbeddingTaskType, + Fp8QuantizationConfig, + GrammarResponseFormat, + GreedySamplingStrategy, + Inference, + InferenceProvider, + Int4QuantizationConfig, + JsonSchemaResponseFormat, + ListOpenAIChatCompletionResponse, + LogProbConfig, + ModelStore, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionMessageContent, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionTextOnlyMessageContent, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChatCompletionUsage, + OpenAIChatCompletionUsageCompletionTokensDetails, + OpenAIChatCompletionUsagePromptTokensDetails, + OpenAIChoice, + OpenAIChoiceDelta, + OpenAIChoiceLogprobs, + OpenAIChunkChoice, + OpenAICompletion, + OpenAICompletionChoice, + OpenAICompletionLogprobs, + OpenAICompletionRequestWithExtraBody, + OpenAICompletionWithInputMessages, + OpenAIDeveloperMessageParam, + OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIFile, + OpenAIFileFile, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, + OpenAISystemMessageParam, + OpenAITokenLogProb, + OpenAIToolMessageParam, + OpenAITopLogProb, + OpenAIUserMessageParam, + QuantizationConfig, + QuantizationType, + RerankData, + RerankResponse, + ResponseFormat, + ResponseFormatType, + SamplingParams, + SamplingStrategy, + SystemMessage, + SystemMessageBehavior, + TextTruncation, + TokenLogProbs, + ToolChoice, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, +) +from .inspect import ( + ApiFilter, + HealthInfo, + Inspect, + ListRoutesResponse, + RouteInfo, + VersionInfo, +) +from .models import ( + CommonModelFields, + ListModelsResponse, + Model, + ModelInput, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) +from .openai_responses import ( + AllowedToolsFilter, + ApprovalFilter, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + MCPListToolsTool, + OpenAIDeleteResponseObject, + OpenAIResponseAnnotationCitation, + OpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseAnnotationFileCitation, + OpenAIResponseAnnotationFilePath, + OpenAIResponseAnnotations, + OpenAIResponseContentPart, + OpenAIResponseContentPartOutputText, + OpenAIResponseContentPartReasoningSummary, + OpenAIResponseContentPartReasoningText, + OpenAIResponseContentPartRefusal, + OpenAIResponseError, + OpenAIResponseInput, + OpenAIResponseInputFunctionToolCallOutput, + OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseInputToolFileSearch, + OpenAIResponseInputToolFunction, + OpenAIResponseInputToolMCP, + OpenAIResponseInputToolWebSearch, + OpenAIResponseMCPApprovalRequest, + OpenAIResponseMCPApprovalResponse, + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseContentPartAdded, + OpenAIResponseObjectStreamResponseContentPartDone, + OpenAIResponseObjectStreamResponseCreated, 
+ OpenAIResponseObjectStreamResponseFailed, + OpenAIResponseObjectStreamResponseFileSearchCallCompleted, + OpenAIResponseObjectStreamResponseFileSearchCallInProgress, + OpenAIResponseObjectStreamResponseFileSearchCallSearching, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone, + OpenAIResponseObjectStreamResponseIncomplete, + OpenAIResponseObjectStreamResponseInProgress, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone, + OpenAIResponseObjectStreamResponseMcpCallCompleted, + OpenAIResponseObjectStreamResponseMcpCallFailed, + OpenAIResponseObjectStreamResponseMcpCallInProgress, + OpenAIResponseObjectStreamResponseMcpListToolsCompleted, + OpenAIResponseObjectStreamResponseMcpListToolsFailed, + OpenAIResponseObjectStreamResponseMcpListToolsInProgress, + OpenAIResponseObjectStreamResponseOutputItemAdded, + OpenAIResponseObjectStreamResponseOutputItemDone, + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded, + OpenAIResponseObjectStreamResponseOutputTextDelta, + OpenAIResponseObjectStreamResponseOutputTextDone, + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded, + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone, + OpenAIResponseObjectStreamResponseReasoningTextDelta, + OpenAIResponseObjectStreamResponseReasoningTextDone, + OpenAIResponseObjectStreamResponseRefusalDelta, + OpenAIResponseObjectStreamResponseRefusalDone, + OpenAIResponseObjectStreamResponseWebSearchCallCompleted, + OpenAIResponseObjectStreamResponseWebSearchCallInProgress, + OpenAIResponseObjectStreamResponseWebSearchCallSearching, + OpenAIResponseObjectWithInput, + OpenAIResponseOutput, + OpenAIResponseOutputMessageContent, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFileSearchToolCall, + OpenAIResponseOutputMessageFileSearchToolCallResults, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPCall, + OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponsePrompt, + OpenAIResponseText, + OpenAIResponseTextFormat, + OpenAIResponseTool, + OpenAIResponseToolMCP, + OpenAIResponseUsage, + OpenAIResponseUsageInputTokensDetails, + OpenAIResponseUsageOutputTokensDetails, + WebSearchToolTypes, +) +from .post_training import ( + AlgorithmConfig, + DataConfig, + DatasetFormat, + DPOAlignmentConfig, + DPOLossType, + EfficiencyConfig, + ListPostTrainingJobsResponse, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + PostTraining, + PostTrainingJob, + PostTrainingJobArtifactsResponse, + PostTrainingJobLogStream, + PostTrainingJobStatusResponse, + PostTrainingRLHFRequest, + QATFinetuningConfig, + RLHFAlgorithm, + TrainingConfig, +) +from .prompts import ListPromptsResponse, Prompt, Prompts +from .providers import ListProvidersResponse, ProviderInfo, Providers +from .rag_tool import ( + DefaultRAGQueryGeneratorConfig, + LLMRAGQueryGeneratorConfig, + RAGDocument, + RAGQueryConfig, + RAGQueryGenerator, + RAGQueryGeneratorConfig, + RAGQueryResult, + RAGSearchMode, + Ranker, + RRFRanker, + WeightedRanker, +) +from .resource import Resource, ResourceType +from .safety import ( + ModerationObject, + ModerationObjectResults, + RunShieldResponse, + Safety, + SafetyViolation, + ShieldStore, + ViolationLevel, +) +from .schema_utils import ( + 
CallableT, + ExtraBodyField, + SchemaInfo, + WebMethod, + clear_dynamic_schema_types, + get_registered_schema_info, + iter_dynamic_schema_types, + iter_json_schema_types, + iter_registered_schema_types, + json_schema_type, + register_dynamic_schema_type, + register_schema, + webmethod, +) +from .scoring import ( + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFunctionStore, + ScoringResult, + ScoringResultRow, +) +from .scoring_functions import ( + AggregationFunctionType, + BasicScoringFnParams, + CommonScoringFnFields, + ListScoringFunctionsResponse, + LLMAsJudgeScoringFnParams, + RegexParserScoringFnParams, + ScoringFn, + ScoringFnInput, + ScoringFnParams, + ScoringFnParamsType, + ScoringFunctions, +) +from .shields import ( + CommonShieldFields, + ListShieldsResponse, + Shield, + ShieldInput, + Shields, +) +from .tools import ( + ListToolDefsResponse, + ListToolGroupsResponse, + SpecialToolGroup, + ToolDef, + ToolGroup, + ToolGroupInput, + ToolGroups, + ToolInvocationResult, + ToolRuntime, + ToolStore, +) +from .vector_io import ( + Chunk, + ChunkMetadata, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, + QueryChunksResponse, + SearchRankingOptions, + VectorIO, + VectorStoreChunkingStrategy, + VectorStoreChunkingStrategyAuto, + VectorStoreChunkingStrategyStatic, + VectorStoreChunkingStrategyStaticConfig, + VectorStoreContent, + VectorStoreCreateRequest, + VectorStoreDeleteResponse, + VectorStoreFileBatchObject, + VectorStoreFileContentResponse, + VectorStoreFileCounts, + VectorStoreFileDeleteResponse, + VectorStoreFileLastError, + VectorStoreFileObject, + VectorStoreFilesListInBatchResponse, + VectorStoreFileStatus, + VectorStoreListFilesResponse, + VectorStoreListResponse, + VectorStoreModifyRequest, + VectorStoreObject, + VectorStoreSearchRequest, + VectorStoreSearchResponse, + VectorStoreSearchResponsePage, + VectorStoreTable, +) +from .vector_stores import VectorStore, VectorStoreInput +from .version import ( + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, +) + +__all__ = [ + # Submodules + "common", + # Version constants + "LLAMA_STACK_API_V1", + "LLAMA_STACK_API_V1ALPHA", + "LLAMA_STACK_API_V1BETA", + # API Symbols + "Agents", + "AggregationFunctionType", + "AlgorithmConfig", + "AllowedToolsFilter", + "Api", + "ApiFilter", + "ApprovalFilter", + "BasicScoringFnParams", + "Batches", + "BatchObject", + "Benchmark", + "BenchmarkConfig", + "BenchmarkInput", + "Benchmarks", + "BenchmarksProtocolPrivate", + "Bf16QuantizationConfig", + "CallableT", + "ChatCompletionInputType", + "ChatCompletionResponseEventType", + "Checkpoint", + "Chunk", + "ChunkMetadata", + "CommonBenchmarkFields", + "ConflictError", + "CommonDatasetFields", + "CommonModelFields", + "CommonScoringFnFields", + "CommonShieldFields", + "CompletionInputType", + "CompletionRequest", + "Conversation", + "ConversationDeletedResource", + "ConversationItem", + "ConversationItemCreateRequest", + "ConversationItemDeletedResource", + "ConversationItemInclude", + "ConversationItemList", + "ConversationMessage", + "Conversations", + "DPOAlignmentConfig", + "DPOLossType", + "DataConfig", + "DataSource", + "Dataset", + "DatasetFormat", + "DatasetIO", + "DatasetInput", + "DatasetPurpose", + "DatasetNotFoundError", + "DatasetStore", + "DatasetType", + "Datasets", + "DatasetsProtocolPrivate", + "DefaultRAGQueryGeneratorConfig", + "Docstring", + "DynamicApiMeta", + "EfficiencyConfig", + "EmbeddingTaskType", + "EmbeddingsResponse", + "Error", + 
"Eval", + "EvalCandidate", + "EvaluateResponse", + "ExpiresAfter", + "ExternalApiSpec", + "ExtraBodyField", + "Files", + "Fp8QuantizationConfig", + "clear_dynamic_schema_types", + "get_schema_identifier", + "get_signature", + "GrammarResponseFormat", + "GreedySamplingStrategy", + "HealthInfo", + "HealthResponse", + "HealthStatus", + "ImageContentItem", + "Inference", + "InferenceProvider", + "InlineProviderSpec", + "Inspect", + "Int4QuantizationConfig", + "InterleavedContent", + "InterleavedContentItem", + "InvalidConversationIdError", + "is_generic_list", + "is_type_optional", + "is_type_union", + "is_unwrapped_body_param", + "iter_dynamic_schema_types", + "iter_json_schema_types", + "iter_registered_schema_types", + "get_registered_schema_info", + "Job", + "JobStatus", + "json_dump_string", + "json_schema_type", + "JsonSchemaGenerator", + "JsonSchemaResponseFormat", + "JsonType", + "LLMAsJudgeScoringFnParams", + "LLMRAGQueryGeneratorConfig", + "ListBatchesResponse", + "ListBenchmarksResponse", + "ListDatasetsResponse", + "ListModelsResponse", + "ListOpenAIChatCompletionResponse", + "ListOpenAIFileResponse", + "ListOpenAIResponseInputItem", + "ListOpenAIResponseObject", + "ListPostTrainingJobsResponse", + "ListPromptsResponse", + "ListProvidersResponse", + "ListRoutesResponse", + "ListScoringFunctionsResponse", + "ListShieldsResponse", + "ListToolDefsResponse", + "ListToolGroupsResponse", + "LogProbConfig", + "LoraFinetuningConfig", + "MCPListToolsTool", + "Metadata", + "Model", + "ModelCandidate", + "ModelInput", + "ModelNotFoundError", + "ModelStore", + "ModelType", + "ModelTypeError", + "Models", + "ModelsProtocolPrivate", + "ModerationObject", + "ModerationObjectResults", + "NumberType", + "object_to_json", + "OpenAIAssistantMessageParam", + "OpenAIChatCompletion", + "OpenAIChatCompletionChunk", + "OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionContentPartParam", + "OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionMessageContent", + "OpenAIChatCompletionRequestWithExtraBody", + "OpenAIChatCompletionTextOnlyMessageContent", + "OpenAIChatCompletionToolCall", + "OpenAIChatCompletionToolCallFunction", + "OpenAIChatCompletionUsage", + "OpenAIChatCompletionUsageCompletionTokensDetails", + "OpenAIChatCompletionUsagePromptTokensDetails", + "OpenAIChoice", + "OpenAIChoiceDelta", + "OpenAIChoiceLogprobs", + "OpenAIChunkChoice", + "OpenAICompletion", + "OpenAICompletionChoice", + "OpenAICompletionLogprobs", + "OpenAICompletionRequestWithExtraBody", + "OpenAICompletionWithInputMessages", + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "OpenAICreateVectorStoreRequestWithExtraBody", + "OpenAIDeleteResponseObject", + "OpenAIDeveloperMessageParam", + "OpenAIEmbeddingData", + "OpenAIEmbeddingUsage", + "OpenAIEmbeddingsRequestWithExtraBody", + "OpenAIEmbeddingsResponse", + "OpenAIFile", + "OpenAIFileDeleteResponse", + "OpenAIFileFile", + "OpenAIFileObject", + "OpenAIFilePurpose", + "OpenAIImageURL", + "OpenAIJSONSchema", + "OpenAIListModelsResponse", + "OpenAIMessageParam", + "OpenAIModel", + "Order", + "OpenAIResponseAnnotationCitation", + "OpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseAnnotationFileCitation", + "OpenAIResponseAnnotationFilePath", + "OpenAIResponseAnnotations", + "OpenAIResponseContentPart", + "OpenAIResponseContentPartOutputText", + "OpenAIResponseContentPartReasoningSummary", + "OpenAIResponseContentPartReasoningText", + "OpenAIResponseContentPartRefusal", + "OpenAIResponseError", + "OpenAIResponseFormatJSONObject", 
+ "OpenAIResponseFormatJSONSchema", + "OpenAIResponseFormatParam", + "OpenAIResponseFormatText", + "OpenAIResponseInput", + "OpenAIResponseInputFunctionToolCallOutput", + "OpenAIResponseInputMessageContent", + "OpenAIResponseInputMessageContentFile", + "OpenAIResponseInputMessageContentImage", + "OpenAIResponseInputMessageContentText", + "OpenAIResponseInputTool", + "OpenAIResponseInputToolFileSearch", + "OpenAIResponseInputToolFunction", + "OpenAIResponseInputToolMCP", + "OpenAIResponseInputToolWebSearch", + "OpenAIResponseMCPApprovalRequest", + "OpenAIResponseMCPApprovalResponse", + "OpenAIResponseMessage", + "OpenAIResponseObject", + "OpenAIResponseObjectStream", + "OpenAIResponseObjectStreamResponseCompleted", + "OpenAIResponseObjectStreamResponseContentPartAdded", + "OpenAIResponseObjectStreamResponseContentPartDone", + "OpenAIResponseObjectStreamResponseCreated", + "OpenAIResponseObjectStreamResponseFailed", + "OpenAIResponseObjectStreamResponseFileSearchCallCompleted", + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress", + "OpenAIResponseObjectStreamResponseFileSearchCallSearching", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", + "OpenAIResponseObjectStreamResponseInProgress", + "OpenAIResponseObjectStreamResponseIncomplete", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", + "OpenAIResponseObjectStreamResponseMcpCallCompleted", + "OpenAIResponseObjectStreamResponseMcpCallFailed", + "OpenAIResponseObjectStreamResponseMcpCallInProgress", + "OpenAIResponseObjectStreamResponseMcpListToolsCompleted", + "OpenAIResponseObjectStreamResponseMcpListToolsFailed", + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress", + "OpenAIResponseObjectStreamResponseOutputItemAdded", + "OpenAIResponseObjectStreamResponseOutputItemDone", + "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded", + "OpenAIResponseObjectStreamResponseOutputTextDelta", + "OpenAIResponseObjectStreamResponseOutputTextDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone", + "OpenAIResponseObjectStreamResponseReasoningTextDelta", + "OpenAIResponseObjectStreamResponseReasoningTextDone", + "OpenAIResponseObjectStreamResponseRefusalDelta", + "OpenAIResponseObjectStreamResponseRefusalDone", + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted", + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress", + "OpenAIResponseObjectStreamResponseWebSearchCallSearching", + "OpenAIResponseObjectWithInput", + "OpenAIResponseOutput", + "OpenAIResponseOutputMessageContent", + "OpenAIResponseOutputMessageContentOutputText", + "OpenAIResponseOutputMessageFileSearchToolCall", + "OpenAIResponseOutputMessageFileSearchToolCallResults", + "OpenAIResponseOutputMessageFunctionToolCall", + "OpenAIResponseOutputMessageMCPCall", + "OpenAIResponseOutputMessageMCPListTools", + "OpenAIResponseOutputMessageWebSearchToolCall", + "OpenAIResponsePrompt", + "OpenAIResponseText", + "OpenAIResponseTextFormat", + "OpenAIResponseTool", + "OpenAIResponseToolMCP", + "OpenAIResponseUsage", + "OpenAIResponseUsageInputTokensDetails", + "OpenAIResponseUsageOutputTokensDetails", + "OpenAISystemMessageParam", + "OpenAITokenLogProb", + "OpenAIToolMessageParam", + "OpenAITopLogProb", 
+ "OpenAIUserMessageParam", + "OptimizerConfig", + "OptimizerType", + "PaginatedResponse", + "ParamType", + "parse_type", + "PostTraining", + "PostTrainingMetric", + "PostTrainingJob", + "PostTrainingJobArtifactsResponse", + "PostTrainingJobLogStream", + "PostTrainingJobStatusResponse", + "PostTrainingRLHFRequest", + "Prompt", + "Prompts", + "ProviderInfo", + "ProviderSpec", + "Providers", + "python_type_to_name", + "QATFinetuningConfig", + "QuantizationConfig", + "QuantizationType", + "QueryChunksResponse", + "RAGDocument", + "RAGQueryConfig", + "RAGQueryGenerator", + "RAGQueryGeneratorConfig", + "RAGQueryResult", + "RAGSearchMode", + "register_dynamic_schema_type", + "register_schema", + "RLHFAlgorithm", + "RRFRanker", + "Ranker", + "RegexParserScoringFnParams", + "RemoteProviderConfig", + "RemoteProviderSpec", + "RerankData", + "RerankResponse", + "Resource", + "ResourceNotFoundError", + "ResourceType", + "ResponseFormat", + "ResponseFormatType", + "ResponseGuardrail", + "ResponseGuardrailSpec", + "RouteInfo", + "RoutingTable", + "RowsDataSource", + "RunShieldResponse", + "Safety", + "SafetyViolation", + "SamplingParams", + "SamplingStrategy", + "ScoreBatchResponse", + "ScoreResponse", + "Scoring", + "ScoringFn", + "ScoringFnInput", + "ScoringFnParams", + "ScoringFnParamsType", + "ScoringFunctionStore", + "ScoringFunctions", + "ScoringFunctionsProtocolPrivate", + "ScoringResult", + "ScoringResultRow", + "Schema", + "SchemaInfo", + "SchemaOptions", + "SearchRankingOptions", + "Shield", + "ShieldInput", + "ShieldStore", + "Shields", + "ShieldsProtocolPrivate", + "SpecialToolGroup", + "StrictJsonType", + "StringType", + "SystemMessage", + "SystemMessageBehavior", + "TextContentItem", + "TextTruncation", + "TokenLogProbs", + "TokenValidationError", + "ToolChoice", + "ToolGroupNotFoundError", + "ToolDef", + "ToolGroup", + "ToolGroupInput", + "ToolGroups", + "ToolGroupsProtocolPrivate", + "ToolInvocationResult", + "ToolResponseMessage", + "ToolRuntime", + "ToolStore", + "TopKSamplingStrategy", + "TopPSamplingStrategy", + "TrainingConfig", + "UnsupportedModelError", + "unwrap_generic_list", + "unwrap_optional_type", + "unwrap_union_types", + "URIDataSource", + "URL", + "_URLOrData", + "UserMessage", + "VectorIO", + "VectorStore", + "VectorStoreChunkingStrategy", + "VectorStoreChunkingStrategyAuto", + "VectorStoreChunkingStrategyStatic", + "VectorStoreChunkingStrategyStaticConfig", + "VectorStoreContent", + "VectorStoreCreateRequest", + "VectorStoreDeleteResponse", + "VectorStoreFileBatchObject", + "VectorStoreFileContentResponse", + "VectorStoreFileCounts", + "VectorStoreFileDeleteResponse", + "VectorStoreFileLastError", + "VectorStoreFileObject", + "VectorStoreFileStatus", + "VectorStoreFilesListInBatchResponse", + "VectorStoreInput", + "VectorStoreListFilesResponse", + "VectorStoreListResponse", + "VectorStoreModifyRequest", + "VectorStoreObject", + "VectorStoreSearchRequest", + "VectorStoreSearchResponse", + "VectorStoreSearchResponsePage", + "VectorStoreTable", + "VectorStoreNotFoundError", + "VectorStoresProtocolPrivate", + "VersionInfo", + "ViolationLevel", + "webmethod", + "WebMethod", + "WebSearchToolTypes", + "WeightedRanker", +] diff --git a/src/llama_stack_api/agents.py b/src/llama_stack_api/agents.py new file mode 100644 index 000000000..8d3b489e1 --- /dev/null +++ b/src/llama_stack_api/agents.py @@ -0,0 +1,156 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncIterator
+from typing import Annotated, Protocol, runtime_checkable
+
+from pydantic import BaseModel
+
+from llama_stack_api.common.responses import Order
+from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .openai_responses import (
+    ListOpenAIResponseInputItem,
+    ListOpenAIResponseObject,
+    OpenAIDeleteResponseObject,
+    OpenAIResponseInput,
+    OpenAIResponseInputTool,
+    OpenAIResponseObject,
+    OpenAIResponseObjectStream,
+    OpenAIResponsePrompt,
+    OpenAIResponseText,
+)
+
+
+@json_schema_type
+class ResponseGuardrailSpec(BaseModel):
+    """Specification for a guardrail to apply during response generation.
+
+    :param type: The type/identifier of the guardrail.
+    """
+
+    type: str
+    # TODO: more fields to be added for guardrail configuration
+
+
+ResponseGuardrail = str | ResponseGuardrailSpec
+
+
+@runtime_checkable
+class Agents(Protocol):
+    """Agents
+
+    APIs for creating and interacting with agentic systems."""
+
+    # We situate the OpenAI Responses API in the Agents API just as we did for
+    # Inference. The Responses API, in its intent, serves the same purpose as
+    # the Agents API above -- it is essentially a lightweight "agentic loop" with
+    # integrated tool calling.
+    #
+    # Both of these APIs are inherently stateful.
+
+    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
+    async def get_openai_response(
+        self,
+        response_id: str,
+    ) -> OpenAIResponseObject:
+        """Get a model response.
+
+        :param response_id: The ID of the OpenAI response to retrieve.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
+    async def create_openai_response(
+        self,
+        input: str | list[OpenAIResponseInput],
+        model: str,
+        prompt: OpenAIResponsePrompt | None = None,
+        instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
+        previous_response_id: str | None = None,
+        conversation: str | None = None,
+        store: bool | None = True,
+        stream: bool | None = False,
+        temperature: float | None = None,
+        text: OpenAIResponseText | None = None,
+        tools: list[OpenAIResponseInputTool] | None = None,
+        include: list[str] | None = None,
+        max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
+        guardrails: Annotated[
+            list[ResponseGuardrail] | None,
+            ExtraBodyField(
+                "List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
+            ),
+        ] = None,
+        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
+    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
+        """Create a model response.
+
+        :param input: Input message(s) to create the response.
+        :param model: The underlying LLM used for completions.
+        :param prompt: (Optional) Prompt object with ID, version, and variables.
+        :param previous_response_id: (Optional) If specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
+        :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
+        :param include: (Optional) Additional fields to include in the response.
+        :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
+        :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
+        :param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
+    async def list_openai_responses(
+        self,
+        after: str | None = None,
+        limit: int | None = 50,
+        model: str | None = None,
+        order: Order | None = Order.desc,
+    ) -> ListOpenAIResponseObject:
+        """List all responses.
+
+        :param after: A response ID to list responses after, used for pagination.
+        :param limit: The number of responses to return.
+        :param model: The model to filter responses by.
+        :param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
+        :returns: A ListOpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
+    async def list_openai_response_input_items(
+        self,
+        response_id: str,
+        after: str | None = None,
+        before: str | None = None,
+        include: list[str] | None = None,
+        limit: int | None = 20,
+        order: Order | None = Order.desc,
+    ) -> ListOpenAIResponseInputItem:
+        """List input items.
+
+        :param response_id: The ID of the response to retrieve input items for.
+        :param after: An item ID to list items after, used for pagination.
+        :param before: An item ID to list items before, used for pagination.
+        :param include: Additional fields to include in the response.
+        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
+        :param order: The order to return the input items in. Default is desc.
+        :returns: A ListOpenAIResponseInputItem.
+        """
+        ...
+
+    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
+        """Delete a response.
+
+        :param response_id: The ID of the OpenAI response to delete.
+        :returns: An OpenAIDeleteResponseObject.
+        """
+        ...
diff --git a/src/llama_stack_api/batches.py b/src/llama_stack_api/batches.py
new file mode 100644
index 000000000..00c47d39f
--- /dev/null
+++ b/src/llama_stack_api/batches.py
@@ -0,0 +1,96 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Literal, Protocol, runtime_checkable
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+try:
+    from openai.types import Batch as BatchObject
+except ImportError as e:
+    raise ImportError("OpenAI package is required for batches API. Please install it with: pip install openai") from e
+
+
+@json_schema_type
+class ListBatchesResponse(BaseModel):
+    """Response containing a list of batch objects."""
+
+    object: Literal["list"] = "list"
+    data: list[BatchObject] = Field(..., description="List of batch objects")
+    first_id: str | None = Field(default=None, description="ID of the first batch in the list")
+    last_id: str | None = Field(default=None, description="ID of the last batch in the list")
+    has_more: bool = Field(default=False, description="Whether there are more batches available")
+
+
+@runtime_checkable
+class Batches(Protocol):
+    """
+    The Batches API enables efficient processing of multiple requests in a single operation,
+    particularly useful for processing large datasets, batch evaluation workflows, and
+    cost-effective inference at scale.
+
+    The API is designed to allow use of openai client libraries for seamless integration.
+
+    This API provides the following extensions:
+    - idempotent batch creation
+
+    Note: This API is currently under active development and may undergo changes.
+    """
+
+    @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
+    async def create_batch(
+        self,
+        input_file_id: str,
+        endpoint: str,
+        completion_window: Literal["24h"],
+        metadata: dict[str, str] | None = None,
+        idempotency_key: str | None = None,
+    ) -> BatchObject:
+        """Create a new batch for processing multiple API requests.
+
+        :param input_file_id: The ID of an uploaded file containing requests for the batch.
+        :param endpoint: The endpoint to be used for all requests in the batch.
+        :param completion_window: The time window within which the batch should be processed.
+        :param metadata: Optional metadata for the batch.
+        :param idempotency_key: Optional idempotency key. When provided, enables idempotent behavior.
+        :returns: The created batch object.
+        """
+        ...
+
+    @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
+    async def retrieve_batch(self, batch_id: str) -> BatchObject:
+        """Retrieve information about a specific batch.
+
+        :param batch_id: The ID of the batch to retrieve.
+        :returns: The batch object.
+        """
+        ...
+
+    @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
+    async def cancel_batch(self, batch_id: str) -> BatchObject:
+        """Cancel a batch that is in progress.
+
+        :param batch_id: The ID of the batch to cancel.
+        :returns: The updated batch object.
+        """
+        ...
+
+    @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
+    async def list_batches(
+        self,
+        after: str | None = None,
+        limit: int = 20,
+    ) -> ListBatchesResponse:
+        """List all batches for the current user.
+
+        :param after: A cursor for pagination; returns batches after this batch ID.
+        :param limit: Number of batches to return (default 20, max 100).
+        :returns: A list of batch objects.
+        """
+        ...
diff --git a/src/llama_stack_api/benchmarks.py b/src/llama_stack_api/benchmarks.py
new file mode 100644
index 000000000..fdb2ccad4
--- /dev/null
+++ b/src/llama_stack_api/benchmarks.py
@@ -0,0 +1,105 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
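# ---------------------------------------------------------------------------
# Illustrative sketch (editor's note, not part of the patch): the Batches
# protocol defined in batches.py above is deliberately OpenAI-compatible, so
# the official `openai` client can drive it directly. The base_url (assumed
# local Llama Stack endpoint) and the chat-completions endpoint value are
# assumptions for illustration only.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed local stack endpoint

# Upload a JSONL file of requests, then batch it against chat completions.
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"job": "nightly-eval"},
)
print(client.batches.retrieve(batch.id).status)
# ---------------------------------------------------------------------------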
+from typing import Any, Literal, Protocol, runtime_checkable + +from pydantic import BaseModel, Field + +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA + + +class CommonBenchmarkFields(BaseModel): + dataset_id: str + scoring_functions: list[str] + metadata: dict[str, Any] = Field( + default_factory=dict, + description="Metadata for this evaluation task", + ) + + +@json_schema_type +class Benchmark(CommonBenchmarkFields, Resource): + """A benchmark resource for evaluating model performance. + + :param dataset_id: Identifier of the dataset to use for the benchmark evaluation + :param scoring_functions: List of scoring function identifiers to apply during evaluation + :param metadata: Metadata for this evaluation task + :param type: The resource type, always benchmark + """ + + type: Literal[ResourceType.benchmark] = ResourceType.benchmark + + @property + def benchmark_id(self) -> str: + return self.identifier + + @property + def provider_benchmark_id(self) -> str | None: + return self.provider_resource_id + + +class BenchmarkInput(CommonBenchmarkFields, BaseModel): + benchmark_id: str + provider_id: str | None = None + provider_benchmark_id: str | None = None + + +@json_schema_type +class ListBenchmarksResponse(BaseModel): + data: list[Benchmark] + + +@runtime_checkable +class Benchmarks(Protocol): + @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA) + async def list_benchmarks(self) -> ListBenchmarksResponse: + """List all benchmarks. + + :returns: A ListBenchmarksResponse. + """ + ... + + @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) + async def get_benchmark( + self, + benchmark_id: str, + ) -> Benchmark: + """Get a benchmark by its ID. + + :param benchmark_id: The ID of the benchmark to get. + :returns: A Benchmark. + """ + ... + + @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True) + async def register_benchmark( + self, + benchmark_id: str, + dataset_id: str, + scoring_functions: list[str], + provider_benchmark_id: str | None = None, + provider_id: str | None = None, + metadata: dict[str, Any] | None = None, + ) -> None: + """Register a benchmark. + + :param benchmark_id: The ID of the benchmark to register. + :param dataset_id: The ID of the dataset to use for the benchmark. + :param scoring_functions: The scoring functions to use for the benchmark. + :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. + :param provider_id: The ID of the provider to use for the benchmark. + :param metadata: The metadata to use for the benchmark. + """ + ... + + @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True) + async def unregister_benchmark(self, benchmark_id: str) -> None: + """Unregister a benchmark. + + :param benchmark_id: The ID of the benchmark to unregister. + """ + ... 
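# ---------------------------------------------------------------------------
# Illustrative sketch (editor's note, not part of the patch): a minimal
# in-memory class satisfying the runtime-checkable Benchmarks protocol above,
# showing how Benchmark and ListBenchmarksResponse fit together. The Resource
# base fields (identifier, provider_id, provider_resource_id) are assumed
# from llama_stack_api.resource; the storage strategy is purely illustrative.

from llama_stack_api.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse

class InMemoryBenchmarks:
    def __init__(self) -> None:
        self._benchmarks: dict[str, Benchmark] = {}

    async def list_benchmarks(self) -> ListBenchmarksResponse:
        return ListBenchmarksResponse(data=list(self._benchmarks.values()))

    async def get_benchmark(self, benchmark_id: str) -> Benchmark:
        return self._benchmarks[benchmark_id]

    async def register_benchmark(
        self,
        benchmark_id: str,
        dataset_id: str,
        scoring_functions: list[str],
        provider_benchmark_id: str | None = None,
        provider_id: str | None = None,
        metadata: dict | None = None,
    ) -> None:
        self._benchmarks[benchmark_id] = Benchmark(
            identifier=benchmark_id,
            provider_id=provider_id or "inline",
            provider_resource_id=provider_benchmark_id,
            dataset_id=dataset_id,
            scoring_functions=scoring_functions,
            metadata=metadata or {},
        )

    async def unregister_benchmark(self, benchmark_id: str) -> None:
        self._benchmarks.pop(benchmark_id, None)

# Structural check: @runtime_checkable protocols verify method presence.
assert isinstance(InMemoryBenchmarks(), Benchmarks)
# ---------------------------------------------------------------------------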
diff --git a/llama_stack/providers/utils/datasetio/__init__.py b/src/llama_stack_api/common/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/datasetio/__init__.py
rename to src/llama_stack_api/common/__init__.py
diff --git a/src/llama_stack_api/common/content_types.py b/src/llama_stack_api/common/content_types.py
new file mode 100644
index 000000000..1bfe109c1
--- /dev/null
+++ b/src/llama_stack_api/common/content_types.py
@@ -0,0 +1,101 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Annotated, Literal
+
+from pydantic import BaseModel, Field, model_validator
+
+from llama_stack_api.schema_utils import json_schema_type, register_schema
+
+
+@json_schema_type
+class URL(BaseModel):
+    """A URL reference to external content.
+
+    :param uri: The URL string pointing to the resource
+    """
+
+    uri: str
+
+
+class _URLOrData(BaseModel):
+    """
+    A URL or a base64 encoded string
+
+    :param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits.
+    :param data: base64 encoded image data as string
+    """
+
+    url: URL | None = None
+    # data is a base64 encoded string, hint with contentEncoding=base64
+    data: str | None = Field(default=None, json_schema_extra={"contentEncoding": "base64"})
+
+    @model_validator(mode="before")
+    @classmethod
+    def validator(cls, values):
+        if isinstance(values, dict):
+            return values
+        return {"url": values}
+
+
+@json_schema_type
+class ImageContentItem(BaseModel):
+    """An image content item
+
+    :param type: Discriminator type of the content item. Always "image"
+    :param image: Image as a base64 encoded string or a URL
+    """
+
+    type: Literal["image"] = "image"
+    image: _URLOrData
+
+
+@json_schema_type
+class TextContentItem(BaseModel):
+    """A text content item
+
+    :param type: Discriminator type of the content item. Always "text"
+    :param text: Text content
+    """
+
+    type: Literal["text"] = "text"
+    text: str
+
+
+# other modalities can be added here
+InterleavedContentItem = Annotated[
+    ImageContentItem | TextContentItem,
+    Field(discriminator="type"),
+]
+register_schema(InterleavedContentItem, name="InterleavedContentItem")
+
+# accept a single "str" as a special case since it is common
+InterleavedContent = str | InterleavedContentItem | list[InterleavedContentItem]
+register_schema(InterleavedContent, name="InterleavedContent")
+
+
+@json_schema_type
+class TextDelta(BaseModel):
+    """A text content delta for streaming responses.
+
+    :param type: Discriminator type of the delta. Always "text"
+    :param text: The incremental text content
+    """
+
+    type: Literal["text"] = "text"
+    text: str
+
+
+@json_schema_type
+class ImageDelta(BaseModel):
+    """An image content delta for streaming responses.
+
+    :param type: Discriminator type of the delta. Always "image"
+    :param image: The incremental image data as bytes
+    """
+
+    type: Literal["image"] = "image"
+    image: bytes
diff --git a/llama_stack/apis/common/errors.py b/src/llama_stack_api/common/errors.py
similarity index 92%
rename from llama_stack/apis/common/errors.py
rename to src/llama_stack_api/common/errors.py
index a421d0c6f..de938b249 100644
--- a/llama_stack/apis/common/errors.py
+++ b/src/llama_stack_api/common/errors.py
@@ -56,14 +56,6 @@ class ToolGroupNotFoundError(ResourceNotFoundError):
         super().__init__(toolgroup_name, "Tool Group", "client.toolgroups.list()")
 
 
-class SessionNotFoundError(ValueError):
-    """raised when Llama Stack cannot find a referenced session or access is denied"""
-
-    def __init__(self, session_name: str) -> None:
-        message = f"Session '{session_name}' not found or access denied."
-        super().__init__(message)
-
-
 class ModelTypeError(TypeError):
     """raised when a model is present but not the correct type"""
 
diff --git a/llama_stack/apis/common/job_types.py b/src/llama_stack_api/common/job_types.py
similarity index 94%
rename from llama_stack/apis/common/job_types.py
rename to src/llama_stack_api/common/job_types.py
index 5da42bfd3..b6ef35d7f 100644
--- a/llama_stack/apis/common/job_types.py
+++ b/src/llama_stack_api/common/job_types.py
@@ -7,7 +7,7 @@ from enum import Enum
 
 from pydantic import BaseModel
 
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type
 
 
 class JobStatus(Enum):
diff --git a/src/llama_stack_api/common/responses.py b/src/llama_stack_api/common/responses.py
new file mode 100644
index 000000000..c843ce1d9
--- /dev/null
+++ b/src/llama_stack_api/common/responses.py
@@ -0,0 +1,77 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel
+
+from llama_stack_api.schema_utils import json_schema_type
+
+
+class Order(Enum):
+    """Sort order for paginated responses.
+    :cvar asc: Ascending order
+    :cvar desc: Descending order
+    """
+
+    asc = "asc"
+    desc = "desc"
+
+
+@json_schema_type
+class PaginatedResponse(BaseModel):
+    """A generic paginated response that follows a simple format.
+
+    :param data: The list of items for the current page
+    :param has_more: Whether there are more items available after this set
+    :param url: The URL for accessing this list
+    """
+
+    data: list[dict[str, Any]]
+    has_more: bool
+    url: str | None = None
+
+
+# This is a short-term solution to allow the inference API to return metrics.
+# The ideal way to do this is to have a way for all response types to include metrics,
+# and for all metric events logged to the telemetry API to be included with the response.
+# To do this, we will need to augment all response types with a metrics field.
+# We have hit a blocker from the Stainless SDK that prevents us from doing this.
+# The blocker is that if we were to augment the response types that have a data field
+# in them like so
+# class ListModelsResponse(BaseModel):
+#     metrics: Optional[List[MetricEvent]] = None
+#     data: List[Models]
+#     ...
+# the client SDK would need to access the data by using a .data field, which is not
+# ergonomic. The Stainless SDK does support unwrapping the response type, but it
+# requires the response type to have only a single field.
+ +# We will need a way in the client SDK to signal that the metrics are needed +# and if they are needed, the client SDK has to return the full response type +# without unwrapping it. + + +@json_schema_type +class MetricInResponse(BaseModel): + """A metric value included in API responses. + :param metric: The name of the metric + :param value: The numeric value of the metric + :param unit: (Optional) The unit of measurement for the metric value + """ + + metric: str + value: int | float + unit: str | None = None + + +class MetricResponseMixin(BaseModel): + """Mixin class for API responses that can include metrics. + :param metrics: (Optional) List of metrics associated with the API response + """ + + metrics: list[MetricInResponse] | None = None diff --git a/llama_stack/apis/common/training_types.py b/src/llama_stack_api/common/training_types.py similarity index 96% rename from llama_stack/apis/common/training_types.py rename to src/llama_stack_api/common/training_types.py index 5c236a25d..aa3481770 100644 --- a/llama_stack/apis/common/training_types.py +++ b/src/llama_stack_api/common/training_types.py @@ -8,7 +8,7 @@ from datetime import datetime from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type @json_schema_type diff --git a/llama_stack/apis/common/type_system.py b/src/llama_stack_api/common/type_system.py similarity index 88% rename from llama_stack/apis/common/type_system.py rename to src/llama_stack_api/common/type_system.py index 0e62ee484..8297713cf 100644 --- a/llama_stack/apis/common/type_system.py +++ b/src/llama_stack_api/common/type_system.py @@ -8,7 +8,7 @@ from typing import Annotated, Literal from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type, register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema @json_schema_type @@ -103,17 +103,6 @@ class CompletionInputType(BaseModel): type: Literal["completion_input"] = "completion_input" -@json_schema_type -class AgentTurnInputType(BaseModel): - """Parameter type for agent turn input. - - :param type: Discriminator type. Always "agent_turn_input" - """ - - # expects List[Message] for messages (may also include attachments?) - type: Literal["agent_turn_input"] = "agent_turn_input" - - @json_schema_type class DialogType(BaseModel): """Parameter type for dialog data with semantic output labels. @@ -135,8 +124,7 @@ ParamType = Annotated[ | JsonType | UnionType | ChatCompletionInputType - | CompletionInputType - | AgentTurnInputType, + | CompletionInputType, Field(discriminator="type"), ] register_schema(ParamType, name="ParamType") diff --git a/src/llama_stack_api/conversations.py b/src/llama_stack_api/conversations.py new file mode 100644 index 000000000..81b5ab2c4 --- /dev/null +++ b/src/llama_stack_api/conversations.py @@ -0,0 +1,270 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
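# ---------------------------------------------------------------------------
# Illustrative sketch (editor's note, not part of the patch): how the
# MetricResponseMixin from common/responses.py above is intended to be used.
# A response model inherits the mixin so an optional `metrics` list can ride
# along without disturbing the primary fields; CompletionLikeResponse is a
# hypothetical model for illustration.

from llama_stack_api.common.responses import MetricInResponse, MetricResponseMixin

class CompletionLikeResponse(MetricResponseMixin):
    completion: str

resp = CompletionLikeResponse(
    completion="Hello!",
    metrics=[MetricInResponse(metric="prompt_tokens", value=12, unit="tokens")],
)
print(resp.metrics[0].metric)  # -> "prompt_tokens"
# ---------------------------------------------------------------------------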
+
+from enum import StrEnum
+from typing import Annotated, Literal, Protocol, runtime_checkable
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.openai_responses import (
+    OpenAIResponseInputFunctionToolCallOutput,
+    OpenAIResponseMCPApprovalRequest,
+    OpenAIResponseMCPApprovalResponse,
+    OpenAIResponseMessage,
+    OpenAIResponseOutputMessageFileSearchToolCall,
+    OpenAIResponseOutputMessageFunctionToolCall,
+    OpenAIResponseOutputMessageMCPCall,
+    OpenAIResponseOutputMessageMCPListTools,
+    OpenAIResponseOutputMessageWebSearchToolCall,
+)
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+Metadata = dict[str, str]
+
+
+@json_schema_type
+class Conversation(BaseModel):
+    """OpenAI-compatible conversation object."""
+
+    id: str = Field(..., description="The unique ID of the conversation.")
+    object: Literal["conversation"] = Field(
+        default="conversation", description="The object type, which is always conversation."
+    )
+    created_at: int = Field(
+        ..., description="The time at which the conversation was created, measured in seconds since the Unix epoch."
+    )
+    metadata: Metadata | None = Field(
+        default=None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard.",
+    )
+    items: list[dict] | None = Field(
+        default=None,
+        description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
+    )
+
+
+@json_schema_type
+class ConversationMessage(BaseModel):
+    """OpenAI-compatible message item for conversations."""
+
+    id: str = Field(..., description="unique identifier for this message")
+    content: list[dict] = Field(..., description="message content")
+    role: str = Field(..., description="message role")
+    status: str = Field(..., description="message status")
+    type: Literal["message"] = "message"
+    object: Literal["message"] = "message"
+
+
+ConversationItem = Annotated[
+    OpenAIResponseMessage
+    | OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
+    | OpenAIResponseMCPApprovalRequest
+    | OpenAIResponseMCPApprovalResponse
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools,
+    Field(discriminator="type"),
+]
+register_schema(ConversationItem, name="ConversationItem")
+
+# Using OpenAI types directly caused issues but some notes for reference:
+# Note that ConversationItem is an Annotated Union of the types below:
+# from openai.types.responses import *
+# from openai.types.responses.response_item import *
+# from openai.types.conversations import ConversationItem
+# f = [
+#     ResponseFunctionToolCallItem,
+#     ResponseFunctionToolCallOutputItem,
+#     ResponseFileSearchToolCall,
+#     ResponseFunctionWebSearch,
+#     ImageGenerationCall,
+#     ResponseComputerToolCall,
+#     ResponseComputerToolCallOutputItem,
+#     ResponseReasoningItem,
+#     ResponseCodeInterpreterToolCall,
+#     LocalShellCall,
+#     LocalShellCallOutput,
+#     McpListTools,
+#     McpApprovalRequest,
+#     McpApprovalResponse,
+#     McpCall,
+#     ResponseCustomToolCall,
+#     ResponseCustomToolCallOutput
+# ]
+
+
+@json_schema_type
+class ConversationDeletedResource(BaseModel):
+    """Response for deleted conversation."""
+
+    id: str = Field(..., description="The deleted conversation identifier")
+    object: str = Field(default="conversation.deleted", description="Object type")
+    deleted: bool = Field(default=True, description="Whether the object was deleted")
+
+
+@json_schema_type
+class ConversationItemCreateRequest(BaseModel):
+    """Request body for creating conversation items."""
+
+    items: list[ConversationItem] = Field(
+        ...,
+        description="Items to include in the conversation context. You may add up to 20 items at a time.",
+        max_length=20,
+    )
+
+
+class ConversationItemInclude(StrEnum):
+    """
+    Specify additional output data to include in the model response.
+    """
+
+    web_search_call_action_sources = "web_search_call.action.sources"
+    code_interpreter_call_outputs = "code_interpreter_call.outputs"
+    computer_call_output_output_image_url = "computer_call_output.output.image_url"
+    file_search_call_results = "file_search_call.results"
+    message_input_image_image_url = "message.input_image.image_url"
+    message_output_text_logprobs = "message.output_text.logprobs"
+    reasoning_encrypted_content = "reasoning.encrypted_content"
+
+
+@json_schema_type
+class ConversationItemList(BaseModel):
+    """List of conversation items with pagination."""
+
+    object: str = Field(default="list", description="Object type")
+    data: list[ConversationItem] = Field(..., description="List of conversation items")
+    first_id: str | None = Field(default=None, description="The ID of the first item in the list")
+    last_id: str | None = Field(default=None, description="The ID of the last item in the list")
+    has_more: bool = Field(default=False, description="Whether there are more items available")
+
+
+@json_schema_type
+class ConversationItemDeletedResource(BaseModel):
+    """Response for deleted conversation item."""
+
+    id: str = Field(..., description="The deleted item identifier")
+    object: str = Field(default="conversation.item.deleted", description="Object type")
+    deleted: bool = Field(default=True, description="Whether the object was deleted")
+
+
+@runtime_checkable
+class Conversations(Protocol):
+    """Conversations
+
+    Protocol for conversation management operations."""
+
+    @webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
+    async def create_conversation(
+        self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
+    ) -> Conversation:
+        """Create a conversation.
+
+        Create a new conversation, optionally seeded with initial items and metadata.
+
+        :param items: Initial items to include in the conversation context.
+        :param metadata: Set of key-value pairs that can be attached to an object.
+        :returns: The created conversation object.
+        """
+        ...
+
+    @webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
+    async def get_conversation(self, conversation_id: str) -> Conversation:
+        """Retrieve a conversation.
+
+        Get a conversation with the given ID.
+
+        :param conversation_id: The conversation identifier.
+        :returns: The conversation object.
+        """
+        ...
+
+    @webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
+    async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
+        """Update a conversation.
+
+        Update a conversation's metadata with the given ID.
+
+        :param conversation_id: The conversation identifier.
+        :param metadata: Set of key-value pairs that can be attached to an object.
+        :returns: The updated conversation object.
+        """
+        ...
+ + @webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1) + async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource: + """Delete a conversation. + + Delete a conversation with the given ID. + + :param conversation_id: The conversation identifier. + :returns: The deleted conversation resource. + """ + ... + + @webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1) + async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList: + """Create items. + + Create items in the conversation. + + :param conversation_id: The conversation identifier. + :param items: Items to include in the conversation context. + :returns: List of created items. + """ + ... + + @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1) + async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem: + """Retrieve an item. + + Retrieve a conversation item. + + :param conversation_id: The conversation identifier. + :param item_id: The item identifier. + :returns: The conversation item. + """ + ... + + @webmethod(route="/conversations/{conversation_id}/items", method="GET", level=LLAMA_STACK_API_V1) + async def list_items( + self, + conversation_id: str, + after: str | None = None, + include: list[ConversationItemInclude] | None = None, + limit: int | None = None, + order: Literal["asc", "desc"] | None = None, + ) -> ConversationItemList: + """List items. + + List items in the conversation. + + :param conversation_id: The conversation identifier. + :param after: An item ID to list items after, used in pagination. + :param include: Specify additional output data to include in the response. + :param limit: A limit on the number of objects to be returned (1-100, default 20). + :param order: The order to return items in (asc or desc, default desc). + :returns: List of conversation items. + """ + ... + + @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="DELETE", level=LLAMA_STACK_API_V1) + async def openai_delete_conversation_item( + self, conversation_id: str, item_id: str + ) -> ConversationItemDeletedResource: + """Delete an item. + + Delete a conversation item. + + :param conversation_id: The conversation identifier. + :param item_id: The item identifier. + :returns: The deleted item resource. + """ + ... diff --git a/src/llama_stack_api/datasetio.py b/src/llama_stack_api/datasetio.py new file mode 100644 index 000000000..309a8ff41 --- /dev/null +++ b/src/llama_stack_api/datasetio.py @@ -0,0 +1,55 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Protocol, runtime_checkable + +from llama_stack_api.common.responses import PaginatedResponse +from llama_stack_api.datasets import Dataset +from llama_stack_api.schema_utils import webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1BETA + + +class DatasetStore(Protocol): + def get_dataset(self, dataset_id: str) -> Dataset: ... 
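# ---------------------------------------------------------------------------
# Illustrative sketch (editor's note, not part of the patch): exercising the
# Conversations protocol from conversations.py above against any conforming
# implementation. The raw dicts stand in for OpenAIResponseMessage payloads
# and would be validated by the implementation; their exact shape here is an
# assumption for illustration.

from llama_stack_api.conversations import Conversations

async def conversations_demo(impl: Conversations) -> None:
    # Create a conversation, append a user message item, then page through items.
    conv = await impl.create_conversation(metadata={"topic": "support"})
    await impl.add_items(
        conv.id,
        items=[{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "Hi!"}]}],
    )
    page = await impl.list_items(conv.id, limit=20, order="desc")
    print(conv.id, page.has_more, len(page.data))
# ---------------------------------------------------------------------------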
+ + +@runtime_checkable +class DatasetIO(Protocol): + # keeping for aligning with inference/safety, but this is not used + dataset_store: DatasetStore + + @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA) + async def iterrows( + self, + dataset_id: str, + start_index: int | None = None, + limit: int | None = None, + ) -> PaginatedResponse: + """Get a paginated list of rows from a dataset. + + Uses offset-based pagination where: + - start_index: The starting index (0-based). If None, starts from beginning. + - limit: Number of items to return. If None or -1, returns all items. + + The response includes: + - data: List of items for the current page. + - has_more: Whether there are more items available after this set. + + :param dataset_id: The ID of the dataset to get the rows from. + :param start_index: Index into dataset for the first row to get. Get all rows if None. + :param limit: The number of rows to get. + :returns: A PaginatedResponse. + """ + ... + + @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA) + async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: + """Append rows to a dataset. + + :param dataset_id: The ID of the dataset to append the rows to. + :param rows: The rows to append to the dataset. + """ + ... diff --git a/src/llama_stack_api/datasets.py b/src/llama_stack_api/datasets.py new file mode 100644 index 000000000..6d707aa8e --- /dev/null +++ b/src/llama_stack_api/datasets.py @@ -0,0 +1,248 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from enum import Enum, StrEnum +from typing import Annotated, Any, Literal, Protocol + +from pydantic import BaseModel, Field + +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1BETA + + +class DatasetPurpose(StrEnum): + """ + Purpose of the dataset. Each purpose has a required input data schema. + + :cvar post-training/messages: The dataset contains messages used for post-training. + { + "messages": [ + {"role": "user", "content": "Hello, world!"}, + {"role": "assistant", "content": "Hello, world!"}, + ] + } + :cvar eval/question-answer: The dataset contains a question column and an answer column. + { + "question": "What is the capital of France?", + "answer": "Paris" + } + :cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column. + { + "messages": [ + {"role": "user", "content": "Hello, my name is John Doe."}, + {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"}, + {"role": "user", "content": "What's my name?"}, + ], + "answer": "John Doe" + } + """ + + post_training_messages = "post-training/messages" + eval_question_answer = "eval/question-answer" + eval_messages_answer = "eval/messages-answer" + + # TODO: add more schemas here + + +class DatasetType(Enum): + """ + Type of the dataset source. + :cvar uri: The dataset can be obtained from a URI. + :cvar rows: The dataset is stored in rows. + """ + + uri = "uri" + rows = "rows" + + +@json_schema_type +class URIDataSource(BaseModel): + """A dataset that can be obtained from a URI. + :param uri: The dataset can be obtained from a URI. E.g. 
+ - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" + - "data:csv;base64,{base64_content}" + """ + + type: Literal["uri"] = "uri" + uri: str + + +@json_schema_type +class RowsDataSource(BaseModel): + """A dataset stored in rows. + :param rows: The dataset is stored in rows. E.g. + - [ + {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]} + ] + """ + + type: Literal["rows"] = "rows" + rows: list[dict[str, Any]] + + +DataSource = Annotated[ + URIDataSource | RowsDataSource, + Field(discriminator="type"), +] +register_schema(DataSource, name="DataSource") + + +class CommonDatasetFields(BaseModel): + """ + Common fields for a dataset. + + :param purpose: Purpose of the dataset indicating its intended use + :param source: Data source configuration for the dataset + :param metadata: Additional metadata for the dataset + """ + + purpose: DatasetPurpose + source: DataSource + metadata: dict[str, Any] = Field( + default_factory=dict, + description="Any additional metadata for this dataset", + ) + + +@json_schema_type +class Dataset(CommonDatasetFields, Resource): + """Dataset resource for storing and accessing training or evaluation data. + + :param type: Type of resource, always 'dataset' for datasets + """ + + type: Literal[ResourceType.dataset] = ResourceType.dataset + + @property + def dataset_id(self) -> str: + return self.identifier + + @property + def provider_dataset_id(self) -> str | None: + return self.provider_resource_id + + +class DatasetInput(CommonDatasetFields, BaseModel): + """Input parameters for dataset operations. + + :param dataset_id: Unique identifier for the dataset + """ + + dataset_id: str + + +@json_schema_type +class ListDatasetsResponse(BaseModel): + """Response from listing datasets. + + :param data: List of datasets + """ + + data: list[Dataset] + + +class Datasets(Protocol): + @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True) + async def register_dataset( + self, + purpose: DatasetPurpose, + source: DataSource, + metadata: dict[str, Any] | None = None, + dataset_id: str | None = None, + ) -> Dataset: + """ + Register a new dataset. + + :param purpose: The purpose of the dataset. + One of: + - "post-training/messages": The dataset contains a messages column with list of messages for post-training. + { + "messages": [ + {"role": "user", "content": "Hello, world!"}, + {"role": "assistant", "content": "Hello, world!"}, + ] + } + - "eval/question-answer": The dataset contains a question column and an answer column for evaluation. + { + "question": "What is the capital of France?", + "answer": "Paris" + } + - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation. + { + "messages": [ + {"role": "user", "content": "Hello, my name is John Doe."}, + {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"}, + {"role": "user", "content": "What's my name?"}, + ], + "answer": "John Doe" + } + :param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. 
Examples: + - { + "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" + } + - { + "type": "uri", + "uri": "lsfs://mydata.jsonl" + } + - { + "type": "uri", + "uri": "data:csv;base64,{base64_content}" + } + - { + "type": "uri", + "uri": "huggingface://llamastack/simpleqa?split=train" + } + - { + "type": "rows", + "rows": [ + { + "messages": [ + {"role": "user", "content": "Hello, world!"}, + {"role": "assistant", "content": "Hello, world!"}, + ] + } + ] + } + :param metadata: The metadata for the dataset. + - E.g. {"description": "My dataset"}. + :param dataset_id: The ID of the dataset. If not provided, an ID will be generated. + :returns: A Dataset. + """ + ... + + @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA) + async def get_dataset( + self, + dataset_id: str, + ) -> Dataset: + """Get a dataset by its ID. + + :param dataset_id: The ID of the dataset to get. + :returns: A Dataset. + """ + ... + + @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA) + async def list_datasets(self) -> ListDatasetsResponse: + """List all datasets. + + :returns: A ListDatasetsResponse. + """ + ... + + @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True) + async def unregister_dataset( + self, + dataset_id: str, + ) -> None: + """Unregister a dataset by its ID. + + :param dataset_id: The ID of the dataset to unregister. + """ + ... diff --git a/src/llama_stack_api/datatypes.py b/src/llama_stack_api/datatypes.py new file mode 100644 index 000000000..f024068f3 --- /dev/null +++ b/src/llama_stack_api/datatypes.py @@ -0,0 +1,368 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
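# ---------------------------------------------------------------------------
# Illustrative sketch (editor's note, not part of the patch): registering the
# two dataset source shapes accepted by the Datasets protocol above, reusing
# the example payloads from its docstring. `impl` is any object satisfying
# the protocol.

from llama_stack_api.datasets import DatasetPurpose, Datasets, RowsDataSource, URIDataSource

async def register_examples(impl: Datasets) -> None:
    # URI-backed dataset for question-answer evaluation
    await impl.register_dataset(
        purpose=DatasetPurpose.eval_question_answer,
        source=URIDataSource(uri="https://mywebsite.com/mydata.jsonl"),
        metadata={"description": "My dataset"},
    )
    # Inline rows for post-training messages
    await impl.register_dataset(
        purpose=DatasetPurpose.post_training_messages,
        source=RowsDataSource(
            rows=[
                {
                    "messages": [
                        {"role": "user", "content": "Hello, world!"},
                        {"role": "assistant", "content": "Hello, world!"},
                    ]
                }
            ]
        ),
        dataset_id="my-inline-dataset",
    )
# ---------------------------------------------------------------------------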
+ +from enum import Enum, EnumMeta, StrEnum +from typing import Any, Protocol +from urllib.parse import urlparse + +from pydantic import BaseModel, Field + +from llama_stack_api.benchmarks import Benchmark +from llama_stack_api.datasets import Dataset +from llama_stack_api.models import Model +from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api.scoring_functions import ScoringFn +from llama_stack_api.shields import Shield +from llama_stack_api.tools import ToolGroup +from llama_stack_api.vector_stores import VectorStore + + +class DynamicApiMeta(EnumMeta): + def __new__(cls, name, bases, namespace): + # Store the original enum values + original_values = {k: v for k, v in namespace.items() if not k.startswith("_")} + + # Create the enum class + cls = super().__new__(cls, name, bases, namespace) + + # Store the original values for reference + cls._original_values = original_values + # Initialize _dynamic_values + cls._dynamic_values = {} + + return cls + + def __call__(cls, value): + try: + return super().__call__(value) + except ValueError as e: + # If this value was already dynamically added, return it + if value in cls._dynamic_values: + return cls._dynamic_values[value] + + # If the value doesn't exist, create a new enum member + # Create a new member name from the value + member_name = value.lower().replace("-", "_") + + # If this member name already exists in the enum, return the existing member + if member_name in cls._member_map_: + return cls._member_map_[member_name] + + # Instead of creating a new member, raise ValueError to force users to use Api.add() to + # register new APIs explicitly + raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e + + def __iter__(cls): + # Allow iteration over both static and dynamic members + yield from super().__iter__() + if hasattr(cls, "_dynamic_values"): + yield from cls._dynamic_values.values() + + def add(cls, value): + """ + Add a new API to the enum. + Used to register external APIs. + """ + member_name = value.lower().replace("-", "_") + + # If this member name already exists in the enum, return it + if member_name in cls._member_map_: + return cls._member_map_[member_name] + + # Create a new enum member + member = object.__new__(cls) + member._name_ = member_name + member._value_ = value + + # Add it to the enum class + cls._member_map_[member_name] = member + cls._member_names_.append(member_name) + cls._member_type_ = str + + # Store it in our dynamic values + cls._dynamic_values[value] = member + + return member + + +@json_schema_type +class Api(Enum, metaclass=DynamicApiMeta): + """Enumeration of all available APIs in the Llama Stack system. 
+    :cvar providers: Provider management and configuration
+    :cvar inference: Text generation, chat completions, and embeddings
+    :cvar safety: Content moderation and safety shields
+    :cvar agents: Agent orchestration and execution
+    :cvar batches: Batch processing for asynchronous API requests
+    :cvar vector_io: Vector database operations and queries
+    :cvar datasetio: Dataset input/output operations
+    :cvar scoring: Model output evaluation and scoring
+    :cvar eval: Model evaluation and benchmarking framework
+    :cvar post_training: Fine-tuning and model training
+    :cvar tool_runtime: Tool execution and management
+    :cvar models: Model metadata and management
+    :cvar shields: Safety shield implementations
+    :cvar vector_stores: Vector store management (only used for the routing table)
+    :cvar datasets: Dataset creation and management
+    :cvar scoring_functions: Scoring function definitions
+    :cvar benchmarks: Benchmark suite management
+    :cvar tool_groups: Tool group organization
+    :cvar files: File storage and management
+    :cvar prompts: Prompt versions and management
+    :cvar conversations: Conversation creation and management
+    :cvar inspect: Built-in system inspection and introspection
+    """
+
+    providers = "providers"
+    inference = "inference"
+    safety = "safety"
+    agents = "agents"
+    batches = "batches"
+    vector_io = "vector_io"
+    datasetio = "datasetio"
+    scoring = "scoring"
+    eval = "eval"
+    post_training = "post_training"
+    tool_runtime = "tool_runtime"
+
+    models = "models"
+    shields = "shields"
+    vector_stores = "vector_stores"  # only used for routing table
+    datasets = "datasets"
+    scoring_functions = "scoring_functions"
+    benchmarks = "benchmarks"
+    tool_groups = "tool_groups"
+    files = "files"
+    prompts = "prompts"
+    conversations = "conversations"
+
+    # built-in API
+    inspect = "inspect"
+
+
+@json_schema_type
+class Error(BaseModel):
+    """
+    Error response from the API. Roughly follows RFC 7807.
+
+    :param status: HTTP status code
+    :param title: Error title, a short summary of the error which is invariant for an error type
+    :param detail: Error detail, a longer human-readable description of the error
+    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
+    """
+
+    status: int
+    title: str
+    detail: str
+    instance: str | None = None
+
+
+class ExternalApiSpec(BaseModel):
+    """Specification for an external API implementation."""
+
+    module: str = Field(..., description="Python module containing the API implementation")
+    name: str = Field(..., description="Name of the API")
+    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
+    protocol: str = Field(..., description="Name of the protocol class for the API")
+
+
+# Provider-related types (merged from providers/datatypes.py)
+# NOTE: These imports are forward references to avoid circular dependencies
+# They will be resolved at runtime when the classes are used
+
+
+class ModelsProtocolPrivate(Protocol):
+    """
+    Protocol for model management.
+
+    This allows users to register their preferred model identifiers.
+
+    Model registration requires:
+    - a provider, used to route the registration request
+    - a model identifier, the user's intended name for the model during inference
+    - a provider model identifier, a model identifier supported by the provider
+
+    Providers will only accept registration for provider model ids they support.
+ + Example, + register: provider x my-model-id x provider-model-id + -> Error if provider does not support provider-model-id + -> Error if my-model-id is already registered + -> Success if provider supports provider-model-id + inference: my-model-id x ... + -> Provider uses provider-model-id for inference + """ + + # this should be called `on_model_register` or something like that. + # the provider should _not_ be able to change the object in this + # callback + async def register_model(self, model: Model) -> Model: ... + + async def unregister_model(self, model_id: str) -> None: ... + + # the Stack router will query each provider for their list of models + # if a `refresh_interval_seconds` is provided, this method will be called + # periodically to refresh the list of models + # + # NOTE: each model returned will be registered with the model registry. this means + # a callback to the `register_model()` method will be made. this is duplicative and + # may be removed in the future. + async def list_models(self) -> list[Model] | None: ... + + async def should_refresh_models(self) -> bool: ... + + +class ShieldsProtocolPrivate(Protocol): + async def register_shield(self, shield: Shield) -> None: ... + + async def unregister_shield(self, identifier: str) -> None: ... + + +class VectorStoresProtocolPrivate(Protocol): + async def register_vector_store(self, vector_store: VectorStore) -> None: ... + + async def unregister_vector_store(self, vector_store_id: str) -> None: ... + + +class DatasetsProtocolPrivate(Protocol): + async def register_dataset(self, dataset: Dataset) -> None: ... + + async def unregister_dataset(self, dataset_id: str) -> None: ... + + +class ScoringFunctionsProtocolPrivate(Protocol): + async def list_scoring_functions(self) -> list[ScoringFn]: ... + + async def register_scoring_function(self, scoring_fn: ScoringFn) -> None: ... + + +class BenchmarksProtocolPrivate(Protocol): + async def register_benchmark(self, benchmark: Benchmark) -> None: ... + + +class ToolGroupsProtocolPrivate(Protocol): + async def register_toolgroup(self, toolgroup: ToolGroup) -> None: ... + + async def unregister_toolgroup(self, toolgroup_id: str) -> None: ... + + +@json_schema_type +class ProviderSpec(BaseModel): + api: Api + provider_type: str + config_class: str = Field( + ..., + description="Fully-qualified classname of the config for this provider", + ) + api_dependencies: list[Api] = Field( + default_factory=list, + description="Higher-level API surfaces may depend on other providers to provide their functionality", + ) + optional_api_dependencies: list[Api] = Field( + default_factory=list, + ) + deprecation_warning: str | None = Field( + default=None, + description="If this provider is deprecated, specify the warning message here", + ) + deprecation_error: str | None = Field( + default=None, + description="If this provider is deprecated and does NOT work, specify the error message here", + ) + + module: str | None = Field( + default=None, + description=""" + Fully-qualified name of the module to import. 
The module is expected to have: + + - `get_adapter_impl(config, deps)`: returns the adapter implementation + + Example: `module: ramalama_stack` + """, + ) + + pip_packages: list[str] = Field( + default_factory=list, + description="The pip dependencies needed for this implementation", + ) + + provider_data_validator: str | None = Field( + default=None, + ) + + is_external: bool = Field(default=False, description="Notes whether this provider is an external provider.") + + # used internally by the resolver; this is a hack for now + deps__: list[str] = Field(default_factory=list) + + @property + def is_sample(self) -> bool: + return self.provider_type in ("sample", "remote::sample") + + +class RoutingTable(Protocol): + async def get_provider_impl(self, routing_key: str) -> Any: ... + + +@json_schema_type +class InlineProviderSpec(ProviderSpec): + container_image: str | None = Field( + default=None, + description=""" +The container image to use for this implementation. If one is provided, pip_packages will be ignored. +If a provider depends on other providers, the dependencies MUST NOT specify a container image. +""", + ) + description: str | None = Field( + default=None, + description=""" +A description of the provider. This is used to display in the documentation. +""", + ) + + +class RemoteProviderConfig(BaseModel): + host: str = "localhost" + port: int | None = None + protocol: str = "http" + + @property + def url(self) -> str: + if self.port is None: + return f"{self.protocol}://{self.host}" + return f"{self.protocol}://{self.host}:{self.port}" + + @classmethod + def from_url(cls, url: str) -> "RemoteProviderConfig": + parsed = urlparse(url) + attrs = {k: v for k, v in parsed._asdict().items() if v is not None} + return cls(**attrs) + + +@json_schema_type +class RemoteProviderSpec(ProviderSpec): + adapter_type: str = Field( + ..., + description="Unique identifier for this adapter", + ) + + description: str | None = Field( + default=None, + description=""" +A description of the provider. This is used to display in the documentation. +""", + ) + + @property + def container_image(self) -> str | None: + return None + + +class HealthStatus(StrEnum): + OK = "OK" + ERROR = "Error" + NOT_IMPLEMENTED = "Not Implemented" + + +HealthResponse = dict[str, Any] diff --git a/src/llama_stack_api/eval.py b/src/llama_stack_api/eval.py new file mode 100644 index 000000000..7a11c221e --- /dev/null +++ b/src/llama_stack_api/eval.py @@ -0,0 +1,137 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Literal, Protocol + +from pydantic import BaseModel, Field + +from llama_stack_api.common.job_types import Job +from llama_stack_api.inference import SamplingParams, SystemMessage +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring import ScoringResult +from llama_stack_api.scoring_functions import ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA + + +@json_schema_type +class ModelCandidate(BaseModel): + """A model candidate for evaluation. + + :param model: The model ID to evaluate. + :param sampling_params: The sampling parameters for the model. + :param system_message: (Optional) The system message providing instructions or context to the model. 
+ """ + + type: Literal["model"] = "model" + model: str + sampling_params: SamplingParams + system_message: SystemMessage | None = None + + +EvalCandidate = ModelCandidate + + +@json_schema_type +class BenchmarkConfig(BaseModel): + """A benchmark configuration for evaluation. + + :param eval_candidate: The candidate to evaluate. + :param scoring_params: Map between scoring function id and parameters for each scoring function you want to run + :param num_examples: (Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated + """ + + eval_candidate: EvalCandidate + scoring_params: dict[str, ScoringFnParams] = Field( + description="Map between scoring function id and parameters for each scoring function you want to run", + default_factory=dict, + ) + num_examples: int | None = Field( + description="Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated", + default=None, + ) + # we could optinally add any specific dataset config here + + +@json_schema_type +class EvaluateResponse(BaseModel): + """The response from an evaluation. + + :param generations: The generations from the evaluation. + :param scores: The scores from the evaluation. + """ + + generations: list[dict[str, Any]] + # each key in the dict is a scoring function name + scores: dict[str, ScoringResult] + + +class Eval(Protocol): + """Evaluations + + Llama Stack Evaluation API for running evaluations on model and agent candidates.""" + + @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA) + async def run_eval( + self, + benchmark_id: str, + benchmark_config: BenchmarkConfig, + ) -> Job: + """Run an evaluation on a benchmark. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param benchmark_config: The configuration for the benchmark. + :returns: The job that was created to run the evaluation. + """ + ... + + @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA) + async def evaluate_rows( + self, + benchmark_id: str, + input_rows: list[dict[str, Any]], + scoring_functions: list[str], + benchmark_config: BenchmarkConfig, + ) -> EvaluateResponse: + """Evaluate a list of rows on a benchmark. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param input_rows: The rows to evaluate. + :param scoring_functions: The scoring functions to use for the evaluation. + :param benchmark_config: The configuration for the benchmark. + :returns: EvaluateResponse object containing generations and scores. + """ + ... + + @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) + async def job_status(self, benchmark_id: str, job_id: str) -> Job: + """Get the status of a job. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param job_id: The ID of the job to get the status of. + :returns: The status of the evaluation job. + """ + ... + + @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) + async def job_cancel(self, benchmark_id: str, job_id: str) -> None: + """Cancel a job. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param job_id: The ID of the job to cancel. + """ + ... 
+ + @webmethod( + route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA + ) + async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: + """Get the result of a job. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param job_id: The ID of the job to get the result of. + :returns: The result of the job. + """ + ... diff --git a/src/llama_stack_api/files.py b/src/llama_stack_api/files.py new file mode 100644 index 000000000..e515fe0ae --- /dev/null +++ b/src/llama_stack_api/files.py @@ -0,0 +1,192 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from enum import StrEnum +from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable + +from fastapi import File, Form, Response, UploadFile +from pydantic import BaseModel, Field + +from llama_stack_api.common.responses import Order +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 + + +# OpenAI Files API Models +class OpenAIFilePurpose(StrEnum): + """ + Valid purpose values for OpenAI Files API. + """ + + ASSISTANTS = "assistants" + BATCH = "batch" + # TODO: Add other purposes as needed + + +@json_schema_type +class OpenAIFileObject(BaseModel): + """ + OpenAI File object as defined in the OpenAI Files API. + + :param object: The object type, which is always "file" + :param id: The file identifier, which can be referenced in the API endpoints + :param bytes: The size of the file, in bytes + :param created_at: The Unix timestamp (in seconds) for when the file was created + :param expires_at: The Unix timestamp (in seconds) for when the file expires + :param filename: The name of the file + :param purpose: The intended purpose of the file + """ + + object: Literal["file"] = "file" + id: str + bytes: int + created_at: int + expires_at: int + filename: str + purpose: OpenAIFilePurpose + + +@json_schema_type +class ExpiresAfter(BaseModel): + """ + Control expiration of uploaded files. + + Params: + - anchor, must be "created_at" + - seconds, must be int between 3600 and 2592000 (1 hour to 30 days) + """ + + MIN: ClassVar[int] = 3600 # 1 hour + MAX: ClassVar[int] = 2592000 # 30 days + + anchor: Literal["created_at"] + seconds: int = Field(..., ge=3600, le=2592000) + + +@json_schema_type +class ListOpenAIFileResponse(BaseModel): + """ + Response for listing files in OpenAI Files API. + + :param data: List of file objects + :param has_more: Whether there are more files available beyond this page + :param first_id: ID of the first file in the list for pagination + :param last_id: ID of the last file in the list for pagination + :param object: The object type, which is always "list" + """ + + data: list[OpenAIFileObject] + has_more: bool + first_id: str + last_id: str + object: Literal["list"] = "list" + + +@json_schema_type +class OpenAIFileDeleteResponse(BaseModel): + """ + Response for deleting a file in OpenAI Files API. + + :param id: The file identifier that was deleted + :param object: The object type, which is always "file" + :param deleted: Whether the file was successfully deleted + """ + + id: str + object: Literal["file"] = "file" + deleted: bool + + +@runtime_checkable +class Files(Protocol): + """Files + + This API is used to upload documents that can be used with other Llama Stack APIs. 
+ """ + + # OpenAI Files API Endpoints + @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1) + async def openai_upload_file( + self, + file: Annotated[UploadFile, File()], + purpose: Annotated[OpenAIFilePurpose, Form()], + expires_after: Annotated[ExpiresAfter | None, Form()] = None, + ) -> OpenAIFileObject: + """Upload file. + + Upload a file that can be used across various endpoints. + + The file upload should be a multipart form request with: + - file: The File object (not file name) to be uploaded. + - purpose: The intended purpose of the uploaded file. + - expires_after: Optional form values describing expiration for the file. + + :param file: The uploaded file object containing content and metadata (filename, content_type, etc.). + :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune"). + :param expires_after: Optional form values describing expiration for the file. + :returns: An OpenAIFileObject representing the uploaded file. + """ + ... + + @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1) + async def openai_list_files( + self, + after: str | None = None, + limit: int | None = 10000, + order: Order | None = Order.desc, + purpose: OpenAIFilePurpose | None = None, + ) -> ListOpenAIFileResponse: + """List files. + + Returns a list of files that belong to the user's organization. + + :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list. + :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000. + :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order. + :param purpose: Only return files with the given purpose. + :returns: An ListOpenAIFileResponse containing the list of files. + """ + ... + + @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) + async def openai_retrieve_file( + self, + file_id: str, + ) -> OpenAIFileObject: + """Retrieve file. + + Returns information about a specific file. + + :param file_id: The ID of the file to use for this request. + :returns: An OpenAIFileObject containing file information. + """ + ... + + @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1) + async def openai_delete_file( + self, + file_id: str, + ) -> OpenAIFileDeleteResponse: + """Delete file. + + :param file_id: The ID of the file to use for this request. + :returns: An OpenAIFileDeleteResponse indicating successful deletion. + """ + ... + + @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1) + async def openai_retrieve_file_content( + self, + file_id: str, + ) -> Response: + """Retrieve file content. + + Returns the contents of the specified file. + + :param file_id: The ID of the file to use for this request. + :returns: The raw file content as a binary response. + """ + ... diff --git a/src/llama_stack_api/inference.py b/src/llama_stack_api/inference.py new file mode 100644 index 000000000..4a169486a --- /dev/null +++ b/src/llama_stack_api/inference.py @@ -0,0 +1,1094 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import AsyncIterator +from enum import Enum, StrEnum +from typing import ( + Annotated, + Any, + Literal, + Protocol, + runtime_checkable, +) + +from fastapi import Body +from pydantic import BaseModel, Field +from typing_extensions import TypedDict + +from llama_stack_api.common.content_types import InterleavedContent +from llama_stack_api.common.responses import ( + Order, +) +from llama_stack_api.models import Model +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA + + +@json_schema_type +class GreedySamplingStrategy(BaseModel): + """Greedy sampling strategy that selects the highest probability token at each step. + + :param type: Must be "greedy" to identify this sampling strategy + """ + + type: Literal["greedy"] = "greedy" + + +@json_schema_type +class TopPSamplingStrategy(BaseModel): + """Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p. + + :param type: Must be "top_p" to identify this sampling strategy + :param temperature: Controls randomness in sampling. Higher values increase randomness + :param top_p: Cumulative probability threshold for nucleus sampling. Defaults to 0.95 + """ + + type: Literal["top_p"] = "top_p" + temperature: float | None = Field(..., gt=0.0) + top_p: float | None = 0.95 + + +@json_schema_type +class TopKSamplingStrategy(BaseModel): + """Top-k sampling strategy that restricts sampling to the k most likely tokens. + + :param type: Must be "top_k" to identify this sampling strategy + :param top_k: Number of top tokens to consider for sampling. Must be at least 1 + """ + + type: Literal["top_k"] = "top_k" + top_k: int = Field(..., ge=1) + + +SamplingStrategy = Annotated[ + GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy, + Field(discriminator="type"), +] +register_schema(SamplingStrategy, name="SamplingStrategy") + + +@json_schema_type +class SamplingParams(BaseModel): + """Sampling parameters. + + :param strategy: The sampling strategy. + :param max_tokens: The maximum number of tokens that can be generated in the completion. The token count of + your prompt plus max_tokens cannot exceed the model's context length. + :param repetition_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens + based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + :param stop: Up to 4 sequences where the API will stop generating further tokens. + The returned text will not contain the stop sequence. + """ + + strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy) + + max_tokens: int | None = None + repetition_penalty: float | None = 1.0 + stop: list[str] | None = None + + +class LogProbConfig(BaseModel): + """ + + :param top_k: How many tokens (for each position) to return log probabilities for. + """ + + top_k: int | None = 0 + + +class QuantizationType(Enum): + """Type of model quantization to run inference with. 
+ + :cvar bf16: BFloat16 typically this means _no_ quantization + :cvar fp8_mixed: 8-bit floating point quantization with mixed precision + :cvar int4_mixed: 4-bit integer quantization with mixed precision + """ + + bf16 = "bf16" + fp8_mixed = "fp8_mixed" + int4_mixed = "int4_mixed" + + +@json_schema_type +class Fp8QuantizationConfig(BaseModel): + """Configuration for 8-bit floating point quantization. + + :param type: Must be "fp8_mixed" to identify this quantization type + """ + + type: Literal["fp8_mixed"] = "fp8_mixed" + + +@json_schema_type +class Bf16QuantizationConfig(BaseModel): + """Configuration for BFloat16 precision (typically no quantization). + + :param type: Must be "bf16" to identify this quantization type + """ + + type: Literal["bf16"] = "bf16" + + +@json_schema_type +class Int4QuantizationConfig(BaseModel): + """Configuration for 4-bit integer quantization. + + :param type: Must be "int4" to identify this quantization type + :param scheme: Quantization scheme to use. Defaults to "int4_weight_int8_dynamic_activation" + """ + + type: Literal["int4_mixed"] = "int4_mixed" + scheme: str | None = "int4_weight_int8_dynamic_activation" + + +QuantizationConfig = Annotated[ + Bf16QuantizationConfig | Fp8QuantizationConfig | Int4QuantizationConfig, + Field(discriminator="type"), +] + + +@json_schema_type +class UserMessage(BaseModel): + """A message from the user in a chat conversation. + + :param role: Must be "user" to identify this as a user message + :param content: The content of the message, which can include text and other media + :param context: (Optional) This field is used internally by Llama Stack to pass RAG context. This field may be removed in the API in the future. + """ + + role: Literal["user"] = "user" + content: InterleavedContent + context: InterleavedContent | None = None + + +@json_schema_type +class SystemMessage(BaseModel): + """A system message providing instructions or context to the model. + + :param role: Must be "system" to identify this as a system message + :param content: The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions). + """ + + role: Literal["system"] = "system" + content: InterleavedContent + + +@json_schema_type +class ToolResponseMessage(BaseModel): + """A message representing the result of a tool invocation. + + :param role: Must be "tool" to identify this as a tool response + :param call_id: Unique identifier for the tool call this response is for + :param content: The response content from the tool + """ + + role: Literal["tool"] = "tool" + call_id: str + content: InterleavedContent + + +class ToolChoice(Enum): + """Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model. + + :cvar auto: The model may use tools if it determines that is appropriate. + :cvar required: The model must use tools. + :cvar none: The model must not use tools. + """ + + auto = "auto" + required = "required" + none = "none" + + +@json_schema_type +class TokenLogProbs(BaseModel): + """Log probabilities for generated tokens. + + :param logprobs_by_token: Dictionary mapping tokens to their log probabilities + """ + + logprobs_by_token: dict[str, float] + + +class ChatCompletionResponseEventType(Enum): + """Types of events that can occur during chat completion. 
+ + :cvar start: Inference has started + :cvar complete: Inference is complete and a full response is available + :cvar progress: Inference is in progress and a partial response is available + """ + + start = "start" + complete = "complete" + progress = "progress" + + +class ResponseFormatType(StrEnum): + """Types of formats for structured (guided) decoding. + + :cvar json_schema: Response should conform to a JSON schema. In a Python SDK, this is often a `pydantic` model. + :cvar grammar: Response should conform to a BNF grammar + """ + + json_schema = "json_schema" + grammar = "grammar" + + +@json_schema_type +class JsonSchemaResponseFormat(BaseModel): + """Configuration for JSON schema-guided response generation. + + :param type: Must be "json_schema" to identify this format type + :param json_schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. + """ + + type: Literal[ResponseFormatType.json_schema] = ResponseFormatType.json_schema + json_schema: dict[str, Any] + + +@json_schema_type +class GrammarResponseFormat(BaseModel): + """Configuration for grammar-guided response generation. + + :param type: Must be "grammar" to identify this format type + :param bnf: The BNF grammar specification the response should conform to + """ + + type: Literal[ResponseFormatType.grammar] = ResponseFormatType.grammar + bnf: dict[str, Any] + + +ResponseFormat = Annotated[ + JsonSchemaResponseFormat | GrammarResponseFormat, + Field(discriminator="type"), +] +register_schema(ResponseFormat, name="ResponseFormat") + + +# This is an internally used class +class CompletionRequest(BaseModel): + model: str + content: InterleavedContent + sampling_params: SamplingParams | None = Field(default_factory=SamplingParams) + response_format: ResponseFormat | None = None + stream: bool | None = False + logprobs: LogProbConfig | None = None + + +class SystemMessageBehavior(Enum): + """Config for how to override the default system prompt. + + :cvar append: Appends the provided system message to the default system prompt: + https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2/#-function-definitions-in-the-system-prompt- + :cvar replace: Replaces the default system prompt with the provided system message. The system message can include the string + '{{function_definitions}}' to indicate where the function definitions should be inserted. + """ + + append = "append" + replace = "replace" + + +@json_schema_type +class EmbeddingsResponse(BaseModel): + """Response containing generated embeddings. + + :param embeddings: List of embedding vectors, one per input content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id} + """ + + embeddings: list[list[float]] + + +@json_schema_type +class RerankData(BaseModel): + """A single rerank result from a reranking response. + + :param index: The original index of the document in the input list + :param relevance_score: The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance. + """ + + index: int + relevance_score: float + + +@json_schema_type +class RerankResponse(BaseModel): + """Response from a reranking request. 
+ + :param data: List of rerank result objects, sorted by relevance score (descending) + """ + + data: list[RerankData] + + +@json_schema_type +class OpenAIChatCompletionContentPartTextParam(BaseModel): + """Text content part for OpenAI-compatible chat completion messages. + + :param type: Must be "text" to identify this as text content + :param text: The text content of the message + """ + + type: Literal["text"] = "text" + text: str + + +@json_schema_type +class OpenAIImageURL(BaseModel): + """Image URL specification for OpenAI-compatible chat completion messages. + + :param url: URL of the image to include in the message + :param detail: (Optional) Level of detail for image processing. Can be "low", "high", or "auto" + """ + + url: str + detail: str | None = None + + +@json_schema_type +class OpenAIChatCompletionContentPartImageParam(BaseModel): + """Image content part for OpenAI-compatible chat completion messages. + + :param type: Must be "image_url" to identify this as image content + :param image_url: Image URL specification and processing details + """ + + type: Literal["image_url"] = "image_url" + image_url: OpenAIImageURL + + +@json_schema_type +class OpenAIFileFile(BaseModel): + file_data: str | None = None + file_id: str | None = None + filename: str | None = None + + +@json_schema_type +class OpenAIFile(BaseModel): + type: Literal["file"] = "file" + file: OpenAIFileFile + + +OpenAIChatCompletionContentPartParam = Annotated[ + OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile, + Field(discriminator="type"), +] +register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam") + + +OpenAIChatCompletionMessageContent = str | list[OpenAIChatCompletionContentPartParam] + +OpenAIChatCompletionTextOnlyMessageContent = str | list[OpenAIChatCompletionContentPartTextParam] + + +@json_schema_type +class OpenAIUserMessageParam(BaseModel): + """A message from the user in an OpenAI-compatible chat completion request. + + :param role: Must be "user" to identify this as a user message + :param content: The content of the message, which can include text and other media + :param name: (Optional) The name of the user message participant. + """ + + role: Literal["user"] = "user" + content: OpenAIChatCompletionMessageContent + name: str | None = None + + +@json_schema_type +class OpenAISystemMessageParam(BaseModel): + """A system message providing instructions or context to the model. + + :param role: Must be "system" to identify this as a system message + :param content: The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions). + :param name: (Optional) The name of the system message participant. + """ + + role: Literal["system"] = "system" + content: OpenAIChatCompletionTextOnlyMessageContent + name: str | None = None + + +@json_schema_type +class OpenAIChatCompletionToolCallFunction(BaseModel): + """Function call details for OpenAI-compatible tool calls. + + :param name: (Optional) Name of the function to call + :param arguments: (Optional) Arguments to pass to the function as a JSON string + """ + + name: str | None = None + arguments: str | None = None + + +@json_schema_type +class OpenAIChatCompletionToolCall(BaseModel): + """Tool call specification for OpenAI-compatible chat completion responses. 
+ + :param index: (Optional) Index of the tool call in the list + :param id: (Optional) Unique identifier for the tool call + :param type: Must be "function" to identify this as a function call + :param function: (Optional) Function call details + """ + + index: int | None = None + id: str | None = None + type: Literal["function"] = "function" + function: OpenAIChatCompletionToolCallFunction | None = None + + +@json_schema_type +class OpenAIAssistantMessageParam(BaseModel): + """A message containing the model's (assistant) response in an OpenAI-compatible chat completion request. + + :param role: Must be "assistant" to identify this as the model's response + :param content: The content of the model's response + :param name: (Optional) The name of the assistant message participant. + :param tool_calls: List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object. + """ + + role: Literal["assistant"] = "assistant" + content: OpenAIChatCompletionTextOnlyMessageContent | None = None + name: str | None = None + tool_calls: list[OpenAIChatCompletionToolCall] | None = None + + +@json_schema_type +class OpenAIToolMessageParam(BaseModel): + """A message representing the result of a tool invocation in an OpenAI-compatible chat completion request. + + :param role: Must be "tool" to identify this as a tool response + :param tool_call_id: Unique identifier for the tool call this response is for + :param content: The response content from the tool + """ + + role: Literal["tool"] = "tool" + tool_call_id: str + content: OpenAIChatCompletionTextOnlyMessageContent + + +@json_schema_type +class OpenAIDeveloperMessageParam(BaseModel): + """A message from the developer in an OpenAI-compatible chat completion request. + + :param role: Must be "developer" to identify this as a developer message + :param content: The content of the developer message + :param name: (Optional) The name of the developer message participant. + """ + + role: Literal["developer"] = "developer" + content: OpenAIChatCompletionTextOnlyMessageContent + name: str | None = None + + +OpenAIMessageParam = Annotated[ + OpenAIUserMessageParam + | OpenAISystemMessageParam + | OpenAIAssistantMessageParam + | OpenAIToolMessageParam + | OpenAIDeveloperMessageParam, + Field(discriminator="role"), +] +register_schema(OpenAIMessageParam, name="OpenAIMessageParam") + + +@json_schema_type +class OpenAIResponseFormatText(BaseModel): + """Text response format for OpenAI-compatible chat completion requests. + + :param type: Must be "text" to indicate plain text response format + """ + + type: Literal["text"] = "text" + + +@json_schema_type +class OpenAIJSONSchema(TypedDict, total=False): + """JSON schema specification for OpenAI-compatible structured response format. + + :param name: Name of the schema + :param description: (Optional) Description of the schema + :param strict: (Optional) Whether to enforce strict adherence to the schema + :param schema: (Optional) The JSON schema definition + """ + + name: str + description: str | None + strict: bool | None + + # Pydantic BaseModel cannot be used with a schema param, since it already + # has one. And, we don't want to alias here because then have to handle + # that alias when converting to OpenAI params. So, to support schema, + # we use a TypedDict. + schema: dict[str, Any] | None + + +@json_schema_type +class OpenAIResponseFormatJSONSchema(BaseModel): + """JSON schema response format for OpenAI-compatible chat completion requests. 
+ + :param type: Must be "json_schema" to indicate structured JSON response format + :param json_schema: The JSON schema specification for the response + """ + + type: Literal["json_schema"] = "json_schema" + json_schema: OpenAIJSONSchema + + +@json_schema_type +class OpenAIResponseFormatJSONObject(BaseModel): + """JSON object response format for OpenAI-compatible chat completion requests. + + :param type: Must be "json_object" to indicate generic JSON object response format + """ + + type: Literal["json_object"] = "json_object" + + +OpenAIResponseFormatParam = Annotated[ + OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject, + Field(discriminator="type"), +] +register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam") + + +@json_schema_type +class OpenAITopLogProb(BaseModel): + """The top log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + """ + + token: str + bytes: list[int] | None = None + logprob: float + + +@json_schema_type +class OpenAITokenLogProb(BaseModel): + """The log probability for a token from an OpenAI-compatible chat completion response. + + :token: The token + :bytes: (Optional) The bytes for the token + :logprob: The log probability of the token + :top_logprobs: The top log probabilities for the token + """ + + token: str + bytes: list[int] | None = None + logprob: float + top_logprobs: list[OpenAITopLogProb] + + +@json_schema_type +class OpenAIChoiceLogprobs(BaseModel): + """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response. + + :param content: (Optional) The log probabilities for the tokens in the message + :param refusal: (Optional) The log probabilities for the tokens in the message + """ + + content: list[OpenAITokenLogProb] | None = None + refusal: list[OpenAITokenLogProb] | None = None + + +@json_schema_type +class OpenAIChoiceDelta(BaseModel): + """A delta from an OpenAI-compatible chat completion streaming response. + + :param content: (Optional) The content of the delta + :param refusal: (Optional) The refusal of the delta + :param role: (Optional) The role of the delta + :param tool_calls: (Optional) The tool calls of the delta + :param reasoning_content: (Optional) The reasoning content from the model (non-standard, for o1/o3 models) + """ + + content: str | None = None + refusal: str | None = None + role: str | None = None + tool_calls: list[OpenAIChatCompletionToolCall] | None = None + reasoning_content: str | None = None + + +@json_schema_type +class OpenAIChunkChoice(BaseModel): + """A chunk choice from an OpenAI-compatible chat completion streaming response. + + :param delta: The delta from the chunk + :param finish_reason: The reason the model stopped generating + :param index: The index of the choice + :param logprobs: (Optional) The log probabilities for the tokens in the message + """ + + delta: OpenAIChoiceDelta + finish_reason: str + index: int + logprobs: OpenAIChoiceLogprobs | None = None + + +@json_schema_type +class OpenAIChoice(BaseModel): + """A choice from an OpenAI-compatible chat completion response. 
+
+    :param message: The message from the model
+    :param finish_reason: The reason the model stopped generating
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the message
+    """
+
+    message: OpenAIMessageParam
+    finish_reason: str
+    index: int
+    logprobs: OpenAIChoiceLogprobs | None = None
+
+
+class OpenAIChatCompletionUsageCompletionTokensDetails(BaseModel):
+    """Token details for output tokens in OpenAI chat completion usage.
+
+    :param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)
+    """
+
+    reasoning_tokens: int | None = None
+
+
+class OpenAIChatCompletionUsagePromptTokensDetails(BaseModel):
+    """Token details for prompt tokens in OpenAI chat completion usage.
+
+    :param cached_tokens: Number of tokens retrieved from cache
+    """
+
+    cached_tokens: int | None = None
+
+
+@json_schema_type
+class OpenAIChatCompletionUsage(BaseModel):
+    """Usage information for OpenAI chat completion.
+
+    :param prompt_tokens: Number of tokens in the prompt
+    :param completion_tokens: Number of tokens in the completion
+    :param total_tokens: Total tokens used (prompt + completion)
+    :param prompt_tokens_details: Detailed breakdown of prompt token usage
+    :param completion_tokens_details: Detailed breakdown of completion token usage
+    """
+
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+    prompt_tokens_details: OpenAIChatCompletionUsagePromptTokensDetails | None = None
+    completion_tokens_details: OpenAIChatCompletionUsageCompletionTokensDetails | None = None
+
+
+@json_schema_type
+class OpenAIChatCompletion(BaseModel):
+    """Response from an OpenAI-compatible chat completion request.
+
+    :param id: The ID of the chat completion
+    :param choices: List of choices
+    :param object: The object type, which will be "chat.completion"
+    :param created: The Unix timestamp in seconds when the chat completion was created
+    :param model: The model that was used to generate the chat completion
+    :param usage: Token usage information for the completion
+    """
+
+    id: str
+    choices: list[OpenAIChoice]
+    object: Literal["chat.completion"] = "chat.completion"
+    created: int
+    model: str
+    usage: OpenAIChatCompletionUsage | None = None
+
+
+@json_schema_type
+class OpenAIChatCompletionChunk(BaseModel):
+    """Chunk from a streaming response to an OpenAI-compatible chat completion request.
+
+    :param id: The ID of the chat completion
+    :param choices: List of choices
+    :param object: The object type, which will be "chat.completion.chunk"
+    :param created: The Unix timestamp in seconds when the chat completion was created
+    :param model: The model that was used to generate the chat completion
+    :param usage: Token usage information (typically included in final chunk with stream_options)
+    """
+
+    id: str
+    choices: list[OpenAIChunkChoice]
+    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
+    created: int
+    model: str
+    usage: OpenAIChatCompletionUsage | None = None
+
+
+@json_schema_type
+class OpenAICompletionLogprobs(BaseModel):
+    """The log probabilities for the tokens in the message from an OpenAI-compatible completion response.
+ + :text_offset: (Optional) The offset of the token in the text + :token_logprobs: (Optional) The log probabilities for the tokens + :tokens: (Optional) The tokens + :top_logprobs: (Optional) The top log probabilities for the tokens + """ + + text_offset: list[int] | None = None + token_logprobs: list[float] | None = None + tokens: list[str] | None = None + top_logprobs: list[dict[str, float]] | None = None + + +@json_schema_type +class OpenAICompletionChoice(BaseModel): + """A choice from an OpenAI-compatible completion response. + + :finish_reason: The reason the model stopped generating + :text: The text of the choice + :index: The index of the choice + :logprobs: (Optional) The log probabilities for the tokens in the choice + """ + + finish_reason: str + text: str + index: int + logprobs: OpenAIChoiceLogprobs | None = None + + +@json_schema_type +class OpenAICompletion(BaseModel): + """Response from an OpenAI-compatible completion request. + + :id: The ID of the completion + :choices: List of choices + :created: The Unix timestamp in seconds when the completion was created + :model: The model that was used to generate the completion + :object: The object type, which will be "text_completion" + """ + + id: str + choices: list[OpenAICompletionChoice] + created: int + model: str + object: Literal["text_completion"] = "text_completion" + + +@json_schema_type +class OpenAIEmbeddingData(BaseModel): + """A single embedding data object from an OpenAI-compatible embeddings response. + + :param object: The object type, which will be "embedding" + :param embedding: The embedding vector as a list of floats (when encoding_format="float") or as a base64-encoded string (when encoding_format="base64") + :param index: The index of the embedding in the input list + """ + + object: Literal["embedding"] = "embedding" + # TODO: consider dropping str and using openai.types.embeddings.Embedding instead of OpenAIEmbeddingData + embedding: list[float] | str + index: int + + +@json_schema_type +class OpenAIEmbeddingUsage(BaseModel): + """Usage information for an OpenAI-compatible embeddings response. + + :param prompt_tokens: The number of tokens in the input + :param total_tokens: The total number of tokens used + """ + + prompt_tokens: int + total_tokens: int + + +@json_schema_type +class OpenAIEmbeddingsResponse(BaseModel): + """Response from an OpenAI-compatible embeddings request. + + :param object: The object type, which will be "list" + :param data: List of embedding data objects + :param model: The model that was used to generate the embeddings + :param usage: Usage information + """ + + object: Literal["list"] = "list" + data: list[OpenAIEmbeddingData] + model: str + usage: OpenAIEmbeddingUsage + + +class ModelStore(Protocol): + async def get_model(self, identifier: str) -> Model: ... + + +class TextTruncation(Enum): + """Config for how to truncate text for embedding when text is longer than the model's max sequence length. Start and End semantics depend on whether the language is left-to-right or right-to-left. + + :cvar none: No truncation (default). If the text is longer than the model's max sequence length, you will get an error. + :cvar start: Truncate from the start + :cvar end: Truncate from the end + """ + + none = "none" + start = "start" + end = "end" + + +class EmbeddingTaskType(Enum): + """How is the embedding being used? This is only supported by asymmetric embedding models. + + :cvar query: Used for a query for semantic search. 
+ :cvar document: Used at indexing time when ingesting documents. + """ + + query = "query" + document = "document" + + +class OpenAICompletionWithInputMessages(OpenAIChatCompletion): + input_messages: list[OpenAIMessageParam] + + +@json_schema_type +class ListOpenAIChatCompletionResponse(BaseModel): + """Response from listing OpenAI-compatible chat completions. + + :param data: List of chat completion objects with their input messages + :param has_more: Whether there are more completions available beyond this list + :param first_id: ID of the first completion in this list + :param last_id: ID of the last completion in this list + :param object: Must be "list" to identify this as a list response + """ + + data: list[OpenAICompletionWithInputMessages] + has_more: bool + first_id: str + last_id: str + object: Literal["list"] = "list" + + +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"): + """Request parameters for OpenAI-compatible completion endpoint. + + :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. + :param prompt: The prompt to generate a completion for. + :param best_of: (Optional) The number of completions to generate. + :param echo: (Optional) Whether to echo the prompt. + :param frequency_penalty: (Optional) The penalty for repeated tokens. + :param logit_bias: (Optional) The logit bias to use. + :param logprobs: (Optional) The log probabilities to use. + :param max_tokens: (Optional) The maximum number of tokens to generate. + :param n: (Optional) The number of completions to generate. + :param presence_penalty: (Optional) The penalty for repeated tokens. + :param seed: (Optional) The seed to use. + :param stop: (Optional) The stop tokens to use. + :param stream: (Optional) Whether to stream the response. + :param stream_options: (Optional) The stream options to use. + :param temperature: (Optional) The temperature to use. + :param top_p: (Optional) The top p to use. + :param user: (Optional) The user to use. + :param suffix: (Optional) The suffix that should be appended to the completion. + """ + + # Standard OpenAI completion parameters + model: str + prompt: str | list[str] | list[int] | list[list[int]] + best_of: int | None = None + echo: bool | None = None + frequency_penalty: float | None = None + logit_bias: dict[str, float] | None = None + logprobs: bool | None = None + max_tokens: int | None = None + n: int | None = None + presence_penalty: float | None = None + seed: int | None = None + stop: str | list[str] | None = None + stream: bool | None = None + stream_options: dict[str, Any] | None = None + temperature: float | None = None + top_p: float | None = None + user: str | None = None + suffix: str | None = None + + +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"): + """Request parameters for OpenAI-compatible chat completion endpoint. + + :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. + :param messages: List of messages in the conversation. + :param frequency_penalty: (Optional) The penalty for repeated tokens. + :param function_call: (Optional) The function call to use. + :param functions: (Optional) List of functions to use. + :param logit_bias: (Optional) The logit bias to use. 
+ :param logprobs: (Optional) The log probabilities to use. + :param max_completion_tokens: (Optional) The maximum number of tokens to generate. + :param max_tokens: (Optional) The maximum number of tokens to generate. + :param n: (Optional) The number of completions to generate. + :param parallel_tool_calls: (Optional) Whether to parallelize tool calls. + :param presence_penalty: (Optional) The penalty for repeated tokens. + :param response_format: (Optional) The response format to use. + :param seed: (Optional) The seed to use. + :param stop: (Optional) The stop tokens to use. + :param stream: (Optional) Whether to stream the response. + :param stream_options: (Optional) The stream options to use. + :param temperature: (Optional) The temperature to use. + :param tool_choice: (Optional) The tool choice to use. + :param tools: (Optional) The tools to use. + :param top_logprobs: (Optional) The top log probabilities to use. + :param top_p: (Optional) The top p to use. + :param user: (Optional) The user to use. + """ + + # Standard OpenAI chat completion parameters + model: str + messages: Annotated[list[OpenAIMessageParam], Field(..., min_length=1)] + frequency_penalty: float | None = None + function_call: str | dict[str, Any] | None = None + functions: list[dict[str, Any]] | None = None + logit_bias: dict[str, float] | None = None + logprobs: bool | None = None + max_completion_tokens: int | None = None + max_tokens: int | None = None + n: int | None = None + parallel_tool_calls: bool | None = None + presence_penalty: float | None = None + response_format: OpenAIResponseFormatParam | None = None + seed: int | None = None + stop: str | list[str] | None = None + stream: bool | None = None + stream_options: dict[str, Any] | None = None + temperature: float | None = None + tool_choice: str | dict[str, Any] | None = None + tools: list[dict[str, Any]] | None = None + top_logprobs: int | None = None + top_p: float | None = None + user: str | None = None + + +# extra_body can be accessed via .model_extra +@json_schema_type +class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): + """Request parameters for OpenAI-compatible embeddings endpoint. + + :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. + :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings. + :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". + :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. + :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + """ + + model: str + input: str | list[str] + encoding_format: str | None = "float" + dimensions: int | None = None + user: str | None = None + + +@runtime_checkable +class InferenceProvider(Protocol): + """ + This protocol defines the interface that should be implemented by all inference providers. 
+ """ + + API_NAMESPACE: str = "Inference" + + model_store: ModelStore | None = None + + @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA) + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + """Rerank a list of documents based on their relevance to a query. + + :param model: The identifier of the reranking model to use. + :param query: The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length. + :param items: List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length. + :param max_num_results: (Optional) Maximum number of results to return. Default: returns all. + :returns: RerankResponse with indices sorted by relevance score (descending). + """ + raise NotImplementedError("Reranking is not implemented") + return # this is so mypy's safe-super rule will consider the method concrete + + @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1) + async def openai_completion( + self, + params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)], + ) -> OpenAICompletion: + """Create completion. + + Generate an OpenAI-compatible completion for the given prompt using the specified model. + :returns: An OpenAICompletion. + """ + ... + + @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1) + async def openai_chat_completion( + self, + params: Annotated[OpenAIChatCompletionRequestWithExtraBody, Body(...)], + ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: + """Create chat completions. + + Generate an OpenAI-compatible chat completion for the given messages using the specified model. + :returns: An OpenAIChatCompletion. + """ + ... + + @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1) + async def openai_embeddings( + self, + params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)], + ) -> OpenAIEmbeddingsResponse: + """Create embeddings. + + Generate OpenAI-compatible embeddings for the given input using the specified model. + :returns: An OpenAIEmbeddingsResponse containing the embeddings. + """ + ... + + +class Inference(InferenceProvider): + """Inference + + Llama Stack Inference API for generating completions, chat completions, and embeddings. + + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. + """ + + @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1) + async def list_chat_completions( + self, + after: str | None = None, + limit: int | None = 20, + model: str | None = None, + order: Order | None = Order.desc, + ) -> ListOpenAIChatCompletionResponse: + """List chat completions. + + :param after: The ID of the last chat completion to return. + :param limit: The maximum number of chat completions to return. + :param model: The model to filter by. 
+        :param order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".
+        :returns: A ListOpenAIChatCompletionResponse.
+        """
+        raise NotImplementedError("List chat completions is not implemented")
+
+    @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
+    async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
+        """Get chat completion.
+
+        Retrieve a chat completion by its ID.
+
+        :param completion_id: ID of the chat completion.
+        :returns: An OpenAICompletionWithInputMessages.
+        """
+        raise NotImplementedError("Get chat completion is not implemented")
diff --git a/src/llama_stack_api/inspect.py b/src/llama_stack_api/inspect.py
new file mode 100644
index 000000000..b9e5a6843
--- /dev/null
+++ b/src/llama_stack_api/inspect.py
@@ -0,0 +1,103 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Literal, Protocol, runtime_checkable
+
+from pydantic import BaseModel
+
+from llama_stack_api.datatypes import HealthStatus
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import (
+    LLAMA_STACK_API_V1,
+)
+
+# Valid values for the route filter parameter.
+# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
+# Special filter value: "deprecated" (shows deprecated routes regardless of level)
+ApiFilter = Literal["v1", "v1alpha", "v1beta", "deprecated"]
+
+
+@json_schema_type
+class RouteInfo(BaseModel):
+    """Information about an API route including its path, method, and implementing providers.
+
+    :param route: The API endpoint path
+    :param method: HTTP method for the route
+    :param provider_types: List of provider types that implement this route
+    """
+
+    route: str
+    method: str
+    provider_types: list[str]
+
+
+@json_schema_type
+class HealthInfo(BaseModel):
+    """Health status information for the service.
+
+    :param status: Current health status of the service
+    """
+
+    status: HealthStatus
+
+
+@json_schema_type
+class VersionInfo(BaseModel):
+    """Version information for the service.
+
+    :param version: Version number of the service
+    """
+
+    version: str
+
+
+@json_schema_type
+class ListRoutesResponse(BaseModel):
+    """Response containing a list of all available API routes.
+
+    :param data: List of available route information objects
+    """
+
+    data: list[RouteInfo]
+
+
+@runtime_checkable
+class Inspect(Protocol):
+    """Inspect
+
+    APIs for inspecting the Llama Stack service, including health status and available API routes with their methods and implementing providers.
+    """
+
+    @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
+    async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse:
+        """List routes.
+
+        List all available API routes with their methods and implementing providers.
+
+        :param api_filter: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns all non-deprecated routes.
+        :returns: Response containing information about all available routes.
+        """
+        ...
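+
+    # Illustrative only (not part of this API): an implementation might validate the
+    # incoming filter against the ApiFilter literal before dispatching, e.g.
+    #
+    #   from typing import get_args
+    #
+    #   if api_filter is not None and api_filter not in get_args(ApiFilter):
+    #       raise ValueError(f"invalid api_filter: {api_filter!r}")
+    #
+    # where None keeps the documented default of returning all non-deprecated routes.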
+ + @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1, require_authentication=False) + async def health(self) -> HealthInfo: + """Get health status. + + Get the current health status of the service. + + :returns: Health information indicating if the service is operational. + """ + ... + + @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1, require_authentication=False) + async def version(self) -> VersionInfo: + """Get version. + + Get the version of the service. + + :returns: Version information containing the service version number. + """ + ... diff --git a/src/llama_stack_api/internal/__init__.py b/src/llama_stack_api/internal/__init__.py new file mode 100644 index 000000000..bbf7010c3 --- /dev/null +++ b/src/llama_stack_api/internal/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# Internal subpackage for shared interfaces that are not part of the public API. + +__all__: list[str] = [] diff --git a/src/llama_stack_api/internal/kvstore.py b/src/llama_stack_api/internal/kvstore.py new file mode 100644 index 000000000..a6d982261 --- /dev/null +++ b/src/llama_stack_api/internal/kvstore.py @@ -0,0 +1,26 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from datetime import datetime +from typing import Protocol + + +class KVStore(Protocol): + """Protocol for simple key/value storage backends.""" + + # TODO: make the value type bytes instead of str + async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: ... + + async def get(self, key: str) -> str | None: ... + + async def delete(self, key: str) -> None: ... + + async def values_in_range(self, start_key: str, end_key: str) -> list[str]: ... + + async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ... + + +__all__ = ["KVStore"] diff --git a/src/llama_stack_api/internal/sqlstore.py b/src/llama_stack_api/internal/sqlstore.py new file mode 100644 index 000000000..ebb2d8ba2 --- /dev/null +++ b/src/llama_stack_api/internal/sqlstore.py @@ -0,0 +1,79 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import Mapping, Sequence +from enum import Enum +from typing import Any, Literal, Protocol + +from pydantic import BaseModel + +from llama_stack_api import PaginatedResponse + + +class ColumnType(Enum): + INTEGER = "INTEGER" + STRING = "STRING" + TEXT = "TEXT" + FLOAT = "FLOAT" + BOOLEAN = "BOOLEAN" + JSON = "JSON" + DATETIME = "DATETIME" + + +class ColumnDefinition(BaseModel): + type: ColumnType + primary_key: bool = False + nullable: bool = True + default: Any = None + + +class SqlStore(Protocol): + """Protocol for common SQL-store functionality.""" + + async def create_table(self, table: str, schema: Mapping[str, ColumnType | ColumnDefinition]) -> None: ... + + async def insert(self, table: str, data: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> None: ... + + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: ... 
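+
+    # Illustrative only (not part of the protocol): a SQL backend would typically
+    # implement upsert() with a conflict clause, e.g. (SQLite/PostgreSQL syntax):
+    #
+    #   INSERT INTO <table> (k, v) VALUES (?, ?)
+    #   ON CONFLICT (k) DO UPDATE SET v = excluded.v
+    #
+    # with conflict_columns supplying the ON CONFLICT target and update_columns
+    # restricting the SET list; when update_columns is None, all non-conflict
+    # columns in `data` are assumed to be updated.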
+ + async def fetch_all( + self, + table: str, + where: Mapping[str, Any] | None = None, + where_sql: str | None = None, + limit: int | None = None, + order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + cursor: tuple[str, str] | None = None, + ) -> PaginatedResponse: ... + + async def fetch_one( + self, + table: str, + where: Mapping[str, Any] | None = None, + where_sql: str | None = None, + order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + ) -> dict[str, Any] | None: ... + + async def update(self, table: str, data: Mapping[str, Any], where: Mapping[str, Any]) -> None: ... + + async def delete(self, table: str, where: Mapping[str, Any]) -> None: ... + + async def add_column_if_not_exists( + self, + table: str, + column_name: str, + column_type: ColumnType, + nullable: bool = True, + ) -> None: ... + + +__all__ = ["ColumnDefinition", "ColumnType", "SqlStore"] diff --git a/src/llama_stack_api/models.py b/src/llama_stack_api/models.py new file mode 100644 index 000000000..3efdfe66b --- /dev/null +++ b/src/llama_stack_api/models.py @@ -0,0 +1,171 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from enum import StrEnum +from typing import Any, Literal, Protocol, runtime_checkable + +from pydantic import BaseModel, ConfigDict, Field, field_validator + +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 + + +class CommonModelFields(BaseModel): + metadata: dict[str, Any] = Field( + default_factory=dict, + description="Any additional metadata for this model", + ) + + +@json_schema_type +class ModelType(StrEnum): + """Enumeration of supported model types in Llama Stack. + :cvar llm: Large language model for text generation and completion + :cvar embedding: Embedding model for converting text to vector representations + :cvar rerank: Reranking model for reordering documents based on their relevance to a query + """ + + llm = "llm" + embedding = "embedding" + rerank = "rerank" + + +@json_schema_type +class Model(CommonModelFields, Resource): + """A model resource representing an AI model registered in Llama Stack. 
+
+    :param type: The resource type, always 'model' for model resources
+    :param model_type: The type of model (LLM, embedding, or rerank model)
+    :param metadata: Any additional metadata for this model
+    :param identifier: Unique identifier for this resource in llama stack
+    :param provider_resource_id: Unique identifier for this resource in the provider
+    :param provider_id: ID of the provider that owns this resource
+    """
+
+    type: Literal[ResourceType.model] = ResourceType.model
+
+    @property
+    def model_id(self) -> str:
+        return self.identifier
+
+    @property
+    def provider_model_id(self) -> str:
+        assert self.provider_resource_id is not None, "Provider resource ID must be set"
+        return self.provider_resource_id
+
+    model_config = ConfigDict(protected_namespaces=())
+
+    model_type: ModelType = Field(default=ModelType.llm)
+
+    @field_validator("provider_resource_id")
+    @classmethod
+    def validate_provider_resource_id(cls, v):
+        if v is None:
+            raise ValueError("provider_resource_id cannot be None")
+        return v
+
+
+class ModelInput(CommonModelFields):
+    model_id: str
+    provider_id: str | None = None
+    provider_model_id: str | None = None
+    model_type: ModelType | None = ModelType.llm
+    model_config = ConfigDict(protected_namespaces=())
+
+
+class ListModelsResponse(BaseModel):
+    data: list[Model]
+
+
+@json_schema_type
+class OpenAIModel(BaseModel):
+    """A model from OpenAI.
+
+    :param id: The ID of the model
+    :param object: The object type, which will be "model"
+    :param created: The Unix timestamp in seconds when the model was created
+    :param owned_by: The owner of the model
+    :param custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata
+    """
+
+    id: str
+    object: Literal["model"] = "model"
+    created: int
+    owned_by: str
+    custom_metadata: dict[str, Any] | None = None
+
+
+@json_schema_type
+class OpenAIListModelsResponse(BaseModel):
+    data: list[OpenAIModel]
+
+
+@runtime_checkable
+class Models(Protocol):
+    async def list_models(self) -> ListModelsResponse:
+        """List all models.
+
+        :returns: A ListModelsResponse.
+        """
+        ...
+
+    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
+    async def openai_list_models(self) -> OpenAIListModelsResponse:
+        """List models using the OpenAI API.
+
+        :returns: An OpenAIListModelsResponse.
+        """
+        ...
+
+    @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
+    async def get_model(
+        self,
+        model_id: str,
+    ) -> Model:
+        """Get model.
+
+        Get a model by its identifier.
+
+        :param model_id: The identifier of the model to get.
+        :returns: A Model.
+        """
+        ...
+
+    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def register_model(
+        self,
+        model_id: str,
+        provider_model_id: str | None = None,
+        provider_id: str | None = None,
+        metadata: dict[str, Any] | None = None,
+        model_type: ModelType | None = None,
+    ) -> Model:
+        """Register model.
+
+        Register a model.
+
+        :param model_id: The identifier of the model to register.
+        :param provider_model_id: The identifier of the model in the provider.
+        :param provider_id: The identifier of the provider.
+        :param metadata: Any additional metadata for this model.
+        :param model_type: The type of model to register.
+        :returns: A Model.
+        """
+        ...
+
+    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def unregister_model(
+        self,
+        model_id: str,
+    ) -> None:
+        """Unregister model.
+
+        Unregister a model.
+
+        :param model_id: The identifier of the model to unregister.
+        """
+        ...
diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py
new file mode 100644
index 000000000..177d2314a
--- /dev/null
+++ b/src/llama_stack_api/openai_responses.py
@@ -0,0 +1,1362 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import Sequence
+from typing import Annotated, Any, Literal
+
+from pydantic import BaseModel, Field, model_validator
+from typing_extensions import TypedDict
+
+from llama_stack_api.schema_utils import json_schema_type, register_schema
+from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions
+
+# NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
+# take their YAML and generate this file automatically. Their YAML is available.
+
+
+@json_schema_type
+class OpenAIResponseError(BaseModel):
+    """Error details for failed OpenAI response requests.
+
+    :param code: Error code identifying the type of failure
+    :param message: Human-readable error message describing the failure
+    """
+
+    code: str
+    message: str
+
+
+@json_schema_type
+class OpenAIResponseInputMessageContentText(BaseModel):
+    """Text content for input messages in OpenAI response format.
+
+    :param text: The text content of the input message
+    :param type: Content type identifier, always "input_text"
+    """
+
+    text: str
+    type: Literal["input_text"] = "input_text"
+
+
+@json_schema_type
+class OpenAIResponseInputMessageContentImage(BaseModel):
+    """Image content for input messages in OpenAI response format.
+
+    :param detail: Level of detail for image processing, can be "low", "high", or "auto"
+    :param type: Content type identifier, always "input_image"
+    :param file_id: (Optional) The ID of the file to be sent to the model.
+    :param image_url: (Optional) URL of the image content
+    """
+
+    detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
+    type: Literal["input_image"] = "input_image"
+    file_id: str | None = None
+    image_url: str | None = None
+
+
+@json_schema_type
+class OpenAIResponseInputMessageContentFile(BaseModel):
+    """File content for input messages in OpenAI response format.
+
+    :param type: The type of the input item. Always "input_file".
+    :param file_data: (Optional) The data of the file to be sent to the model.
+    :param file_id: (Optional) The ID of the file to be sent to the model.
+    :param file_url: (Optional) The URL of the file to be sent to the model.
+    :param filename: (Optional) The name of the file to be sent to the model.
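+
+    At least one of file_data, file_id, file_url, or filename must be provided;
+    this is enforced by the model validator below.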
+    """
+
+    type: Literal["input_file"] = "input_file"
+    file_data: str | None = None
+    file_id: str | None = None
+    file_url: str | None = None
+    filename: str | None = None
+
+    @model_validator(mode="after")
+    def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
+        if not any([self.file_data, self.file_id, self.file_url, self.filename]):
+            raise ValueError(
+                "At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
+            )
+        return self
+
+
+OpenAIResponseInputMessageContent = Annotated[
+    OpenAIResponseInputMessageContentText
+    | OpenAIResponseInputMessageContentImage
+    | OpenAIResponseInputMessageContentFile,
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
+
+
+@json_schema_type
+class OpenAIResponsePrompt(BaseModel):
+    """OpenAI compatible Prompt object that is used in OpenAI responses.
+
+    :param id: Unique identifier of the prompt template
+    :param variables: Dictionary of variable names to OpenAIResponseInputMessageContent values for template substitution. Substitution values are Response input content parts, such as text, images, or files.
+    :param version: Version number of the prompt to use (defaults to latest if not specified)
+    """
+
+    id: str
+    variables: dict[str, OpenAIResponseInputMessageContent] | None = None
+    version: str | None = None
+
+
+@json_schema_type
+class OpenAIResponseAnnotationFileCitation(BaseModel):
+    """File citation annotation for referencing specific files in response content.
+
+    :param type: Annotation type identifier, always "file_citation"
+    :param file_id: Unique identifier of the referenced file
+    :param filename: Name of the referenced file
+    :param index: Position index of the citation within the content
+    """
+
+    type: Literal["file_citation"] = "file_citation"
+    file_id: str
+    filename: str
+    index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationCitation(BaseModel):
+    """URL citation annotation for referencing external web resources.
+
+    :param type: Annotation type identifier, always "url_citation"
+    :param end_index: End position of the citation span in the content
+    :param start_index: Start position of the citation span in the content
+    :param title: Title of the referenced web resource
+    :param url: URL of the referenced web resource
+    """
+
+    type: Literal["url_citation"] = "url_citation"
+    end_index: int
+    start_index: int
+    title: str
+    url: str
+
+
+@json_schema_type
+class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
+    """Container file citation annotation for referencing a file within a container.
+
+    :param type: Annotation type identifier, always "container_file_citation"
+    :param container_id: Unique identifier of the container holding the file
+    :param end_index: End position of the citation span in the content
+    :param file_id: Unique identifier of the referenced file
+    :param filename: Name of the referenced file
+    :param start_index: Start position of the citation span in the content
+    """
+
+    type: Literal["container_file_citation"] = "container_file_citation"
+    container_id: str
+    end_index: int
+    file_id: str
+    filename: str
+    start_index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationFilePath(BaseModel):
+    """File path annotation for referencing a file in response content.
+
+    :param type: Annotation type identifier, always "file_path"
+    :param file_id: Unique identifier of the referenced file
+    :param index: Position index of the annotation within the content
+    """
+
+    type: Literal["file_path"] = "file_path"
+    file_id: str
+    index: int
+
+
+OpenAIResponseAnnotations = Annotated[
+    OpenAIResponseAnnotationFileCitation
+    | OpenAIResponseAnnotationCitation
+    | OpenAIResponseAnnotationContainerFileCitation
+    | OpenAIResponseAnnotationFilePath,
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
+
+
+@json_schema_type
+class OpenAIResponseOutputMessageContentOutputText(BaseModel):
+    """Text content within an output message.
+
+    :param text: The text content of the output message
+    :param type: Content type identifier, always "output_text"
+    :param annotations: Structured annotations associated with the text
+    """
+
+    text: str
+    type: Literal["output_text"] = "output_text"
+    annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
+
+
+@json_schema_type
+class OpenAIResponseContentPartRefusal(BaseModel):
+    """Refusal content within a streamed response part.
+
+    :param type: Content part type identifier, always "refusal"
+    :param refusal: Refusal text supplied by the model
+    """
+
+    type: Literal["refusal"] = "refusal"
+    refusal: str
+
+
+OpenAIResponseOutputMessageContent = Annotated[
+    OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal,
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent")
+
+
+@json_schema_type
+class OpenAIResponseMessage(BaseModel):
+    """
+    Corresponds to the various Message types in the Responses API.
+    They are all under one type because the Responses API gives them all
+    the same "type" value, and there is no way to tell them apart in certain
+    scenarios.
+    """
+
+    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
+    role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
+    type: Literal["message"] = "message"
+
+    # The fields below are not used in all scenarios, but are required in others.
+    id: str | None = None
+    status: str | None = None
+
+
+@json_schema_type
+class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
+    """Web search tool call output message for OpenAI responses.
+
+    :param id: Unique identifier for this tool call
+    :param status: Current status of the web search operation
+    :param type: Tool call type identifier, always "web_search_call"
+    """
+
+    id: str
+    status: str
+    type: Literal["web_search_call"] = "web_search_call"
+
+
+class OpenAIResponseOutputMessageFileSearchToolCallResults(BaseModel):
+    """Search results returned by the file search operation.
+
+    :param attributes: Key-value attributes associated with the file
+    :param file_id: Unique identifier of the file containing the result
+    :param filename: Name of the file containing the result
+    :param score: Relevance score for this search result (between 0 and 1)
+    :param text: Text content of the search result
+    """
+
+    attributes: dict[str, Any]
+    file_id: str
+    filename: str
+    score: float
+    text: str
+
+
+@json_schema_type
+class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
+    """File search tool call output message for OpenAI responses.
+
+    :param id: Unique identifier for this tool call
+    :param queries: List of search queries executed
+    :param status: Current status of the file search operation
+    :param type: Tool call type identifier, always "file_search_call"
+    :param results: (Optional) Search results returned by the file search operation
+    """
+
+    id: str
+    queries: Sequence[str]
+    status: str
+    type: Literal["file_search_call"] = "file_search_call"
+    results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
+
+
+@json_schema_type
+class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
+    """Function tool call output message for OpenAI responses.
+
+    :param call_id: Unique identifier for the function call
+    :param name: Name of the function being called
+    :param arguments: JSON string containing the function arguments
+    :param type: Tool call type identifier, always "function_call"
+    :param id: (Optional) Additional identifier for the tool call
+    :param status: (Optional) Current status of the function call execution
+    """
+
+    call_id: str
+    name: str
+    arguments: str
+    type: Literal["function_call"] = "function_call"
+    id: str | None = None
+    status: str | None = None
+
+
+@json_schema_type
+class OpenAIResponseOutputMessageMCPCall(BaseModel):
+    """Model Context Protocol (MCP) call output message for OpenAI responses.
+
+    :param id: Unique identifier for this MCP call
+    :param type: Tool call type identifier, always "mcp_call"
+    :param arguments: JSON string containing the MCP call arguments
+    :param name: Name of the MCP method being called
+    :param server_label: Label identifying the MCP server handling the call
+    :param error: (Optional) Error message if the MCP call failed
+    :param output: (Optional) Output result from the successful MCP call
+    """
+
+    id: str
+    type: Literal["mcp_call"] = "mcp_call"
+    arguments: str
+    name: str
+    server_label: str
+    error: str | None = None
+    output: str | None = None
+
+
+class MCPListToolsTool(BaseModel):
+    """Tool definition returned by MCP list tools operation.
+
+    :param input_schema: JSON schema defining the tool's input parameters
+    :param name: Name of the tool
+    :param description: (Optional) Description of what the tool does
+    """
+
+    input_schema: dict[str, Any]
+    name: str
+    description: str | None = None
+
+
+@json_schema_type
+class OpenAIResponseOutputMessageMCPListTools(BaseModel):
+    """MCP list tools output message containing available tools from an MCP server.
+ + :param id: Unique identifier for this MCP list tools operation + :param type: Tool call type identifier, always "mcp_list_tools" + :param server_label: Label identifying the MCP server providing the tools + :param tools: List of available tools provided by the MCP server + """ + + id: str + type: Literal["mcp_list_tools"] = "mcp_list_tools" + server_label: str + tools: list[MCPListToolsTool] + + +@json_schema_type +class OpenAIResponseMCPApprovalRequest(BaseModel): + """ + A request for human approval of a tool invocation. + """ + + arguments: str + id: str + name: str + server_label: str + type: Literal["mcp_approval_request"] = "mcp_approval_request" + + +@json_schema_type +class OpenAIResponseMCPApprovalResponse(BaseModel): + """ + A response to an MCP approval request. + """ + + approval_request_id: str + approve: bool + type: Literal["mcp_approval_response"] = "mcp_approval_response" + id: str | None = None + reason: str | None = None + + +OpenAIResponseOutput = Annotated[ + OpenAIResponseMessage + | OpenAIResponseOutputMessageWebSearchToolCall + | OpenAIResponseOutputMessageFileSearchToolCall + | OpenAIResponseOutputMessageFunctionToolCall + | OpenAIResponseOutputMessageMCPCall + | OpenAIResponseOutputMessageMCPListTools + | OpenAIResponseMCPApprovalRequest, + Field(discriminator="type"), +] +register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput") + + +# This has to be a TypedDict because we need a "schema" field and our strong +# typing code in the schema generator doesn't support Pydantic aliases. That also +# means we can't use a discriminator field here, because TypedDicts don't support +# default values which the strong typing code requires for discriminators. +class OpenAIResponseTextFormat(TypedDict, total=False): + """Configuration for Responses API text format. + + :param type: Must be "text", "json_schema", or "json_object" to identify the format type + :param name: The name of the response format. Only used for json_schema. + :param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema. + :param description: (Optional) A description of the response format. Only used for json_schema. + :param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema. + """ + + type: Literal["text"] | Literal["json_schema"] | Literal["json_object"] + name: str | None + schema: dict[str, Any] | None + description: str | None + strict: bool | None + + +@json_schema_type +class OpenAIResponseText(BaseModel): + """Text response configuration for OpenAI responses. + + :param format: (Optional) Text format configuration specifying output format requirements + """ + + format: OpenAIResponseTextFormat | None = None + + +# Must match type Literals of OpenAIResponseInputToolWebSearch below +WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11", "web_search_2025_08_26"] + + +@json_schema_type +class OpenAIResponseInputToolWebSearch(BaseModel): + """Web search tool configuration for OpenAI response inputs. 
+
+    :param type: Web search tool type variant to use
+    :param search_context_size: (Optional) Size of search context, must be "low", "medium", or "high"
+    """
+
+    # Must match values of WebSearchToolTypes above
+    type: (
+        Literal["web_search"]
+        | Literal["web_search_preview"]
+        | Literal["web_search_preview_2025_03_11"]
+        | Literal["web_search_2025_08_26"]
+    ) = "web_search"
+    # TODO: actually use search_context_size somewhere...
+    search_context_size: str | None = Field(default="medium", pattern="^(low|medium|high)$")
+    # TODO: add user_location
+
+
+@json_schema_type
+class OpenAIResponseInputToolFunction(BaseModel):
+    """Function tool configuration for OpenAI response inputs.
+
+    :param type: Tool type identifier, always "function"
+    :param name: Name of the function that can be called
+    :param description: (Optional) Description of what the function does
+    :param parameters: (Optional) JSON schema defining the function's parameters
+    :param strict: (Optional) Whether to enforce strict parameter validation
+    """
+
+    type: Literal["function"] = "function"
+    name: str
+    description: str | None = None
+    parameters: dict[str, Any] | None
+    strict: bool | None = None
+
+
+@json_schema_type
+class OpenAIResponseInputToolFileSearch(BaseModel):
+    """File search tool configuration for OpenAI response inputs.
+
+    :param type: Tool type identifier, always "file_search"
+    :param vector_store_ids: List of vector store identifiers to search within
+    :param filters: (Optional) Additional filters to apply to the search
+    :param max_num_results: (Optional) Maximum number of search results to return (1-50)
+    :param ranking_options: (Optional) Options for ranking and scoring search results
+    """
+
+    type: Literal["file_search"] = "file_search"
+    vector_store_ids: list[str]
+    filters: dict[str, Any] | None = None
+    max_num_results: int | None = Field(default=10, ge=1, le=50)
+    ranking_options: FileSearchRankingOptions | None = None
+
+
+class ApprovalFilter(BaseModel):
+    """Filter configuration for MCP tool approval requirements.
+
+    :param always: (Optional) List of tool names that always require approval
+    :param never: (Optional) List of tool names that never require approval
+    """
+
+    always: list[str] | None = None
+    never: list[str] | None = None
+
+
+class AllowedToolsFilter(BaseModel):
+    """Filter configuration for restricting which MCP tools can be used.
+
+    :param tool_names: (Optional) List of specific tool names that are allowed
+    """
+
+    tool_names: list[str] | None = None
+
+
+@json_schema_type
+class OpenAIResponseInputToolMCP(BaseModel):
+    """Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
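+
+    The authorization token is carried on a field marked exclude=True, so it is
+    omitted when the tool configuration is serialized back out.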
+
+    :param type: Tool type identifier, always "mcp"
+    :param server_label: Label to identify this MCP server
+    :param server_url: URL endpoint of the MCP server
+    :param headers: (Optional) HTTP headers to include when connecting to the server
+    :param authorization: (Optional) OAuth access token for authenticating with the MCP server
+    :param require_approval: Approval requirement for tool calls ("always", "never", or filter)
+    :param allowed_tools: (Optional) Restriction on which tools can be used from this server
+    """
+
+    type: Literal["mcp"] = "mcp"
+    server_label: str
+    server_url: str
+    headers: dict[str, Any] | None = None
+    authorization: str | None = Field(default=None, exclude=True)
+
+    require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never"
+    allowed_tools: list[str] | AllowedToolsFilter | None = None
+
+
+OpenAIResponseInputTool = Annotated[
+    OpenAIResponseInputToolWebSearch
+    | OpenAIResponseInputToolFileSearch
+    | OpenAIResponseInputToolFunction
+    | OpenAIResponseInputToolMCP,
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
+
+
+@json_schema_type
+class OpenAIResponseToolMCP(BaseModel):
+    """Model Context Protocol (MCP) tool configuration for OpenAI response object.
+
+    :param type: Tool type identifier, always "mcp"
+    :param server_label: Label to identify this MCP server
+    :param allowed_tools: (Optional) Restriction on which tools can be used from this server
+    """
+
+    type: Literal["mcp"] = "mcp"
+    server_label: str
+    allowed_tools: list[str] | AllowedToolsFilter | None = None
+
+
+OpenAIResponseTool = Annotated[
+    OpenAIResponseInputToolWebSearch
+    | OpenAIResponseInputToolFileSearch
+    | OpenAIResponseInputToolFunction
+    | OpenAIResponseToolMCP,  # The only type that differs from the input tools is the MCP tool
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseTool, name="OpenAIResponseTool")
+
+
+class OpenAIResponseUsageOutputTokensDetails(BaseModel):
+    """Token details for output tokens in OpenAI response usage.
+
+    :param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)
+    """
+
+    reasoning_tokens: int | None = None
+
+
+class OpenAIResponseUsageInputTokensDetails(BaseModel):
+    """Token details for input tokens in OpenAI response usage.
+
+    :param cached_tokens: Number of tokens retrieved from cache
+    """
+
+    cached_tokens: int | None = None
+
+
+@json_schema_type
+class OpenAIResponseUsage(BaseModel):
+    """Usage information for OpenAI response.
+
+    :param input_tokens: Number of tokens in the input
+    :param output_tokens: Number of tokens in the output
+    :param total_tokens: Total tokens used (input + output)
+    :param input_tokens_details: Detailed breakdown of input token usage
+    :param output_tokens_details: Detailed breakdown of output token usage
+    """
+
+    input_tokens: int
+    output_tokens: int
+    total_tokens: int
+    input_tokens_details: OpenAIResponseUsageInputTokensDetails | None = None
+    output_tokens_details: OpenAIResponseUsageOutputTokensDetails | None = None
+
+
+@json_schema_type
+class OpenAIResponseObject(BaseModel):
+    """Complete OpenAI response object containing generation results and metadata.
+ + :param created_at: Unix timestamp when the response was created + :param error: (Optional) Error details if the response generation failed + :param id: Unique identifier for this response + :param model: Model identifier used for generation + :param object: Object type identifier, always "response" + :param output: List of generated output items (messages, tool calls, etc.) + :param parallel_tool_calls: (Optional) Whether to allow more than one function tool call generated per turn. + :param previous_response_id: (Optional) ID of the previous response in a conversation + :param prompt: (Optional) Reference to a prompt template and its variables. + :param status: Current status of the response generation + :param temperature: (Optional) Sampling temperature used for generation + :param text: Text formatting configuration for the response + :param top_p: (Optional) Nucleus sampling parameter used for generation + :param tools: (Optional) An array of tools the model may call while generating a response. + :param truncation: (Optional) Truncation strategy applied to the response + :param usage: (Optional) Token usage information for the response + :param instructions: (Optional) System message inserted into the model's context + :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response + :param metadata: (Optional) Dictionary of metadata key-value pairs + """ + + created_at: int + error: OpenAIResponseError | None = None + id: str + model: str + object: Literal["response"] = "response" + output: Sequence[OpenAIResponseOutput] + parallel_tool_calls: bool | None = True + previous_response_id: str | None = None + prompt: OpenAIResponsePrompt | None = None + status: str + temperature: float | None = None + # Default to text format to avoid breaking the loading of old responses + # before the field was added. New responses will have this set always. + text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) + top_p: float | None = None + tools: Sequence[OpenAIResponseTool] | None = None + truncation: str | None = None + usage: OpenAIResponseUsage | None = None + instructions: str | None = None + max_tool_calls: int | None = None + metadata: dict[str, str] | None = None + + +@json_schema_type +class OpenAIDeleteResponseObject(BaseModel): + """Response object confirming deletion of an OpenAI response. + + :param id: Unique identifier of the deleted response + :param object: Object type identifier, always "response" + :param deleted: Deletion confirmation flag, always True + """ + + id: str + object: Literal["response"] = "response" + deleted: bool = True + + +@json_schema_type +class OpenAIResponseObjectStreamResponseCreated(BaseModel): + """Streaming event indicating a new response has been created. + + :param response: The response object that was created + :param type: Event type identifier, always "response.created" + """ + + response: OpenAIResponseObject + type: Literal["response.created"] = "response.created" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseInProgress(BaseModel): + """Streaming event indicating the response remains in progress. 
+ + :param response: Current response state while in progress + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.in_progress" + """ + + response: OpenAIResponseObject + sequence_number: int + type: Literal["response.in_progress"] = "response.in_progress" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseCompleted(BaseModel): + """Streaming event indicating a response has been completed. + + :param response: Completed response object + :param type: Event type identifier, always "response.completed" + """ + + response: OpenAIResponseObject + type: Literal["response.completed"] = "response.completed" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseIncomplete(BaseModel): + """Streaming event emitted when a response ends in an incomplete state. + + :param response: Response object describing the incomplete state + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.incomplete" + """ + + response: OpenAIResponseObject + sequence_number: int + type: Literal["response.incomplete"] = "response.incomplete" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseFailed(BaseModel): + """Streaming event emitted when a response fails. + + :param response: Response object describing the failure + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.failed" + """ + + response: OpenAIResponseObject + sequence_number: int + type: Literal["response.failed"] = "response.failed" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel): + """Streaming event for when a new output item is added to the response. + + :param response_id: Unique identifier of the response containing this output + :param item: The output item that was added (message, tool call, etc.) + :param output_index: Index position of this item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.output_item.added" + """ + + response_id: str + item: OpenAIResponseOutput + output_index: int + sequence_number: int + type: Literal["response.output_item.added"] = "response.output_item.added" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel): + """Streaming event for when an output item is completed. + + :param response_id: Unique identifier of the response containing this output + :param item: The completed output item (message, tool call, etc.) + :param output_index: Index position of this item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.output_item.done" + """ + + response_id: str + item: OpenAIResponseOutput + output_index: int + sequence_number: int + type: Literal["response.output_item.done"] = "response.output_item.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel): + """Streaming event for incremental text content updates. 
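+
+    Deltas arrive in sequence_number order and concatenate into the final text
+    reported by the matching response.output_text.done event.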
+ + :param content_index: Index position within the text content + :param delta: Incremental text content being added + :param item_id: Unique identifier of the output item being updated + :param output_index: Index position of the item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.output_text.delta" + """ + + content_index: int + delta: str + item_id: str + output_index: int + sequence_number: int + type: Literal["response.output_text.delta"] = "response.output_text.delta" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel): + """Streaming event for when text output is completed. + + :param content_index: Index position within the text content + :param text: Final complete text content of the output item + :param item_id: Unique identifier of the completed output item + :param output_index: Index position of the item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.output_text.done" + """ + + content_index: int + text: str # final text of the output item + item_id: str + output_index: int + sequence_number: int + type: Literal["response.output_text.done"] = "response.output_text.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel): + """Streaming event for incremental function call argument updates. + + :param delta: Incremental function call arguments being added + :param item_id: Unique identifier of the function call being updated + :param output_index: Index position of the item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.function_call_arguments.delta" + """ + + delta: str + item_id: str + output_index: int + sequence_number: int + type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel): + """Streaming event for when function call arguments are completed. + + :param arguments: Final complete arguments JSON string for the function call + :param item_id: Unique identifier of the completed function call + :param output_index: Index position of the item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.function_call_arguments.done" + """ + + arguments: str # final arguments of the function call + item_id: str + output_index: int + sequence_number: int + type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel): + """Streaming event for web search calls in progress. 
+
+    :param item_id: Unique identifier of the web search call
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.web_search_call.in_progress"
+    """
+
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):
+    """Streaming event for web search calls that are actively searching.
+
+    :param item_id: Unique identifier of the web search call
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.web_search_call.searching"
+    """
+
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.web_search_call.searching"] = "response.web_search_call.searching"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
+    """Streaming event for completed web search calls.
+
+    :param item_id: Unique identifier of the completed web search call
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.web_search_call.completed"
+    """
+
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel):
+    """Streaming event for MCP list tools operations in progress.
+
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.mcp_list_tools.in_progress"
+    """
+
+    sequence_number: int
+    type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel):
+    """Streaming event for failed MCP list tools operations.
+
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.mcp_list_tools.failed"
+    """
+
+    sequence_number: int
+    type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel):
+    """Streaming event for completed MCP list tools operations.
+
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.mcp_list_tools.completed"
+    """
+
+    sequence_number: int
+    type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel):
+    """Streaming event for incremental MCP call argument updates.
+
+    :param delta: Incremental MCP call arguments being added
+    :param item_id: Unique identifier of the MCP call being updated
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.mcp_call.arguments.delta"
+    """
+
+    delta: str
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):
+    """Streaming event for when MCP call arguments are completed.
+
+    :param arguments: Final complete arguments JSON string for the MCP call
+    :param item_id: Unique identifier of the completed MCP call
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.mcp_call.arguments.done"
+    """
+
+    arguments: str  # final arguments of the MCP call
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
+    """Streaming event for MCP calls in progress.
+
+    :param item_id: Unique identifier of the MCP call
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.mcp_call.in_progress"
+    """
+
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
+    """Streaming event for failed MCP calls.
+ + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.mcp_call.failed" + """ + + sequence_number: int + type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel): + """Streaming event for completed MCP calls. + + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.mcp_call.completed" + """ + + sequence_number: int + type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed" + + +@json_schema_type +class OpenAIResponseContentPartOutputText(BaseModel): + """Text content within a streamed response part. + + :param type: Content part type identifier, always "output_text" + :param text: Text emitted for this content part + :param annotations: Structured annotations associated with the text + :param logprobs: (Optional) Token log probability details + """ + + type: Literal["output_text"] = "output_text" + text: str + annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) + logprobs: list[dict[str, Any]] | None = None + + +@json_schema_type +class OpenAIResponseContentPartReasoningText(BaseModel): + """Reasoning text emitted as part of a streamed response. + + :param type: Content part type identifier, always "reasoning_text" + :param text: Reasoning text supplied by the model + """ + + type: Literal["reasoning_text"] = "reasoning_text" + text: str + + +OpenAIResponseContentPart = Annotated[ + OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText, + Field(discriminator="type"), +] +register_schema(OpenAIResponseContentPart, name="OpenAIResponseContentPart") + + +@json_schema_type +class OpenAIResponseObjectStreamResponseContentPartAdded(BaseModel): + """Streaming event for when a new content part is added to a response item. + + :param content_index: Index position of the part within the content array + :param response_id: Unique identifier of the response containing this content + :param item_id: Unique identifier of the output item containing this content part + :param output_index: Index position of the output item in the response + :param part: The content part that was added + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.content_part.added" + """ + + content_index: int + response_id: str + item_id: str + output_index: int + part: OpenAIResponseContentPart + sequence_number: int + type: Literal["response.content_part.added"] = "response.content_part.added" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseContentPartDone(BaseModel): + """Streaming event for when a content part is completed. 
+ + :param content_index: Index position of the part within the content array + :param response_id: Unique identifier of the response containing this content + :param item_id: Unique identifier of the output item containing this content part + :param output_index: Index position of the output item in the response + :param part: The completed content part + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.content_part.done" + """ + + content_index: int + response_id: str + item_id: str + output_index: int + part: OpenAIResponseContentPart + sequence_number: int + type: Literal["response.content_part.done"] = "response.content_part.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseReasoningTextDelta(BaseModel): + """Streaming event for incremental reasoning text updates. + + :param content_index: Index position of the reasoning content part + :param delta: Incremental reasoning text being added + :param item_id: Unique identifier of the output item being updated + :param output_index: Index position of the item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.reasoning_text.delta" + """ + + content_index: int + delta: str + item_id: str + output_index: int + sequence_number: int + type: Literal["response.reasoning_text.delta"] = "response.reasoning_text.delta" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseReasoningTextDone(BaseModel): + """Streaming event for when reasoning text is completed. + + :param content_index: Index position of the reasoning content part + :param text: Final complete reasoning text + :param item_id: Unique identifier of the completed output item + :param output_index: Index position of the item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.reasoning_text.done" + """ + + content_index: int + text: str + item_id: str + output_index: int + sequence_number: int + type: Literal["response.reasoning_text.done"] = "response.reasoning_text.done" + + +@json_schema_type +class OpenAIResponseContentPartReasoningSummary(BaseModel): + """Reasoning summary part in a streamed response. + + :param type: Content part type identifier, always "summary_text" + :param text: Summary text + """ + + type: Literal["summary_text"] = "summary_text" + text: str + + +@json_schema_type +class OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded(BaseModel): + """Streaming event for when a new reasoning summary part is added. + + :param item_id: Unique identifier of the output item + :param output_index: Index position of the output item + :param part: The summary part that was added + :param sequence_number: Sequential number for ordering streaming events + :param summary_index: Index of the summary part within the reasoning summary + :param type: Event type identifier, always "response.reasoning_summary_part.added" + """ + + item_id: str + output_index: int + part: OpenAIResponseContentPartReasoningSummary + sequence_number: int + summary_index: int + type: Literal["response.reasoning_summary_part.added"] = "response.reasoning_summary_part.added" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseReasoningSummaryPartDone(BaseModel): + """Streaming event for when a reasoning summary part is completed. 
+ + :param item_id: Unique identifier of the output item + :param output_index: Index position of the output item + :param part: The completed summary part + :param sequence_number: Sequential number for ordering streaming events + :param summary_index: Index of the summary part within the reasoning summary + :param type: Event type identifier, always "response.reasoning_summary_part.done" + """ + + item_id: str + output_index: int + part: OpenAIResponseContentPartReasoningSummary + sequence_number: int + summary_index: int + type: Literal["response.reasoning_summary_part.done"] = "response.reasoning_summary_part.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta(BaseModel): + """Streaming event for incremental reasoning summary text updates. + + :param delta: Incremental summary text being added + :param item_id: Unique identifier of the output item + :param output_index: Index position of the output item + :param sequence_number: Sequential number for ordering streaming events + :param summary_index: Index of the summary part within the reasoning summary + :param type: Event type identifier, always "response.reasoning_summary_text.delta" + """ + + delta: str + item_id: str + output_index: int + sequence_number: int + summary_index: int + type: Literal["response.reasoning_summary_text.delta"] = "response.reasoning_summary_text.delta" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseReasoningSummaryTextDone(BaseModel): + """Streaming event for when reasoning summary text is completed. + + :param text: Final complete summary text + :param item_id: Unique identifier of the output item + :param output_index: Index position of the output item + :param sequence_number: Sequential number for ordering streaming events + :param summary_index: Index of the summary part within the reasoning summary + :param type: Event type identifier, always "response.reasoning_summary_text.done" + """ + + text: str + item_id: str + output_index: int + sequence_number: int + summary_index: int + type: Literal["response.reasoning_summary_text.done"] = "response.reasoning_summary_text.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseRefusalDelta(BaseModel): + """Streaming event for incremental refusal text updates. + + :param content_index: Index position of the content part + :param delta: Incremental refusal text being added + :param item_id: Unique identifier of the output item + :param output_index: Index position of the item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.refusal.delta" + """ + + content_index: int + delta: str + item_id: str + output_index: int + sequence_number: int + type: Literal["response.refusal.delta"] = "response.refusal.delta" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseRefusalDone(BaseModel): + """Streaming event for when refusal text is completed. 
+
+    :param content_index: Index position of the content part
+    :param refusal: Final complete refusal text
+    :param item_id: Unique identifier of the output item
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.refusal.done"
+    """
+
+    content_index: int
+    refusal: str
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.refusal.done"] = "response.refusal.done"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded(BaseModel):
+    """Streaming event for when an annotation is added to output text.
+
+    :param item_id: Unique identifier of the item to which the annotation is being added
+    :param output_index: Index position of the output item in the response's output array
+    :param content_index: Index position of the content part within the output item
+    :param annotation_index: Index of the annotation within the content part
+    :param annotation: The annotation object being added
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.output_text.annotation.added"
+    """
+
+    item_id: str
+    output_index: int
+    content_index: int
+    annotation_index: int
+    annotation: OpenAIResponseAnnotations
+    sequence_number: int
+    type: Literal["response.output_text.annotation.added"] = "response.output_text.annotation.added"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseFileSearchCallInProgress(BaseModel):
+    """Streaming event for file search calls in progress.
+
+    :param item_id: Unique identifier of the file search call
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.file_search_call.in_progress"
+    """
+
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.file_search_call.in_progress"] = "response.file_search_call.in_progress"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseFileSearchCallSearching(BaseModel):
+    """Streaming event for file search calls that are actively searching.
+
+    :param item_id: Unique identifier of the file search call
+    :param output_index: Index position of the item in the output list
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.file_search_call.searching"
+    """
+
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.file_search_call.searching"] = "response.file_search_call.searching"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseFileSearchCallCompleted(BaseModel):
+    """Streaming event for completed file search calls.
+ + :param item_id: Unique identifier of the completed file search call + :param output_index: Index position of the item in the output list + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "response.file_search_call.completed" + """ + + item_id: str + output_index: int + sequence_number: int + type: Literal["response.file_search_call.completed"] = "response.file_search_call.completed" + + +OpenAIResponseObjectStream = Annotated[ + OpenAIResponseObjectStreamResponseCreated + | OpenAIResponseObjectStreamResponseInProgress + | OpenAIResponseObjectStreamResponseOutputItemAdded + | OpenAIResponseObjectStreamResponseOutputItemDone + | OpenAIResponseObjectStreamResponseOutputTextDelta + | OpenAIResponseObjectStreamResponseOutputTextDone + | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + | OpenAIResponseObjectStreamResponseWebSearchCallInProgress + | OpenAIResponseObjectStreamResponseWebSearchCallSearching + | OpenAIResponseObjectStreamResponseWebSearchCallCompleted + | OpenAIResponseObjectStreamResponseMcpListToolsInProgress + | OpenAIResponseObjectStreamResponseMcpListToolsFailed + | OpenAIResponseObjectStreamResponseMcpListToolsCompleted + | OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + | OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + | OpenAIResponseObjectStreamResponseMcpCallInProgress + | OpenAIResponseObjectStreamResponseMcpCallFailed + | OpenAIResponseObjectStreamResponseMcpCallCompleted + | OpenAIResponseObjectStreamResponseContentPartAdded + | OpenAIResponseObjectStreamResponseContentPartDone + | OpenAIResponseObjectStreamResponseReasoningTextDelta + | OpenAIResponseObjectStreamResponseReasoningTextDone + | OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded + | OpenAIResponseObjectStreamResponseReasoningSummaryPartDone + | OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta + | OpenAIResponseObjectStreamResponseReasoningSummaryTextDone + | OpenAIResponseObjectStreamResponseRefusalDelta + | OpenAIResponseObjectStreamResponseRefusalDone + | OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded + | OpenAIResponseObjectStreamResponseFileSearchCallInProgress + | OpenAIResponseObjectStreamResponseFileSearchCallSearching + | OpenAIResponseObjectStreamResponseFileSearchCallCompleted + | OpenAIResponseObjectStreamResponseIncomplete + | OpenAIResponseObjectStreamResponseFailed + | OpenAIResponseObjectStreamResponseCompleted, + Field(discriminator="type"), +] +register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream") + + +@json_schema_type +class OpenAIResponseInputFunctionToolCallOutput(BaseModel): + """ + This represents the output of a function call that gets passed back to the model. + """ + + call_id: str + output: str + type: Literal["function_call_output"] = "function_call_output" + id: str | None = None + status: str | None = None + + +OpenAIResponseInput = Annotated[ + # Responses API allows output messages to be passed in as input + OpenAIResponseOutput + | OpenAIResponseInputFunctionToolCallOutput + | OpenAIResponseMCPApprovalResponse + | OpenAIResponseMessage, + Field(union_mode="left_to_right"), +] +register_schema(OpenAIResponseInput, name="OpenAIResponseInput") + + +@json_schema_type +class ListOpenAIResponseInputItem(BaseModel): + """List container for OpenAI response input items. 
+ + :param data: List of input items + :param object: Object type identifier, always "list" + """ + + data: Sequence[OpenAIResponseInput] + object: Literal["list"] = "list" + + +@json_schema_type +class OpenAIResponseObjectWithInput(OpenAIResponseObject): + """OpenAI response object extended with input context information. + + :param input: List of input items that led to this response + """ + + input: Sequence[OpenAIResponseInput] + + def to_response_object(self) -> OpenAIResponseObject: + """Convert to OpenAIResponseObject by excluding input field.""" + return OpenAIResponseObject(**{k: v for k, v in self.model_dump().items() if k != "input"}) + + +@json_schema_type +class ListOpenAIResponseObject(BaseModel): + """Paginated list of OpenAI response objects with navigation metadata. + + :param data: List of response objects with their input context + :param has_more: Whether there are more results available beyond this page + :param first_id: Identifier of the first item in this page + :param last_id: Identifier of the last item in this page + :param object: Object type identifier, always "list" + """ + + data: Sequence[OpenAIResponseObjectWithInput] + has_more: bool + first_id: str + last_id: str + object: Literal["list"] = "list" diff --git a/src/llama_stack_api/post_training.py b/src/llama_stack_api/post_training.py new file mode 100644 index 000000000..505c8bfd7 --- /dev/null +++ b/src/llama_stack_api/post_training.py @@ -0,0 +1,370 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from datetime import datetime +from enum import Enum +from typing import Annotated, Any, Literal, Protocol + +from pydantic import BaseModel, Field + +from llama_stack_api.common.content_types import URL +from llama_stack_api.common.job_types import JobStatus +from llama_stack_api.common.training_types import Checkpoint +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA + + +@json_schema_type +class OptimizerType(Enum): + """Available optimizer algorithms for training. + :cvar adam: Adaptive Moment Estimation optimizer + :cvar adamw: AdamW optimizer with weight decay + :cvar sgd: Stochastic Gradient Descent optimizer + """ + + adam = "adam" + adamw = "adamw" + sgd = "sgd" + + +@json_schema_type +class DatasetFormat(Enum): + """Format of the training dataset. + :cvar instruct: Instruction-following format with prompt and completion + :cvar dialog: Multi-turn conversation format with messages + """ + + instruct = "instruct" + dialog = "dialog" + + +@json_schema_type +class DataConfig(BaseModel): + """Configuration for training data and data loading. 
+ + :param dataset_id: Unique identifier for the training dataset + :param batch_size: Number of samples per training batch + :param shuffle: Whether to shuffle the dataset during training + :param data_format: Format of the dataset (instruct or dialog) + :param validation_dataset_id: (Optional) Unique identifier for the validation dataset + :param packed: (Optional) Whether to pack multiple samples into a single sequence for efficiency + :param train_on_input: (Optional) Whether to compute loss on input tokens as well as output tokens + """ + + dataset_id: str + batch_size: int + shuffle: bool + data_format: DatasetFormat + validation_dataset_id: str | None = None + packed: bool | None = False + train_on_input: bool | None = False + + +@json_schema_type +class OptimizerConfig(BaseModel): + """Configuration parameters for the optimization algorithm. + + :param optimizer_type: Type of optimizer to use (adam, adamw, or sgd) + :param lr: Learning rate for the optimizer + :param weight_decay: Weight decay coefficient for regularization + :param num_warmup_steps: Number of steps for learning rate warmup + """ + + optimizer_type: OptimizerType + lr: float + weight_decay: float + num_warmup_steps: int + + +@json_schema_type +class EfficiencyConfig(BaseModel): + """Configuration for memory and compute efficiency optimizations. + + :param enable_activation_checkpointing: (Optional) Whether to use activation checkpointing to reduce memory usage + :param enable_activation_offloading: (Optional) Whether to offload activations to CPU to save GPU memory + :param memory_efficient_fsdp_wrap: (Optional) Whether to use memory-efficient FSDP wrapping + :param fsdp_cpu_offload: (Optional) Whether to offload FSDP parameters to CPU + """ + + enable_activation_checkpointing: bool | None = False + enable_activation_offloading: bool | None = False + memory_efficient_fsdp_wrap: bool | None = False + fsdp_cpu_offload: bool | None = False + + +@json_schema_type +class TrainingConfig(BaseModel): + """Comprehensive configuration for the training process. + + :param n_epochs: Number of training epochs to run + :param max_steps_per_epoch: Maximum number of steps to run per epoch + :param gradient_accumulation_steps: Number of steps to accumulate gradients before updating + :param max_validation_steps: (Optional) Maximum number of validation steps per epoch + :param data_config: (Optional) Configuration for data loading and formatting + :param optimizer_config: (Optional) Configuration for the optimization algorithm + :param efficiency_config: (Optional) Configuration for memory and compute optimizations + :param dtype: (Optional) Data type for model parameters (bf16, fp16, fp32) + """ + + n_epochs: int + max_steps_per_epoch: int = 1 + gradient_accumulation_steps: int = 1 + max_validation_steps: int | None = 1 + data_config: DataConfig | None = None + optimizer_config: OptimizerConfig | None = None + efficiency_config: EfficiencyConfig | None = None + dtype: str | None = "bf16" + + +@json_schema_type +class LoraFinetuningConfig(BaseModel): + """Configuration for Low-Rank Adaptation (LoRA) fine-tuning. 
+ + :param type: Algorithm type identifier, always "LoRA" + :param lora_attn_modules: List of attention module names to apply LoRA to + :param apply_lora_to_mlp: Whether to apply LoRA to MLP layers + :param apply_lora_to_output: Whether to apply LoRA to output projection layers + :param rank: Rank of the LoRA adaptation (lower rank = fewer parameters) + :param alpha: LoRA scaling parameter that controls adaptation strength + :param use_dora: (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) + :param quantize_base: (Optional) Whether to quantize the base model weights + """ + + type: Literal["LoRA"] = "LoRA" + lora_attn_modules: list[str] + apply_lora_to_mlp: bool + apply_lora_to_output: bool + rank: int + alpha: int + use_dora: bool | None = False + quantize_base: bool | None = False + + +@json_schema_type +class QATFinetuningConfig(BaseModel): + """Configuration for Quantization-Aware Training (QAT) fine-tuning. + + :param type: Algorithm type identifier, always "QAT" + :param quantizer_name: Name of the quantization algorithm to use + :param group_size: Size of groups for grouped quantization + """ + + type: Literal["QAT"] = "QAT" + quantizer_name: str + group_size: int + + +AlgorithmConfig = Annotated[LoraFinetuningConfig | QATFinetuningConfig, Field(discriminator="type")] +register_schema(AlgorithmConfig, name="AlgorithmConfig") + + +@json_schema_type +class PostTrainingJobLogStream(BaseModel): + """Stream of logs from a finetuning job. + + :param job_uuid: Unique identifier for the training job + :param log_lines: List of log message strings from the training process + """ + + job_uuid: str + log_lines: list[str] + + +@json_schema_type +class RLHFAlgorithm(Enum): + """Available reinforcement learning from human feedback algorithms. + :cvar dpo: Direct Preference Optimization algorithm + """ + + dpo = "dpo" + + +@json_schema_type +class DPOLossType(Enum): + sigmoid = "sigmoid" + hinge = "hinge" + ipo = "ipo" + kto_pair = "kto_pair" + + +@json_schema_type +class DPOAlignmentConfig(BaseModel): + """Configuration for Direct Preference Optimization (DPO) alignment. + + :param beta: Temperature parameter for the DPO loss + :param loss_type: The type of loss function to use for DPO + """ + + beta: float + loss_type: DPOLossType = DPOLossType.sigmoid + + +@json_schema_type +class PostTrainingRLHFRequest(BaseModel): + """Request to finetune a model using reinforcement learning from human feedback. 
+
+    :param job_uuid: Unique identifier for the training job
+    :param finetuned_model: URL or path to the base model to fine-tune
+    :param dataset_id: Unique identifier for the training dataset
+    :param validation_dataset_id: Unique identifier for the validation dataset
+    :param algorithm: RLHF algorithm to use for training
+    :param algorithm_config: Configuration parameters for the RLHF algorithm
+    :param optimizer_config: Configuration parameters for the optimization algorithm
+    :param training_config: Configuration parameters for the training process
+    :param hyperparam_search_config: Configuration for hyperparameter search
+    :param logger_config: Configuration for training logging
+    """
+
+    job_uuid: str
+
+    finetuned_model: URL
+
+    dataset_id: str
+    validation_dataset_id: str
+
+    algorithm: RLHFAlgorithm
+    algorithm_config: DPOAlignmentConfig
+
+    optimizer_config: OptimizerConfig
+    training_config: TrainingConfig
+
+    # TODO: define these
+    hyperparam_search_config: dict[str, Any]
+    logger_config: dict[str, Any]
+
+
+@json_schema_type
+class PostTrainingJob(BaseModel):
+    job_uuid: str
+
+
+@json_schema_type
+class PostTrainingJobStatusResponse(BaseModel):
+    """Status of a finetuning job.
+
+    :param job_uuid: Unique identifier for the training job
+    :param status: Current status of the training job
+    :param scheduled_at: (Optional) Timestamp when the job was scheduled
+    :param started_at: (Optional) Timestamp when the job execution began
+    :param completed_at: (Optional) Timestamp when the job finished, if completed
+    :param resources_allocated: (Optional) Information about computational resources allocated to the job
+    :param checkpoints: List of model checkpoints created during training
+    """
+
+    job_uuid: str
+    status: JobStatus
+
+    scheduled_at: datetime | None = None
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+
+    resources_allocated: dict[str, Any] | None = None
+
+    checkpoints: list[Checkpoint] = Field(default_factory=list)
+
+
+@json_schema_type
+class ListPostTrainingJobsResponse(BaseModel):
+    data: list[PostTrainingJob]
+
+
+@json_schema_type
+class PostTrainingJobArtifactsResponse(BaseModel):
+    """Artifacts of a finetuning job.
+
+    :param job_uuid: Unique identifier for the training job
+    :param checkpoints: List of model checkpoints created during training
+    """
+
+    job_uuid: str
+    checkpoints: list[Checkpoint] = Field(default_factory=list)
+
+    # TODO(ashwin): metrics, evals
+
+
+class PostTraining(Protocol):
+    @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
+    async def supervised_fine_tune(
+        self,
+        job_uuid: str,
+        training_config: TrainingConfig,
+        hyperparam_search_config: dict[str, Any],
+        logger_config: dict[str, Any],
+        model: str | None = Field(
+            default=None,
+            description="Model descriptor for training if not in provider config",
+        ),
+        checkpoint_dir: str | None = None,
+        algorithm_config: AlgorithmConfig | None = None,
+    ) -> PostTrainingJob:
+        """Run supervised fine-tuning of a model.
+
+        :param job_uuid: The UUID of the job to create.
+        :param training_config: The training configuration.
+        :param hyperparam_search_config: The hyperparam search configuration.
+        :param logger_config: The logger configuration.
+        :param model: The model to fine-tune.
+        :param checkpoint_dir: The directory to save checkpoint(s) to.
+        :param algorithm_config: The algorithm configuration.
+        :returns: A PostTrainingJob.
+        """
+        ...
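For orientation, a minimal usage sketch of the endpoint above follows. It uses only the types defined in this module; the `impl` object (anything implementing the `PostTraining` protocol), the dataset id, and the model descriptor are illustrative assumptions, not part of the patch:

```python
from llama_stack_api.post_training import (
    DataConfig,
    DatasetFormat,
    LoraFinetuningConfig,
    TrainingConfig,
)


async def start_lora_job(impl) -> None:
    # `impl` is a hypothetical object implementing the PostTraining protocol.
    data = DataConfig(
        dataset_id="my-instruct-dataset",  # illustrative dataset id
        batch_size=8,
        shuffle=True,
        data_format=DatasetFormat.instruct,
    )
    training = TrainingConfig(n_epochs=1, data_config=data)
    algo = LoraFinetuningConfig(
        lora_attn_modules=["q_proj", "v_proj"],
        apply_lora_to_mlp=False,
        apply_lora_to_output=False,
        rank=8,
        alpha=16,
    )
    job = await impl.supervised_fine_tune(
        job_uuid="sft-job-001",
        training_config=training,
        hyperparam_search_config={},
        logger_config={},
        model="llama3.2-3b",  # illustrative model descriptor
        algorithm_config=algo,
    )
    print(f"started job {job.job_uuid}")
```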
+ + @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA) + async def preference_optimize( + self, + job_uuid: str, + finetuned_model: str, + algorithm_config: DPOAlignmentConfig, + training_config: TrainingConfig, + hyperparam_search_config: dict[str, Any], + logger_config: dict[str, Any], + ) -> PostTrainingJob: + """Run preference optimization of a model. + + :param job_uuid: The UUID of the job to create. + :param finetuned_model: The model to fine-tune. + :param algorithm_config: The algorithm configuration. + :param training_config: The training configuration. + :param hyperparam_search_config: The hyperparam search configuration. + :param logger_config: The logger configuration. + :returns: A PostTrainingJob. + """ + ... + + @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA) + async def get_training_jobs(self) -> ListPostTrainingJobsResponse: + """Get all training jobs. + + :returns: A ListPostTrainingJobsResponse. + """ + ... + + @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA) + async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: + """Get the status of a training job. + + :param job_uuid: The UUID of the job to get the status of. + :returns: A PostTrainingJobStatusResponse. + """ + ... + + @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA) + async def cancel_training_job(self, job_uuid: str) -> None: + """Cancel a training job. + + :param job_uuid: The UUID of the job to cancel. + """ + ... + + @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA) + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: + """Get the artifacts of a training job. + + :param job_uuid: The UUID of the job to get the artifacts of. + :returns: A PostTrainingJobArtifactsResponse. + """ + ... diff --git a/src/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py new file mode 100644 index 000000000..2054ccd30 --- /dev/null +++ b/src/llama_stack_api/prompts.py @@ -0,0 +1,203 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import re +import secrets +from typing import Protocol, runtime_checkable + +from pydantic import BaseModel, Field, field_validator, model_validator + +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 + + +@json_schema_type +class Prompt(BaseModel): + """A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack. + + :param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API. 
+ :param version: Version (integer starting at 1, incremented on save) + :param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>' + :param variables: List of prompt variable names that can be used in the prompt template + :param is_default: Boolean indicating whether this version is the default version for this prompt + """ + + prompt: str | None = Field(default=None, description="The system prompt with variable placeholders") + version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1) + prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'") + variables: list[str] = Field( + default_factory=list, description="List of variable names that can be used in the prompt template" + ) + is_default: bool = Field( + default=False, description="Boolean indicating whether this version is the default version" + ) + + @field_validator("prompt_id") + @classmethod + def validate_prompt_id(cls, prompt_id: str) -> str: + if not isinstance(prompt_id, str): + raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'") + + if not prompt_id.startswith("pmpt_"): + raise ValueError("prompt_id must start with 'pmpt_' prefix") + + hex_part = prompt_id[5:] + if len(hex_part) != 48: + raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)") + + for char in hex_part: + if char not in "0123456789abcdef": + raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]") + + return prompt_id + + @field_validator("version") + @classmethod + def validate_version(cls, prompt_version: int) -> int: + if prompt_version < 1: + raise ValueError("version must be >= 1") + return prompt_version + + @model_validator(mode="after") + def validate_prompt_variables(self): + """Validate that all variables used in the prompt are declared in the variables list.""" + if not self.prompt: + return self + + prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt)) + declared_variables = set(self.variables) + + undeclared = prompt_variables - declared_variables + if undeclared: + raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}") + + return self + + @classmethod + def generate_prompt_id(cls) -> str: + # Generate 48 hex characters (24 bytes) + random_bytes = secrets.token_bytes(24) + hex_string = random_bytes.hex() + return f"pmpt_{hex_string}" + + +@json_schema_type +class ListPromptsResponse(BaseModel): + """Response model to list prompts.""" + + data: list[Prompt] + + +@runtime_checkable +class Prompts(Protocol): + """Prompts + + Protocol for prompt management operations.""" + + @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1) + async def list_prompts(self) -> ListPromptsResponse: + """List all prompts. + + :returns: A ListPromptsResponse containing all prompts. + """ + ... + + @webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1) + async def list_prompt_versions( + self, + prompt_id: str, + ) -> ListPromptsResponse: + """List prompt versions. + + List all versions of a specific prompt. + + :param prompt_id: The identifier of the prompt to list versions for. + :returns: A ListPromptsResponse containing all versions of the prompt. + """ + ... + + @webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1) + async def get_prompt( + self, + prompt_id: str, + version: int | None = None, + ) -> Prompt: + """Get prompt. 
+ + Get a prompt by its identifier and optional version. + + :param prompt_id: The identifier of the prompt to get. + :param version: The version of the prompt to get (defaults to latest). + :returns: A Prompt resource. + """ + ... + + @webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1) + async def create_prompt( + self, + prompt: str, + variables: list[str] | None = None, + ) -> Prompt: + """Create prompt. + + Create a new prompt. + + :param prompt: The prompt text content with variable placeholders. + :param variables: List of variable names that can be used in the prompt template. + :returns: The created Prompt resource. + """ + ... + + @webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1) + async def update_prompt( + self, + prompt_id: str, + prompt: str, + version: int, + variables: list[str] | None = None, + set_as_default: bool = True, + ) -> Prompt: + """Update prompt. + + Update an existing prompt (increments version). + + :param prompt_id: The identifier of the prompt to update. + :param prompt: The updated prompt text content. + :param version: The current version of the prompt being updated. + :param variables: Updated list of variable names that can be used in the prompt template. + :param set_as_default: Set the new version as the default (default=True). + :returns: The updated Prompt resource with incremented version. + """ + ... + + @webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1) + async def delete_prompt( + self, + prompt_id: str, + ) -> None: + """Delete prompt. + + Delete a prompt. + + :param prompt_id: The identifier of the prompt to delete. + """ + ... + + @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1) + async def set_default_version( + self, + prompt_id: str, + version: int, + ) -> Prompt: + """Set prompt version. + + Set which version of a prompt should be the default in get_prompt (latest). + + :param prompt_id: The identifier of the prompt. + :param version: The version to set as default. + :returns: The prompt with the specified version now set as default. + """ + ... diff --git a/src/llama_stack_api/providers.py b/src/llama_stack_api/providers.py new file mode 100644 index 000000000..88c66f261 --- /dev/null +++ b/src/llama_stack_api/providers.py @@ -0,0 +1,70 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Protocol, runtime_checkable + +from pydantic import BaseModel + +from llama_stack_api.datatypes import HealthResponse +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 + + +@json_schema_type +class ProviderInfo(BaseModel): + """Information about a registered provider including its configuration and health status. + + :param api: The API name this provider implements + :param provider_id: Unique identifier for the provider + :param provider_type: The type of provider implementation + :param config: Configuration parameters for the provider + :param health: Current health status of the provider + """ + + api: str + provider_id: str + provider_type: str + config: dict[str, Any] + health: HealthResponse + + +@json_schema_type +class ListProvidersResponse(BaseModel): + """Response containing a list of all available providers. 
+ + :param data: List of provider information objects + """ + + data: list[ProviderInfo] + + +@runtime_checkable +class Providers(Protocol): + """Providers + + Providers API for inspecting, listing, and modifying providers and their configurations. + """ + + @webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1) + async def list_providers(self) -> ListProvidersResponse: + """List providers. + + List all available providers. + + :returns: A ListProvidersResponse containing information about all providers. + """ + ... + + @webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1) + async def inspect_provider(self, provider_id: str) -> ProviderInfo: + """Get provider. + + Get detailed information about a specific provider. + + :param provider_id: The ID of the provider to inspect. + :returns: A ProviderInfo object containing the provider's details. + """ + ... diff --git a/llama_stack/strong_typing/py.typed b/src/llama_stack_api/py.typed similarity index 100% rename from llama_stack/strong_typing/py.typed rename to src/llama_stack_api/py.typed diff --git a/src/llama_stack_api/pyproject.toml b/src/llama_stack_api/pyproject.toml new file mode 100644 index 000000000..0ceb2bb4e --- /dev/null +++ b/src/llama_stack_api/pyproject.toml @@ -0,0 +1,82 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[tool.uv] +required-version = ">=0.7.0" + +[project] +name = "llama-stack-api" +version = "0.4.0.dev0" +authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }] +description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs" +readme = "README.md" +requires-python = ">=3.12" +license = { "text" = "MIT" } +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Information Analysis", +] +dependencies = [ + "pydantic>=2.11.9", + "jsonschema", + "opentelemetry-sdk>=1.30.0", + "opentelemetry-exporter-otlp-proto-http>=1.30.0", +] + +[project.urls] +Homepage = "https://github.com/llamastack/llama-stack" + +[tool.setuptools.packages.find] +where = ["."] +include = ["llama_stack_api", "llama_stack_api.*"] + +[tool.setuptools.package-data] +llama_stack_api = ["py.typed"] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +select = [ + "UP", # pyupgrade + "B", # flake8-bugbear + "B9", # flake8-bugbear subset + "C", # comprehensions + "E", # pycodestyle + "F", # Pyflakes + "N", # Naming + "W", # Warnings + "DTZ", # datetime rules + "I", # isort (imports order) + "RUF001", # Checks for ambiguous Unicode characters in strings + "RUF002", # Checks for ambiguous Unicode characters in docstrings + "RUF003", # Checks for ambiguous Unicode characters in comments + "PLC2401", # Checks for the use of non-ASCII characters in variable names +] +ignore = [ + # The following ignores are desired by the project maintainers. 
+ "E402", # Module level import not at top of file + "E501", # Line too long + "F405", # Maybe undefined or defined from star import + "C408", # Ignored because we like the dict keyword argument syntax + "N812", # Ignored because import torch.nn.functional as F is PyTorch convention + + # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later. + "C901", # Complexity of the function is too high +] +unfixable = [ + "PLE2515", +] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually + +[tool.ruff.lint.per-file-ignores] +"llama_stack_api/apis/**/__init__.py" = ["F403"] + +[tool.ruff.lint.pep8-naming] +classmethod-decorators = ["classmethod", "pydantic.field_validator"] diff --git a/src/llama_stack_api/rag_tool.py b/src/llama_stack_api/rag_tool.py new file mode 100644 index 000000000..b5edd51af --- /dev/null +++ b/src/llama_stack_api/rag_tool.py @@ -0,0 +1,168 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from enum import Enum, StrEnum +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, Field, field_validator + +from llama_stack_api.common.content_types import URL, InterleavedContent + + +class RRFRanker(BaseModel): + """ + Reciprocal Rank Fusion (RRF) ranker configuration. + + :param type: The type of ranker, always "rrf" + :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. + Must be greater than 0 + """ + + type: Literal["rrf"] = "rrf" + impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance + + +class WeightedRanker(BaseModel): + """ + Weighted ranker configuration that combines vector and keyword scores. + + :param type: The type of ranker, always "weighted" + :param alpha: Weight factor between 0 and 1. + 0 means only use keyword scores, + 1 means only use vector scores, + values in between blend both scores. + """ + + type: Literal["weighted"] = "weighted" + alpha: float = Field( + default=0.5, + ge=0.0, + le=1.0, + description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.", + ) + + +Ranker = Annotated[ + RRFRanker | WeightedRanker, + Field(discriminator="type"), +] + + +class RAGDocument(BaseModel): + """ + A document to be used for document ingestion in the RAG Tool. + + :param document_id: The unique identifier for the document. + :param content: The content of the document. + :param mime_type: The MIME type of the document. + :param metadata: Additional metadata for the document. + """ + + document_id: str + content: InterleavedContent | URL + mime_type: str | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + + +class RAGQueryResult(BaseModel): + """Result of a RAG query containing retrieved content and metadata. + + :param content: (Optional) The retrieved content from the query + :param metadata: Additional metadata about the query result + """ + + content: InterleavedContent | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + + +class RAGQueryGenerator(Enum): + """Types of query generators for RAG systems. 
+ + :cvar default: Default query generator using simple text processing + :cvar llm: LLM-based query generator for enhanced query understanding + :cvar custom: Custom query generator implementation + """ + + default = "default" + llm = "llm" + custom = "custom" + + +class RAGSearchMode(StrEnum): + """ + Search modes for RAG query retrieval: + - VECTOR: Uses vector similarity search for semantic matching + - KEYWORD: Uses keyword-based search for exact matching + - HYBRID: Combines both vector and keyword search for better results + """ + + VECTOR = "vector" + KEYWORD = "keyword" + HYBRID = "hybrid" + + +class DefaultRAGQueryGeneratorConfig(BaseModel): + """Configuration for the default RAG query generator. + + :param type: Type of query generator, always 'default' + :param separator: String separator used to join query terms + """ + + type: Literal["default"] = "default" + separator: str = " " + + +class LLMRAGQueryGeneratorConfig(BaseModel): + """Configuration for the LLM-based RAG query generator. + + :param type: Type of query generator, always 'llm' + :param model: Name of the language model to use for query generation + :param template: Template string for formatting the query generation prompt + """ + + type: Literal["llm"] = "llm" + model: str + template: str + + +RAGQueryGeneratorConfig = Annotated[ + DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig, + Field(discriminator="type"), +] + + +class RAGQueryConfig(BaseModel): + """ + Configuration for the RAG query generation. + + :param query_generator_config: Configuration for the query generator. + :param max_tokens_in_context: Maximum number of tokens in the context. + :param max_chunks: Maximum number of chunks to retrieve. + :param chunk_template: Template for formatting each retrieved chunk in the context. + Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). + Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n" + :param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector". + :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker. + """ + + # This config defines how a query is generated using the messages + # for memory bank retrieval. + query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig()) + max_tokens_in_context: int = 4096 + max_chunks: int = 5 + chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" + mode: RAGSearchMode | None = RAGSearchMode.VECTOR + ranker: Ranker | None = Field(default=None) # Only used for hybrid mode + + @field_validator("chunk_template") + def validate_chunk_template(cls, v: str) -> str: + if "{chunk.content}" not in v: + raise ValueError("chunk_template must contain {chunk.content}") + if "{index}" not in v: + raise ValueError("chunk_template must contain {index}") + if len(v) == 0: + raise ValueError("chunk_template must not be empty") + return v diff --git a/llama_stack/apis/resource.py b/src/llama_stack_api/resource.py similarity index 100% rename from llama_stack/apis/resource.py rename to src/llama_stack_api/resource.py diff --git a/src/llama_stack_api/safety.py b/src/llama_stack_api/safety.py new file mode 100644 index 000000000..7b4f2af5c --- /dev/null +++ b/src/llama_stack_api/safety.py @@ -0,0 +1,132 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from enum import Enum
+from typing import Any, Protocol, runtime_checkable
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.inference import OpenAIMessageParam
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.shields import Shield
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+
+@json_schema_type
+class ModerationObjectResults(BaseModel):
+    """Moderation results for a single input.
+
+    :param flagged: Whether any of the below categories are flagged.
+    :param categories: Mapping of category name to whether it is flagged.
+    :param category_applied_input_types: Mapping of category name to the input type(s) its score applies to.
+    :param category_scores: Mapping of category name to the score predicted by the model.
+    :param user_message: (Optional) Message to convey to the user about the moderation result.
+    :param metadata: Additional metadata about the moderation result.
+    """
+
+    flagged: bool
+    categories: dict[str, bool] | None = None
+    category_applied_input_types: dict[str, list[str]] | None = None
+    category_scores: dict[str, float] | None = None
+    user_message: str | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+@json_schema_type
+class ModerationObject(BaseModel):
+    """A moderation object returned by the moderations endpoint.
+
+    :param id: The unique identifier for the moderation request.
+    :param model: The model used to generate the moderation results.
+    :param results: A list of moderation result objects, one per input.
+    """
+
+    id: str
+    model: str
+    results: list[ModerationObjectResults]
+
+
+@json_schema_type
+class ViolationLevel(Enum):
+    """Severity level of a safety violation.
+
+    :cvar INFO: Informational level violation that does not require action
+    :cvar WARN: Warning level violation that suggests caution but allows continuation
+    :cvar ERROR: Error level violation that requires blocking or intervention
+    """
+
+    INFO = "info"
+    WARN = "warn"
+    ERROR = "error"
+
+
+@json_schema_type
+class SafetyViolation(BaseModel):
+    """Details of a safety violation detected by content moderation.
+
+    :param violation_level: Severity level of the violation
+    :param user_message: (Optional) Message to convey to the user about the violation
+    :param metadata: Additional metadata including specific violation codes for debugging and telemetry
+    """
+
+    violation_level: ViolationLevel
+
+    # what message should you convey to the user
+    user_message: str | None = None
+
+    # additional metadata (including specific violation codes) more for
+    # debugging, telemetry
+    metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+@json_schema_type
+class RunShieldResponse(BaseModel):
+    """Response from running a safety shield.
+
+    :param violation: (Optional) Safety violation detected by the shield, if any
+    """
+
+    violation: SafetyViolation | None = None
+
+
+class ShieldStore(Protocol):
+    async def get_shield(self, identifier: str) -> Shield: ...
+
+
+@runtime_checkable
+class Safety(Protocol):
+    """Safety
+
+    OpenAI-compatible Moderations API.
+    """
+
+    shield_store: ShieldStore
+
+    @webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
+    async def run_shield(
+        self,
+        shield_id: str,
+        messages: list[OpenAIMessageParam],
+        params: dict[str, Any],
+    ) -> RunShieldResponse:
+        """Run shield.
+
+        Run a safety shield against a list of messages.
+
+        :param shield_id: The identifier of the shield to run.
+        :param messages: The messages to run the shield on.
+        :param params: The parameters of the shield.
+        :returns: A RunShieldResponse.
+        """
+        ...
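To make the response shape concrete, here is a minimal sketch of checking a `RunShieldResponse`. The `safety` object (anything implementing the `Safety` protocol), the shield id, and the message content are illustrative assumptions; the message is shown as a plain dict for brevity:

```python
from llama_stack_api.safety import RunShieldResponse, ViolationLevel


async def content_allowed(safety) -> bool:
    # `safety` is a hypothetical object implementing the Safety protocol.
    response: RunShieldResponse = await safety.run_shield(
        shield_id="content-safety",  # illustrative shield id
        messages=[{"role": "user", "content": "How do I bake bread?"}],
        params={},
    )
    if response.violation is None:
        return True  # no violation: content passed the shield
    if response.violation.violation_level == ViolationLevel.ERROR:
        # ERROR-level violations should block the request outright.
        print(response.violation.user_message or "blocked by shield")
        return False
    return True  # INFO/WARN: surface the message but allow continuation
```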
+ + @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1) + async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject: + """Create moderation. + + Classifies if text and/or image inputs are potentially harmful. + :param input: Input (or inputs) to classify. + Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models. + :param model: (Optional) The content moderation model you would like to use. + :returns: A moderation object. + """ + ... diff --git a/src/llama_stack_api/schema_utils.py b/src/llama_stack_api/schema_utils.py new file mode 100644 index 000000000..162ef63fb --- /dev/null +++ b/src/llama_stack_api/schema_utils.py @@ -0,0 +1,208 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from collections.abc import Callable, Iterable +from dataclasses import dataclass +from typing import Any, Literal, TypeVar + + +class ExtraBodyField[T]: + """ + Marker annotation for parameters that arrive via extra_body in the client SDK. + + These parameters: + - Will NOT appear in the generated client SDK method signature + - WILL be documented in OpenAPI spec under x-llama-stack-extra-body-params + - MUST be passed via the extra_body parameter in client SDK calls + - WILL be available in server-side method signature with proper typing + + Example: + ```python + async def create_openai_response( + self, + input: str, + model: str, + shields: Annotated[ + list[str] | None, ExtraBodyField("List of shields to apply") + ] = None, + ) -> ResponseObject: + # shields is available here with proper typing + if shields: + print(f"Using shields: {shields}") + ``` + + Client usage: + ```python + client.responses.create( + input="hello", model="llama-3", extra_body={"shields": ["shield-1"]} + ) + ``` + """ + + def __init__(self, description: str | None = None): + self.description = description + + +SchemaSource = Literal["json_schema_type", "registered_schema", "dynamic_schema"] + + +@dataclass(frozen=True) +class SchemaInfo: + """Metadata describing a schema entry exposed to OpenAPI generation.""" + + name: str + type: Any + source: SchemaSource + + +_json_schema_types: dict[type, SchemaInfo] = {} + + +def json_schema_type(cls): + """ + Decorator to mark a Pydantic model for top-level component registration. + + Models marked with this decorator will be registered as top-level components + in the OpenAPI schema, while unmarked models will be inlined. + + This provides control over schema registration to avoid unnecessary indirection + for simple one-off types while keeping complex reusable types as components. + """ + cls._llama_stack_schema_type = True + schema_name = getattr(cls, "__name__", f"Anonymous_{id(cls)}") + cls._llama_stack_schema_name = schema_name + _json_schema_types.setdefault(cls, SchemaInfo(name=schema_name, type=cls, source="json_schema_type")) + return cls + + +# Global registries for schemas discoverable by the generator +_registered_schemas: dict[Any, SchemaInfo] = {} +_dynamic_schema_types: dict[type, SchemaInfo] = {} + + +def register_schema(schema_type, name: str | None = None): + """ + Register a schema type for top-level component registration. + + This replicates the behavior of strong_typing's register_schema function. 
+ It's used for union types and other complex types that should appear as + top-level components in the OpenAPI schema. + + Args: + schema_type: The type to register (e.g., union types, Annotated types) + name: Optional name for the schema in the OpenAPI spec. If not provided, + uses the type's __name__ or a generated name. + """ + if name is None: + name = getattr(schema_type, "__name__", f"Anonymous_{id(schema_type)}") + + # Store the registration information in a global registry + # since union types don't allow setting attributes + _registered_schemas[schema_type] = SchemaInfo(name=name, type=schema_type, source="registered_schema") + + return schema_type + + +def get_registered_schema_info(schema_type: Any) -> SchemaInfo | None: + """Return the registration metadata for a schema type if present.""" + return _registered_schemas.get(schema_type) + + +def iter_registered_schema_types() -> Iterable[SchemaInfo]: + """Iterate over all explicitly registered schema entries.""" + return tuple(_registered_schemas.values()) + + +def iter_json_schema_types() -> Iterable[type]: + """Iterate over all Pydantic models decorated with @json_schema_type.""" + return tuple(info.type for info in _json_schema_types.values()) + + +def iter_dynamic_schema_types() -> Iterable[type]: + """Iterate over dynamic models registered at generation time.""" + return tuple(info.type for info in _dynamic_schema_types.values()) + + +def register_dynamic_schema_type(schema_type: type, name: str | None = None) -> type: + """Register a dynamic model generated at runtime for schema inclusion.""" + schema_name = name if name is not None else getattr(schema_type, "__name__", f"Anonymous_{id(schema_type)}") + _dynamic_schema_types[schema_type] = SchemaInfo(name=schema_name, type=schema_type, source="dynamic_schema") + return schema_type + + +def clear_dynamic_schema_types() -> None: + """Clear dynamic schema registrations.""" + _dynamic_schema_types.clear() + + +@dataclass +class WebMethod: + level: str | None = None + route: str | None = None + public: bool = False + request_examples: list[Any] | None = None + response_examples: list[Any] | None = None + method: str | None = None + raw_bytes_request_body: bool | None = False + # A descriptive name of the corresponding span created by tracing + descriptive_name: str | None = None + required_scope: str | None = None + deprecated: bool | None = False + require_authentication: bool | None = True + + +CallableT = TypeVar("CallableT", bound=Callable[..., Any]) + + +def webmethod( + route: str | None = None, + method: str | None = None, + level: str | None = None, + public: bool | None = False, + request_examples: list[Any] | None = None, + response_examples: list[Any] | None = None, + raw_bytes_request_body: bool | None = False, + descriptive_name: str | None = None, + required_scope: str | None = None, + deprecated: bool | None = False, + require_authentication: bool | None = True, +) -> Callable[[CallableT], CallableT]: + """ + Decorator that supplies additional metadata to an endpoint operation function. + + :param route: The URL path pattern associated with this operation which path parameters are substituted into. + :param public: True if the operation can be invoked without prior authentication. + :param request_examples: Sample requests that the operation might take. Pass a list of objects, not JSON. + :param response_examples: Sample responses that the operation might produce. Pass a list of objects, not JSON. 
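+    :param method: The HTTP method (e.g. GET or POST) used to invoke the operation.
+    :param level: The API version level the route is registered under (e.g. v1, v1alpha).
+    :param raw_bytes_request_body: True if the request body should be passed through as raw bytes rather than parsed.
+    :param descriptive_name: Descriptive name for the tracing span created for this operation.
+    :param deprecated: True if the operation is deprecated.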
+ :param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer'). + :param require_authentication: Whether this endpoint requires authentication (default True). + """ + + def wrap(func: CallableT) -> CallableT: + webmethod_obj = WebMethod( + route=route, + method=method, + level=level, + public=public or False, + request_examples=request_examples, + response_examples=response_examples, + raw_bytes_request_body=raw_bytes_request_body, + descriptive_name=descriptive_name, + required_scope=required_scope, + deprecated=deprecated, + require_authentication=require_authentication if require_authentication is not None else True, + ) + + # Store all webmethods in a list to support multiple decorators + if not hasattr(func, "__webmethods__"): + func.__webmethods__ = [] # type: ignore + func.__webmethods__.append(webmethod_obj) # type: ignore + + # Keep the last one as __webmethod__ for backwards compatibility + func.__webmethod__ = webmethod_obj # type: ignore + return func + + return wrap diff --git a/src/llama_stack_api/scoring.py b/src/llama_stack_api/scoring.py new file mode 100644 index 000000000..47d144d21 --- /dev/null +++ b/src/llama_stack_api/scoring.py @@ -0,0 +1,93 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Protocol, runtime_checkable + +from pydantic import BaseModel + +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1 + +# mapping of metric to value +ScoringResultRow = dict[str, Any] + + +@json_schema_type +class ScoringResult(BaseModel): + """ + A scoring result for a single row. + + :param score_rows: The scoring result for each row. Each row is a map of column name to value. + :param aggregated_results: Map of metric name to aggregated value + """ + + score_rows: list[ScoringResultRow] + # aggregated metrics to value + aggregated_results: dict[str, Any] + + +@json_schema_type +class ScoreBatchResponse(BaseModel): + """Response from batch scoring operations on datasets. + + :param dataset_id: (Optional) The identifier of the dataset that was scored + :param results: A map of scoring function name to ScoringResult + """ + + dataset_id: str | None = None + results: dict[str, ScoringResult] + + +@json_schema_type +class ScoreResponse(BaseModel): + """ + The response from scoring. + + :param results: A map of scoring function name to ScoringResult. + """ + + # each key in the dict is a scoring function name + results: dict[str, ScoringResult] + + +class ScoringFunctionStore(Protocol): + def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: ... + + +@runtime_checkable +class Scoring(Protocol): + scoring_function_store: ScoringFunctionStore + + @webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1) + async def score_batch( + self, + dataset_id: str, + scoring_functions: dict[str, ScoringFnParams | None], + save_results_dataset: bool = False, + ) -> ScoreBatchResponse: + """Score a batch of rows. + + :param dataset_id: The ID of the dataset to score. + :param scoring_functions: The scoring functions to use for the scoring. + :param save_results_dataset: Whether to save the results to a dataset. + :returns: A ScoreBatchResponse. + """ + ... 
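As a usage sketch for the batch entry point above: `scoring` stands in for any object implementing the `Scoring` protocol, and the dataset and scoring-function ids are illustrative. Passing `None` as a function's params falls back to its registered defaults:

```python
from llama_stack_api.scoring import ScoreBatchResponse


async def run_eval(scoring) -> None:
    # `scoring` is a hypothetical object implementing the Scoring protocol.
    resp: ScoreBatchResponse = await scoring.score_batch(
        dataset_id="eval-dataset",  # illustrative dataset id
        scoring_functions={"basic::equality": None},  # None = registered defaults
        save_results_dataset=False,
    )
    for fn_name, result in resp.results.items():
        # aggregated_results maps metric name to its aggregated value
        print(fn_name, result.aggregated_results)
```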
+
+    @webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
+    async def score(
+        self,
+        input_rows: list[dict[str, Any]],
+        scoring_functions: dict[str, ScoringFnParams | None],
+    ) -> ScoreResponse:
+        """Score a list of rows.
+
+        :param input_rows: The rows to score.
+        :param scoring_functions: The scoring functions to use for the scoring.
+        :returns: A ScoreResponse object containing rows and aggregated results.
+        """
+        ...
diff --git a/src/llama_stack_api/scoring_functions.py b/src/llama_stack_api/scoring_functions.py
new file mode 100644
index 000000000..12051c20c
--- /dev/null
+++ b/src/llama_stack_api/scoring_functions.py
@@ -0,0 +1,211 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from enum import StrEnum
+from typing import (
+    Annotated,
+    Any,
+    Literal,
+    Protocol,
+    runtime_checkable,
+)
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.common.type_system import ParamType
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+
+# Perhaps more structure can be imposed on these functions. Maybe they could be associated
+# with standard metrics so they can be rolled up?
+@json_schema_type
+class ScoringFnParamsType(StrEnum):
+    """Types of scoring function parameter configurations.
+    :cvar llm_as_judge: Use an LLM to evaluate and score responses
+    :cvar regex_parser: Use regex patterns to extract and score specific parts of responses
+    :cvar basic: Basic scoring with simple aggregation functions
+    """
+
+    llm_as_judge = "llm_as_judge"
+    regex_parser = "regex_parser"
+    basic = "basic"
+
+
+@json_schema_type
+class AggregationFunctionType(StrEnum):
+    """Types of aggregation functions for scoring results.
+    :cvar average: Calculate the arithmetic mean of scores
+    :cvar weighted_average: Calculate a weighted average of scores
+    :cvar median: Calculate the median value of scores
+    :cvar categorical_count: Count occurrences of categorical values
+    :cvar accuracy: Calculate accuracy as the proportion of correct answers
+    """
+
+    average = "average"
+    weighted_average = "weighted_average"
+    median = "median"
+    categorical_count = "categorical_count"
+    accuracy = "accuracy"
+
+
+@json_schema_type
+class LLMAsJudgeScoringFnParams(BaseModel):
+    """Parameters for LLM-as-judge scoring function configuration.
+    :param type: The type of scoring function parameters, always llm_as_judge
+    :param judge_model: Identifier of the LLM model to use as a judge for scoring
+    :param prompt_template: (Optional) Custom prompt template for the judge model
+    :param judge_score_regexes: Regexes to extract the answer from generated response
+    :param aggregation_functions: Aggregation functions to apply to the scores of each row
+    """
+
+    type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge
+    judge_model: str
+    prompt_template: str | None = None
+    judge_score_regexes: list[str] = Field(
+        description="Regexes to extract the answer from generated response",
+        default_factory=list,
+    )
+    aggregation_functions: list[AggregationFunctionType] = Field(
+        description="Aggregation functions to apply to the scores of each row",
+        default_factory=list,
+    )
+
+
+@json_schema_type
+class RegexParserScoringFnParams(BaseModel):
+    """Parameters for regex parser scoring function configuration.
+    :param type: The type of scoring function parameters, always regex_parser
+    :param parsing_regexes: Regex to extract the answer from generated response
+    :param aggregation_functions: Aggregation functions to apply to the scores of each row
+    """
+
+    type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser
+    parsing_regexes: list[str] = Field(
+        description="Regex to extract the answer from generated response",
+        default_factory=list,
+    )
+    aggregation_functions: list[AggregationFunctionType] = Field(
+        description="Aggregation functions to apply to the scores of each row",
+        default_factory=list,
+    )
+
+
+@json_schema_type
+class BasicScoringFnParams(BaseModel):
+    """Parameters for basic scoring function configuration.
+    :param type: The type of scoring function parameters, always basic
+    :param aggregation_functions: Aggregation functions to apply to the scores of each row
+    """
+
+    type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic
+    aggregation_functions: list[AggregationFunctionType] = Field(
+        description="Aggregation functions to apply to the scores of each row",
+        default_factory=list,
+    )
+
+
+ScoringFnParams = Annotated[
+    LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams,
+    Field(discriminator="type"),
+]
+register_schema(ScoringFnParams, name="ScoringFnParams")
+
+
+class CommonScoringFnFields(BaseModel):
+    description: str | None = None
+    metadata: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Any additional metadata for this definition",
+    )
+    return_type: ParamType = Field(
+        description="The return type of the deterministic function",
+    )
+    params: ScoringFnParams | None = Field(
+        description="The parameters for the scoring function for benchmark eval, these can be overridden for app eval",
+        default=None,
+    )
+
+
+@json_schema_type
+class ScoringFn(CommonScoringFnFields, Resource):
+    """A scoring function resource for evaluating model outputs.
+ :param type: The resource type, always scoring_function + """ + + type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function + + @property + def scoring_fn_id(self) -> str: + return self.identifier + + @property + def provider_scoring_fn_id(self) -> str | None: + return self.provider_resource_id + + +class ScoringFnInput(CommonScoringFnFields, BaseModel): + scoring_fn_id: str + provider_id: str | None = None + provider_scoring_fn_id: str | None = None + + +@json_schema_type +class ListScoringFunctionsResponse(BaseModel): + data: list[ScoringFn] + + +@runtime_checkable +class ScoringFunctions(Protocol): + @webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1) + async def list_scoring_functions(self) -> ListScoringFunctionsResponse: + """List all scoring functions. + + :returns: A ListScoringFunctionsResponse. + """ + ... + + @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1) + async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: + """Get a scoring function by its ID. + + :param scoring_fn_id: The ID of the scoring function to get. + :returns: A ScoringFn. + """ + ... + + @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) + async def register_scoring_function( + self, + scoring_fn_id: str, + description: str, + return_type: ParamType, + provider_scoring_fn_id: str | None = None, + provider_id: str | None = None, + params: ScoringFnParams | None = None, + ) -> None: + """Register a scoring function. + + :param scoring_fn_id: The ID of the scoring function to register. + :param description: The description of the scoring function. + :param return_type: The return type of the scoring function. + :param provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. + :param provider_id: The ID of the provider to use for the scoring function. + :param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval. + """ + ... + + @webmethod( + route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True + ) + async def unregister_scoring_function(self, scoring_fn_id: str) -> None: + """Unregister a scoring function. + + :param scoring_fn_id: The ID of the scoring function to unregister. + """ + ... diff --git a/src/llama_stack_api/shields.py b/src/llama_stack_api/shields.py new file mode 100644 index 000000000..36ad2351b --- /dev/null +++ b/src/llama_stack_api/shields.py @@ -0,0 +1,93 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Literal, Protocol, runtime_checkable + +from pydantic import BaseModel + +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 + + +class CommonShieldFields(BaseModel): + params: dict[str, Any] | None = None + + +@json_schema_type +class Shield(CommonShieldFields, Resource): + """A safety shield resource that can be used to check content. 
+ + :param params: (Optional) Configuration parameters for the shield + :param type: The resource type, always shield + """ + + type: Literal[ResourceType.shield] = ResourceType.shield + + @property + def shield_id(self) -> str: + return self.identifier + + @property + def provider_shield_id(self) -> str | None: + return self.provider_resource_id + + +class ShieldInput(CommonShieldFields): + shield_id: str + provider_id: str | None = None + provider_shield_id: str | None = None + + +@json_schema_type +class ListShieldsResponse(BaseModel): + data: list[Shield] + + +@runtime_checkable +class Shields(Protocol): + @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1) + async def list_shields(self) -> ListShieldsResponse: + """List all shields. + + :returns: A ListShieldsResponse. + """ + ... + + @webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1) + async def get_shield(self, identifier: str) -> Shield: + """Get a shield by its identifier. + + :param identifier: The identifier of the shield to get. + :returns: A Shield. + """ + ... + + @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) + async def register_shield( + self, + shield_id: str, + provider_shield_id: str | None = None, + provider_id: str | None = None, + params: dict[str, Any] | None = None, + ) -> Shield: + """Register a shield. + + :param shield_id: The identifier of the shield to register. + :param provider_shield_id: The identifier of the shield in the provider. + :param provider_id: The identifier of the provider. + :param params: The parameters of the shield. + :returns: A Shield. + """ + ... + + @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) + async def unregister_shield(self, identifier: str) -> None: + """Unregister a shield. + + :param identifier: The identifier of the shield to unregister. + """ + ... diff --git a/src/llama_stack_api/tools.py b/src/llama_stack_api/tools.py new file mode 100644 index 000000000..2a2a4304c --- /dev/null +++ b/src/llama_stack_api/tools.py @@ -0,0 +1,226 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from enum import Enum +from typing import Any, Literal, Protocol + +from pydantic import BaseModel +from typing_extensions import runtime_checkable + +from llama_stack_api.common.content_types import URL, InterleavedContent +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 + + +@json_schema_type +class ToolDef(BaseModel): + """Tool definition used in runtime contexts. 
+ + :param name: Name of the tool + :param description: (Optional) Human-readable description of what the tool does + :param input_schema: (Optional) JSON Schema for tool inputs (MCP inputSchema) + :param output_schema: (Optional) JSON Schema for tool outputs (MCP outputSchema) + :param metadata: (Optional) Additional metadata about the tool + :param toolgroup_id: (Optional) ID of the tool group this tool belongs to + """ + + toolgroup_id: str | None = None + name: str + description: str | None = None + input_schema: dict[str, Any] | None = None + output_schema: dict[str, Any] | None = None + metadata: dict[str, Any] | None = None + + +@json_schema_type +class ToolGroupInput(BaseModel): + """Input data for registering a tool group. + + :param toolgroup_id: Unique identifier for the tool group + :param provider_id: ID of the provider that will handle this tool group + :param args: (Optional) Additional arguments to pass to the provider + :param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools + """ + + toolgroup_id: str + provider_id: str + args: dict[str, Any] | None = None + mcp_endpoint: URL | None = None + + +@json_schema_type +class ToolGroup(Resource): + """A group of related tools managed together. + + :param type: Type of resource, always 'tool_group' + :param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools + :param args: (Optional) Additional arguments for the tool group + """ + + type: Literal[ResourceType.tool_group] = ResourceType.tool_group + mcp_endpoint: URL | None = None + args: dict[str, Any] | None = None + + +@json_schema_type +class ToolInvocationResult(BaseModel): + """Result of a tool invocation. + + :param content: (Optional) The output content from the tool execution + :param error_message: (Optional) Error message if the tool execution failed + :param error_code: (Optional) Numeric error code if the tool execution failed + :param metadata: (Optional) Additional metadata about the tool execution + """ + + content: InterleavedContent | None = None + error_message: str | None = None + error_code: int | None = None + metadata: dict[str, Any] | None = None + + +class ToolStore(Protocol): + async def get_tool(self, tool_name: str) -> ToolDef: ... + async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ... + + +@json_schema_type +class ListToolGroupsResponse(BaseModel): + """Response containing a list of tool groups. + + :param data: List of tool groups + """ + + data: list[ToolGroup] + + +@json_schema_type +class ListToolDefsResponse(BaseModel): + """Response containing a list of tool definitions. + + :param data: List of tool definitions + """ + + data: list[ToolDef] + + +@runtime_checkable +class ToolGroups(Protocol): + @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) + async def register_tool_group( + self, + toolgroup_id: str, + provider_id: str, + mcp_endpoint: URL | None = None, + args: dict[str, Any] | None = None, + ) -> None: + """Register a tool group. + + :param toolgroup_id: The ID of the tool group to register. + :param provider_id: The ID of the provider to use for the tool group. + :param mcp_endpoint: The MCP endpoint to use for the tool group. + :param args: A dictionary of arguments to pass to the tool group. + """ + ... + + @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) + async def get_tool_group( + self, + toolgroup_id: str, + ) -> ToolGroup: + """Get a tool group by its ID. 
+
+        :param toolgroup_id: The ID of the tool group to get.
+        :returns: A ToolGroup.
+        """
+        ...
+
+    @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def list_tool_groups(self) -> ListToolGroupsResponse:
+        """List all tool groups.
+
+        :returns: A ListToolGroupsResponse.
+        """
+        ...
+
+    @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
+        """List tools, optionally filtered by tool group.
+
+        :param toolgroup_id: The ID of the tool group to list tools for.
+        :returns: A ListToolDefsResponse.
+        """
+        ...
+
+    @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def get_tool(
+        self,
+        tool_name: str,
+    ) -> ToolDef:
+        """Get a tool by its name.
+
+        :param tool_name: The name of the tool to get.
+        :returns: A ToolDef.
+        """
+        ...
+
+    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def unregister_toolgroup(
+        self,
+        toolgroup_id: str,
+    ) -> None:
+        """Unregister a tool group.
+
+        :param toolgroup_id: The ID of the tool group to unregister.
+        """
+        ...
+
+
+class SpecialToolGroup(Enum):
+    """Special tool groups with predefined functionality.
+
+    :cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval
+    """
+
+    rag_tool = "rag_tool"
+
+
+@runtime_checkable
+class ToolRuntime(Protocol):
+    tool_store: ToolStore | None = None
+
+    # TODO: This needs to be renamed once the OpenAPI generator name conflict issue is fixed.
+    @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def list_runtime_tools(
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
+    ) -> ListToolDefsResponse:
+        """List all tools in the runtime.
+
+        :param tool_group_id: The ID of the tool group to list tools for.
+        :param mcp_endpoint: The MCP endpoint to use for the tool group.
+        :param authorization: (Optional) OAuth access token for authenticating with the MCP server.
+        :returns: A ListToolDefsResponse.
+        """
+        ...
+
+    @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def invoke_tool(
+        self,
+        tool_name: str,
+        kwargs: dict[str, Any],
+        authorization: str | None = None,
+    ) -> ToolInvocationResult:
+        """Run a tool with the given arguments.
+
+        :param tool_name: The name of the tool to invoke.
+        :param kwargs: A dictionary of arguments to pass to the tool.
+        :param authorization: (Optional) OAuth access token for authenticating with the MCP server.
+        :returns: A ToolInvocationResult.
+        """
+        ...
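
Editorial aside (not part of the patch): the ScoringFnParams union in scoring_functions.py above is discriminated on the type field, so pydantic routes a plain dict to the matching parameter class during validation. A minimal, self-contained sketch of that mechanism, using trimmed-down local stand-ins rather than the real classes:

# Editorial sketch of the discriminated-union pattern used by ScoringFnParams.
# These are trimmed local stand-ins, not the classes from the patch.
from enum import StrEnum
from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter


class ParamsType(StrEnum):
    llm_as_judge = "llm_as_judge"
    basic = "basic"


class LLMAsJudgeParams(BaseModel):
    type: Literal[ParamsType.llm_as_judge] = ParamsType.llm_as_judge
    judge_model: str
    judge_score_regexes: list[str] = Field(default_factory=list)


class BasicParams(BaseModel):
    type: Literal[ParamsType.basic] = ParamsType.basic


Params = Annotated[LLMAsJudgeParams | BasicParams, Field(discriminator="type")]
adapter = TypeAdapter(Params)

# The "type" tag selects the model class during validation.
parsed = adapter.validate_python({"type": "llm_as_judge", "judge_model": "my-judge"})
assert isinstance(parsed, LLMAsJudgeParams)
assert isinstance(adapter.validate_python({"type": "basic"}), BasicParams)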
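
The @runtime_checkable Protocol classes added here (ScoringFunctions, Shields, ToolGroups, ToolRuntime) pin down only a method surface, so any object with matching async methods satisfies them structurally, with no inheritance required. A hedged sketch of a toy in-memory Shields provider; it assumes Shield accepts the Resource fields (identifier, provider_id, provider_resource_id) implied by the properties above, and names like ToyShields are purely illustrative:

# Editorial sketch: a toy in-memory provider satisfying the Shields protocol.
# Assumes Shield accepts identifier/provider_id/provider_resource_id via Resource.
import asyncio
from typing import Any

from llama_stack_api.shields import ListShieldsResponse, Shield, Shields


class ToyShields:
    def __init__(self) -> None:
        self._store: dict[str, Shield] = {}

    async def list_shields(self) -> ListShieldsResponse:
        return ListShieldsResponse(data=list(self._store.values()))

    async def get_shield(self, identifier: str) -> Shield:
        return self._store[identifier]

    async def register_shield(
        self,
        shield_id: str,
        provider_shield_id: str | None = None,
        provider_id: str | None = None,
        params: dict[str, Any] | None = None,
    ) -> Shield:
        shield = Shield(
            identifier=shield_id,
            provider_id=provider_id,
            provider_resource_id=provider_shield_id,
            params=params,
        )
        self._store[shield_id] = shield
        return shield

    async def unregister_shield(self, identifier: str) -> None:
        self._store.pop(identifier, None)


async def main() -> None:
    provider = ToyShields()
    # Structural check: works because Shields is @runtime_checkable.
    assert isinstance(provider, Shields)
    await provider.register_shield("content-filter", provider_id="toy")
    print((await provider.list_shields()).data)


asyncio.run(main())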
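
In the same spirit, the ToolRuntime surface above reduces to "enumerate ToolDefs, then invoke one by name". A hedged sketch of a local echo runtime follows; it assumes a bare str is valid InterleavedContent for ToolInvocationResult.content, and every name here is illustrative rather than part of the patch:

# Editorial sketch: a local ToolRuntime-shaped object exposing one "echo" tool.
# Assumes a bare str is acceptable InterleavedContent for the result content.
import asyncio
from typing import Any

from llama_stack_api.tools import ListToolDefsResponse, ToolDef, ToolInvocationResult

ECHO = ToolDef(
    name="echo",
    description="Echo back the provided text",
    input_schema={"type": "object", "properties": {"text": {"type": "string"}}},
)


class ToyToolRuntime:
    tool_store = None  # the protocol allows tool_store to be None

    async def list_runtime_tools(
        self,
        tool_group_id: str | None = None,
        mcp_endpoint: Any = None,
        authorization: str | None = None,
    ) -> ListToolDefsResponse:
        return ListToolDefsResponse(data=[ECHO])

    async def invoke_tool(
        self,
        tool_name: str,
        kwargs: dict[str, Any],
        authorization: str | None = None,
    ) -> ToolInvocationResult:
        if tool_name != ECHO.name:
            return ToolInvocationResult(error_message=f"unknown tool: {tool_name}", error_code=404)
        return ToolInvocationResult(content=kwargs.get("text", ""))


async def main() -> None:
    runtime = ToyToolRuntime()
    print([t.name for t in (await runtime.list_runtime_tools()).data])
    result = await runtime.invoke_tool("echo", {"text": "hello"})
    print(result.content, result.error_message)


asyncio.run(main())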
diff --git a/src/llama_stack_api/uv.lock b/src/llama_stack_api/uv.lock new file mode 100644 index 000000000..d61eb9be7 --- /dev/null +++ b/src/llama_stack_api/uv.lock @@ -0,0 +1,498 @@ +version = 1 +revision = 3 +requires-python = ">=3.12" + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "attrs" +version = "25.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, +] + +[[package]] +name = "certifi" +version = "2025.11.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.72.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433, upload-time = "2025-11-06T18:29:24.087Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "importlib-metadata" +version = "8.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, +] + +[[package]] +name = "jsonschema" +version = "4.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + +[[package]] +name = "llama-stack-api" +version = "0.4.0.dev0" +source = { editable = "." 
} +dependencies = [ + { name = "jsonschema" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [ + { name = "jsonschema" }, + { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, + { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, + { name = "pydantic", specifier = ">=2.11.9" }, +] + +[[package]] +name = "opentelemetry-api" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-proto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size = 17282, upload-time = "2025-10-16T08:35:54.422Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = 
"sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" }, +] + +[[package]] +name = "opentelemetry-sdk" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" }, +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.59b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" }, +] + +[[package]] +name = "protobuf" +version = "6.33.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/03/a1440979a3f74f16cab3b75b0da1a1a7f922d56a8ddea96092391998edc0/protobuf-6.33.1.tar.gz", hash = "sha256:97f65757e8d09870de6fd973aeddb92f85435607235d20b2dfed93405d00c85b", size = 443432, upload-time = "2025-11-13T16:44:18.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/f1/446a9bbd2c60772ca36556bac8bfde40eceb28d9cc7838755bc41e001d8f/protobuf-6.33.1-cp310-abi3-win32.whl", hash = "sha256:f8d3fdbc966aaab1d05046d0240dd94d40f2a8c62856d41eaa141ff64a79de6b", size = 425593, upload-time = "2025-11-13T16:44:06.275Z" }, + { url = "https://files.pythonhosted.org/packages/a6/79/8780a378c650e3df849b73de8b13cf5412f521ca2ff9b78a45c247029440/protobuf-6.33.1-cp310-abi3-win_amd64.whl", hash = "sha256:923aa6d27a92bf44394f6abf7ea0500f38769d4b07f4be41cb52bd8b1123b9ed", size = 436883, upload-time = "2025-11-13T16:44:09.222Z" }, + { url = "https://files.pythonhosted.org/packages/cd/93/26213ff72b103ae55bb0d73e7fb91ea570ef407c3ab4fd2f1f27cac16044/protobuf-6.33.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:fe34575f2bdde76ac429ec7b570235bf0c788883e70aee90068e9981806f2490", size = 427522, upload-time = "2025-11-13T16:44:10.475Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/32/df4a35247923393aa6b887c3b3244a8c941c32a25681775f96e2b418f90e/protobuf-6.33.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:f8adba2e44cde2d7618996b3fc02341f03f5bc3f2748be72dc7b063319276178", size = 324445, upload-time = "2025-11-13T16:44:11.869Z" }, + { url = "https://files.pythonhosted.org/packages/8e/d0/d796e419e2ec93d2f3fa44888861c3f88f722cde02b7c3488fcc6a166820/protobuf-6.33.1-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:0f4cf01222c0d959c2b399142deb526de420be8236f22c71356e2a544e153c53", size = 339161, upload-time = "2025-11-13T16:44:12.778Z" }, + { url = "https://files.pythonhosted.org/packages/1d/2a/3c5f05a4af06649547027d288747f68525755de692a26a7720dced3652c0/protobuf-6.33.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:8fd7d5e0eb08cd5b87fd3df49bc193f5cfd778701f47e11d127d0afc6c39f1d1", size = 323171, upload-time = "2025-11-13T16:44:14.035Z" }, + { url = "https://files.pythonhosted.org/packages/08/b4/46310463b4f6ceef310f8348786f3cff181cea671578e3d9743ba61a459e/protobuf-6.33.1-py3-none-any.whl", hash = "sha256:d595a9fd694fdeb061a62fbe10eb039cc1e444df81ec9bb70c7fc59ebcb1eafa", size = 170477, upload-time = "2025-11-13T16:44:17.633Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/ad/a17bc283d7d81837c061c49e3eaa27a45991759a1b7eae1031921c6bd924/pydantic-2.12.4.tar.gz", hash = "sha256:0f8cb9555000a4b5b617f66bfd2566264c4984b27589d3b845685983e8ea85ac", size = 821038, upload-time = "2025-11-05T10:50:08.59Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/2f/e68750da9b04856e2a7ec56fc6f034a5a79775e9b9a81882252789873798/pydantic-2.12.4-py3-none-any.whl", hash = "sha256:92d3d202a745d46f9be6df459ac5a064fdaa3c1c4cd8adcfa332ccf3c05f871e", size = 463400, upload-time = "2025-11-05T10:50:06.732Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = 
"2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = 
"2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, +] + +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + +[[package]] +name = "rpds-py" +version = "0.28.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/48/dc/95f074d43452b3ef5d06276696ece4b3b5d696e7c9ad7173c54b1390cd70/rpds_py-0.28.0.tar.gz", hash = "sha256:abd4df20485a0983e2ca334a216249b6186d6e3c1627e106651943dbdb791aea", size = 27419, upload-time = "2025-10-22T22:24:29.327Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/5c/6c3936495003875fe7b14f90ea812841a08fca50ab26bd840e924097d9c8/rpds_py-0.28.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6b4f28583a4f247ff60cd7bdda83db8c3f5b05a7a82ff20dd4b078571747708f", size = 366439, upload-time = "2025-10-22T22:22:04.525Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/f9/a0f1ca194c50aa29895b442771f036a25b6c41a35e4f35b1a0ea713bedae/rpds_py-0.28.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d678e91b610c29c4b3d52a2c148b641df2b4676ffe47c59f6388d58b99cdc424", size = 348170, upload-time = "2025-10-22T22:22:06.397Z" }, + { url = "https://files.pythonhosted.org/packages/18/ea/42d243d3a586beb72c77fa5def0487daf827210069a95f36328e869599ea/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e819e0e37a44a78e1383bf1970076e2ccc4dc8c2bbaa2f9bd1dc987e9afff628", size = 378838, upload-time = "2025-10-22T22:22:07.932Z" }, + { url = "https://files.pythonhosted.org/packages/e7/78/3de32e18a94791af8f33601402d9d4f39613136398658412a4e0b3047327/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5ee514e0f0523db5d3fb171f397c54875dbbd69760a414dccf9d4d7ad628b5bd", size = 393299, upload-time = "2025-10-22T22:22:09.435Z" }, + { url = "https://files.pythonhosted.org/packages/13/7e/4bdb435afb18acea2eb8a25ad56b956f28de7c59f8a1d32827effa0d4514/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3fa06d27fdcee47f07a39e02862da0100cb4982508f5ead53ec533cd5fe55e", size = 518000, upload-time = "2025-10-22T22:22:11.326Z" }, + { url = "https://files.pythonhosted.org/packages/31/d0/5f52a656875cdc60498ab035a7a0ac8f399890cc1ee73ebd567bac4e39ae/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46959ef2e64f9e4a41fc89aa20dbca2b85531f9a72c21099a3360f35d10b0d5a", size = 408746, upload-time = "2025-10-22T22:22:13.143Z" }, + { url = "https://files.pythonhosted.org/packages/3e/cd/49ce51767b879cde77e7ad9fae164ea15dce3616fe591d9ea1df51152706/rpds_py-0.28.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8455933b4bcd6e83fde3fefc987a023389c4b13f9a58c8d23e4b3f6d13f78c84", size = 386379, upload-time = "2025-10-22T22:22:14.602Z" }, + { url = "https://files.pythonhosted.org/packages/6a/99/e4e1e1ee93a98f72fc450e36c0e4d99c35370220e815288e3ecd2ec36a2a/rpds_py-0.28.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:ad50614a02c8c2962feebe6012b52f9802deec4263946cddea37aaf28dd25a66", size = 401280, upload-time = "2025-10-22T22:22:16.063Z" }, + { url = "https://files.pythonhosted.org/packages/61/35/e0c6a57488392a8b319d2200d03dad2b29c0db9996f5662c3b02d0b86c02/rpds_py-0.28.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5deca01b271492553fdb6c7fd974659dce736a15bae5dad7ab8b93555bceb28", size = 412365, upload-time = "2025-10-22T22:22:17.504Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6a/841337980ea253ec797eb084665436007a1aad0faac1ba097fb906c5f69c/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:735f8495a13159ce6a0d533f01e8674cec0c57038c920495f87dcb20b3ddb48a", size = 559573, upload-time = "2025-10-22T22:22:19.108Z" }, + { url = "https://files.pythonhosted.org/packages/e7/5e/64826ec58afd4c489731f8b00729c5f6afdb86f1df1df60bfede55d650bb/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:961ca621ff10d198bbe6ba4957decca61aa2a0c56695384c1d6b79bf61436df5", size = 583973, upload-time = "2025-10-22T22:22:20.768Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ee/44d024b4843f8386a4eeaa4c171b3d31d55f7177c415545fd1a24c249b5d/rpds_py-0.28.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2374e16cc9131022e7d9a8f8d65d261d9ba55048c78f3b6e017971a4f5e6353c", size = 553800, upload-time = "2025-10-22T22:22:22.25Z" 
}, + { url = "https://files.pythonhosted.org/packages/7d/89/33e675dccff11a06d4d85dbb4d1865f878d5020cbb69b2c1e7b2d3f82562/rpds_py-0.28.0-cp312-cp312-win32.whl", hash = "sha256:d15431e334fba488b081d47f30f091e5d03c18527c325386091f31718952fe08", size = 216954, upload-time = "2025-10-22T22:22:24.105Z" }, + { url = "https://files.pythonhosted.org/packages/af/36/45f6ebb3210887e8ee6dbf1bc710ae8400bb417ce165aaf3024b8360d999/rpds_py-0.28.0-cp312-cp312-win_amd64.whl", hash = "sha256:a410542d61fc54710f750d3764380b53bf09e8c4edbf2f9141a82aa774a04f7c", size = 227844, upload-time = "2025-10-22T22:22:25.551Z" }, + { url = "https://files.pythonhosted.org/packages/57/91/f3fb250d7e73de71080f9a221d19bd6a1c1eb0d12a1ea26513f6c1052ad6/rpds_py-0.28.0-cp312-cp312-win_arm64.whl", hash = "sha256:1f0cfd1c69e2d14f8c892b893997fa9a60d890a0c8a603e88dca4955f26d1edd", size = 217624, upload-time = "2025-10-22T22:22:26.914Z" }, + { url = "https://files.pythonhosted.org/packages/d3/03/ce566d92611dfac0085c2f4b048cd53ed7c274a5c05974b882a908d540a2/rpds_py-0.28.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e9e184408a0297086f880556b6168fa927d677716f83d3472ea333b42171ee3b", size = 366235, upload-time = "2025-10-22T22:22:28.397Z" }, + { url = "https://files.pythonhosted.org/packages/00/34/1c61da1b25592b86fd285bd7bd8422f4c9d748a7373b46126f9ae792a004/rpds_py-0.28.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:edd267266a9b0448f33dc465a97cfc5d467594b600fe28e7fa2f36450e03053a", size = 348241, upload-time = "2025-10-22T22:22:30.171Z" }, + { url = "https://files.pythonhosted.org/packages/fc/00/ed1e28616848c61c493a067779633ebf4b569eccaacf9ccbdc0e7cba2b9d/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85beb8b3f45e4e32f6802fb6cd6b17f615ef6c6a52f265371fb916fae02814aa", size = 378079, upload-time = "2025-10-22T22:22:31.644Z" }, + { url = "https://files.pythonhosted.org/packages/11/b2/ccb30333a16a470091b6e50289adb4d3ec656fd9951ba8c5e3aaa0746a67/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d2412be8d00a1b895f8ad827cc2116455196e20ed994bb704bf138fe91a42724", size = 393151, upload-time = "2025-10-22T22:22:33.453Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d0/73e2217c3ee486d555cb84920597480627d8c0240ff3062005c6cc47773e/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cf128350d384b777da0e68796afdcebc2e9f63f0e9f242217754e647f6d32491", size = 517520, upload-time = "2025-10-22T22:22:34.949Z" }, + { url = "https://files.pythonhosted.org/packages/c4/91/23efe81c700427d0841a4ae7ea23e305654381831e6029499fe80be8a071/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a2036d09b363aa36695d1cc1a97b36865597f4478470b0697b5ee9403f4fe399", size = 408699, upload-time = "2025-10-22T22:22:36.584Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ee/a324d3198da151820a326c1f988caaa4f37fc27955148a76fff7a2d787a9/rpds_py-0.28.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8e1e9be4fa6305a16be628959188e4fd5cd6f1b0e724d63c6d8b2a8adf74ea6", size = 385720, upload-time = "2025-10-22T22:22:38.014Z" }, + { url = "https://files.pythonhosted.org/packages/19/ad/e68120dc05af8b7cab4a789fccd8cdcf0fe7e6581461038cc5c164cd97d2/rpds_py-0.28.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0a403460c9dd91a7f23fc3188de6d8977f1d9603a351d5db6cf20aaea95b538d", size = 401096, upload-time = "2025-10-22T22:22:39.869Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/90/c1e070620042459d60df6356b666bb1f62198a89d68881816a7ed121595a/rpds_py-0.28.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d7366b6553cdc805abcc512b849a519167db8f5e5c3472010cd1228b224265cb", size = 411465, upload-time = "2025-10-22T22:22:41.395Z" }, + { url = "https://files.pythonhosted.org/packages/68/61/7c195b30d57f1b8d5970f600efee72a4fad79ec829057972e13a0370fd24/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b43c6a3726efd50f18d8120ec0551241c38785b68952d240c45ea553912ac41", size = 558832, upload-time = "2025-10-22T22:22:42.871Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3d/06f3a718864773f69941d4deccdf18e5e47dd298b4628062f004c10f3b34/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0cb7203c7bc69d7c1585ebb33a2e6074492d2fc21ad28a7b9d40457ac2a51ab7", size = 583230, upload-time = "2025-10-22T22:22:44.877Z" }, + { url = "https://files.pythonhosted.org/packages/66/df/62fc783781a121e77fee9a21ead0a926f1b652280a33f5956a5e7833ed30/rpds_py-0.28.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7a52a5169c664dfb495882adc75c304ae1d50df552fbd68e100fdc719dee4ff9", size = 553268, upload-time = "2025-10-22T22:22:46.441Z" }, + { url = "https://files.pythonhosted.org/packages/84/85/d34366e335140a4837902d3dea89b51f087bd6a63c993ebdff59e93ee61d/rpds_py-0.28.0-cp313-cp313-win32.whl", hash = "sha256:2e42456917b6687215b3e606ab46aa6bca040c77af7df9a08a6dcfe8a4d10ca5", size = 217100, upload-time = "2025-10-22T22:22:48.342Z" }, + { url = "https://files.pythonhosted.org/packages/3c/1c/f25a3f3752ad7601476e3eff395fe075e0f7813fbb9862bd67c82440e880/rpds_py-0.28.0-cp313-cp313-win_amd64.whl", hash = "sha256:e0a0311caedc8069d68fc2bf4c9019b58a2d5ce3cd7cb656c845f1615b577e1e", size = 227759, upload-time = "2025-10-22T22:22:50.219Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d6/5f39b42b99615b5bc2f36ab90423ea404830bdfee1c706820943e9a645eb/rpds_py-0.28.0-cp313-cp313-win_arm64.whl", hash = "sha256:04c1b207ab8b581108801528d59ad80aa83bb170b35b0ddffb29c20e411acdc1", size = 217326, upload-time = "2025-10-22T22:22:51.647Z" }, + { url = "https://files.pythonhosted.org/packages/5c/8b/0c69b72d1cee20a63db534be0df271effe715ef6c744fdf1ff23bb2b0b1c/rpds_py-0.28.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:f296ea3054e11fc58ad42e850e8b75c62d9a93a9f981ad04b2e5ae7d2186ff9c", size = 355736, upload-time = "2025-10-22T22:22:53.211Z" }, + { url = "https://files.pythonhosted.org/packages/f7/6d/0c2ee773cfb55c31a8514d2cece856dd299170a49babd50dcffb15ddc749/rpds_py-0.28.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5a7306c19b19005ad98468fcefeb7100b19c79fc23a5f24a12e06d91181193fa", size = 342677, upload-time = "2025-10-22T22:22:54.723Z" }, + { url = "https://files.pythonhosted.org/packages/e2/1c/22513ab25a27ea205144414724743e305e8153e6abe81833b5e678650f5a/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5d9b86aa501fed9862a443c5c3116f6ead8bc9296185f369277c42542bd646b", size = 371847, upload-time = "2025-10-22T22:22:56.295Z" }, + { url = "https://files.pythonhosted.org/packages/60/07/68e6ccdb4b05115ffe61d31afc94adef1833d3a72f76c9632d4d90d67954/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e5bbc701eff140ba0e872691d573b3d5d30059ea26e5785acba9132d10c8c31d", size = 381800, upload-time = "2025-10-22T22:22:57.808Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/bf/6d6d15df80781d7f9f368e7c1a00caf764436518c4877fb28b029c4624af/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5690671cd672a45aa8616d7374fdf334a1b9c04a0cac3c854b1136e92374fe", size = 518827, upload-time = "2025-10-22T22:22:59.826Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d3/2decbb2976cc452cbf12a2b0aaac5f1b9dc5dd9d1f7e2509a3ee00421249/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9f1d92ecea4fa12f978a367c32a5375a1982834649cdb96539dcdc12e609ab1a", size = 399471, upload-time = "2025-10-22T22:23:01.968Z" }, + { url = "https://files.pythonhosted.org/packages/b1/2c/f30892f9e54bd02e5faca3f6a26d6933c51055e67d54818af90abed9748e/rpds_py-0.28.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d252db6b1a78d0a3928b6190156042d54c93660ce4d98290d7b16b5296fb7cc", size = 377578, upload-time = "2025-10-22T22:23:03.52Z" }, + { url = "https://files.pythonhosted.org/packages/f0/5d/3bce97e5534157318f29ac06bf2d279dae2674ec12f7cb9c12739cee64d8/rpds_py-0.28.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d61b355c3275acb825f8777d6c4505f42b5007e357af500939d4a35b19177259", size = 390482, upload-time = "2025-10-22T22:23:05.391Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f0/886bd515ed457b5bd93b166175edb80a0b21a210c10e993392127f1e3931/rpds_py-0.28.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:acbe5e8b1026c0c580d0321c8aae4b0a1e1676861d48d6e8c6586625055b606a", size = 402447, upload-time = "2025-10-22T22:23:06.93Z" }, + { url = "https://files.pythonhosted.org/packages/42/b5/71e8777ac55e6af1f4f1c05b47542a1eaa6c33c1cf0d300dca6a1c6e159a/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8aa23b6f0fc59b85b4c7d89ba2965af274346f738e8d9fc2455763602e62fd5f", size = 552385, upload-time = "2025-10-22T22:23:08.557Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cb/6ca2d70cbda5a8e36605e7788c4aa3bea7c17d71d213465a5a675079b98d/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7b14b0c680286958817c22d76fcbca4800ddacef6f678f3a7c79a1fe7067fe37", size = 575642, upload-time = "2025-10-22T22:23:10.348Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d4/407ad9960ca7856d7b25c96dcbe019270b5ffdd83a561787bc682c797086/rpds_py-0.28.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bcf1d210dfee61a6c86551d67ee1031899c0fdbae88b2d44a569995d43797712", size = 544507, upload-time = "2025-10-22T22:23:12.434Z" }, + { url = "https://files.pythonhosted.org/packages/51/31/2f46fe0efcac23fbf5797c6b6b7e1c76f7d60773e525cb65fcbc582ee0f2/rpds_py-0.28.0-cp313-cp313t-win32.whl", hash = "sha256:3aa4dc0fdab4a7029ac63959a3ccf4ed605fee048ba67ce89ca3168da34a1342", size = 205376, upload-time = "2025-10-22T22:23:13.979Z" }, + { url = "https://files.pythonhosted.org/packages/92/e4/15947bda33cbedfc134490a41841ab8870a72a867a03d4969d886f6594a2/rpds_py-0.28.0-cp313-cp313t-win_amd64.whl", hash = "sha256:7b7d9d83c942855e4fdcfa75d4f96f6b9e272d42fffcb72cd4bb2577db2e2907", size = 215907, upload-time = "2025-10-22T22:23:15.5Z" }, + { url = "https://files.pythonhosted.org/packages/08/47/ffe8cd7a6a02833b10623bf765fbb57ce977e9a4318ca0e8cf97e9c3d2b3/rpds_py-0.28.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:dcdcb890b3ada98a03f9f2bb108489cdc7580176cb73b4f2d789e9a1dac1d472", size = 353830, upload-time = "2025-10-22T22:23:17.03Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/9f/890f36cbd83a58491d0d91ae0db1702639edb33fb48eeb356f80ecc6b000/rpds_py-0.28.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f274f56a926ba2dc02976ca5b11c32855cbd5925534e57cfe1fda64e04d1add2", size = 341819, upload-time = "2025-10-22T22:23:18.57Z" }, + { url = "https://files.pythonhosted.org/packages/09/e3/921eb109f682aa24fb76207698fbbcf9418738f35a40c21652c29053f23d/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fe0438ac4a29a520ea94c8c7f1754cdd8feb1bc490dfda1bfd990072363d527", size = 373127, upload-time = "2025-10-22T22:23:20.216Z" }, + { url = "https://files.pythonhosted.org/packages/23/13/bce4384d9f8f4989f1a9599c71b7a2d877462e5fd7175e1f69b398f729f4/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8a358a32dd3ae50e933347889b6af9a1bdf207ba5d1a3f34e1a38cd3540e6733", size = 382767, upload-time = "2025-10-22T22:23:21.787Z" }, + { url = "https://files.pythonhosted.org/packages/23/e1/579512b2d89a77c64ccef5a0bc46a6ef7f72ae0cf03d4b26dcd52e57ee0a/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e80848a71c78aa328fefaba9c244d588a342c8e03bda518447b624ea64d1ff56", size = 517585, upload-time = "2025-10-22T22:23:23.699Z" }, + { url = "https://files.pythonhosted.org/packages/62/3c/ca704b8d324a2591b0b0adcfcaadf9c862375b11f2f667ac03c61b4fd0a6/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f586db2e209d54fe177e58e0bc4946bea5fb0102f150b1b2f13de03e1f0976f8", size = 399828, upload-time = "2025-10-22T22:23:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/da/37/e84283b9e897e3adc46b4c88bb3f6ec92a43bd4d2f7ef5b13459963b2e9c/rpds_py-0.28.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ae8ee156d6b586e4292491e885d41483136ab994e719a13458055bec14cf370", size = 375509, upload-time = "2025-10-22T22:23:27.32Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c2/a980beab869d86258bf76ec42dec778ba98151f253a952b02fe36d72b29c/rpds_py-0.28.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:a805e9b3973f7e27f7cab63a6b4f61d90f2e5557cff73b6e97cd5b8540276d3d", size = 392014, upload-time = "2025-10-22T22:23:29.332Z" }, + { url = "https://files.pythonhosted.org/packages/da/b5/b1d3c5f9d3fa5aeef74265f9c64de3c34a0d6d5cd3c81c8b17d5c8f10ed4/rpds_py-0.28.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5d3fd16b6dc89c73a4da0b4ac8b12a7ecc75b2864b95c9e5afed8003cb50a728", size = 402410, upload-time = "2025-10-22T22:23:31.14Z" }, + { url = "https://files.pythonhosted.org/packages/74/ae/cab05ff08dfcc052afc73dcb38cbc765ffc86f94e966f3924cd17492293c/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6796079e5d24fdaba6d49bda28e2c47347e89834678f2bc2c1b4fc1489c0fb01", size = 553593, upload-time = "2025-10-22T22:23:32.834Z" }, + { url = "https://files.pythonhosted.org/packages/70/80/50d5706ea2a9bfc9e9c5f401d91879e7c790c619969369800cde202da214/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:76500820c2af232435cbe215e3324c75b950a027134e044423f59f5b9a1ba515", size = 576925, upload-time = "2025-10-22T22:23:34.47Z" }, + { url = "https://files.pythonhosted.org/packages/ab/12/85a57d7a5855a3b188d024b099fd09c90db55d32a03626d0ed16352413ff/rpds_py-0.28.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bbdc5640900a7dbf9dd707fe6388972f5bbd883633eb68b76591044cfe346f7e", size = 542444, upload-time = "2025-10-22T22:23:36.093Z" }, + 
{ url = "https://files.pythonhosted.org/packages/6c/65/10643fb50179509150eb94d558e8837c57ca8b9adc04bd07b98e57b48f8c/rpds_py-0.28.0-cp314-cp314-win32.whl", hash = "sha256:adc8aa88486857d2b35d75f0640b949759f79dc105f50aa2c27816b2e0dd749f", size = 207968, upload-time = "2025-10-22T22:23:37.638Z" }, + { url = "https://files.pythonhosted.org/packages/b4/84/0c11fe4d9aaea784ff4652499e365963222481ac647bcd0251c88af646eb/rpds_py-0.28.0-cp314-cp314-win_amd64.whl", hash = "sha256:66e6fa8e075b58946e76a78e69e1a124a21d9a48a5b4766d15ba5b06869d1fa1", size = 218876, upload-time = "2025-10-22T22:23:39.179Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e0/3ab3b86ded7bb18478392dc3e835f7b754cd446f62f3fc96f4fe2aca78f6/rpds_py-0.28.0-cp314-cp314-win_arm64.whl", hash = "sha256:a6fe887c2c5c59413353b7c0caff25d0e566623501ccfff88957fa438a69377d", size = 212506, upload-time = "2025-10-22T22:23:40.755Z" }, + { url = "https://files.pythonhosted.org/packages/51/ec/d5681bb425226c3501eab50fc30e9d275de20c131869322c8a1729c7b61c/rpds_py-0.28.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7a69df082db13c7070f7b8b1f155fa9e687f1d6aefb7b0e3f7231653b79a067b", size = 355433, upload-time = "2025-10-22T22:23:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/be/ec/568c5e689e1cfb1ea8b875cffea3649260955f677fdd7ddc6176902d04cd/rpds_py-0.28.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b1cde22f2c30ebb049a9e74c5374994157b9b70a16147d332f89c99c5960737a", size = 342601, upload-time = "2025-10-22T22:23:44.372Z" }, + { url = "https://files.pythonhosted.org/packages/32/fe/51ada84d1d2a1d9d8f2c902cfddd0133b4a5eb543196ab5161d1c07ed2ad/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5338742f6ba7a51012ea470bd4dc600a8c713c0c72adaa0977a1b1f4327d6592", size = 372039, upload-time = "2025-10-22T22:23:46.025Z" }, + { url = "https://files.pythonhosted.org/packages/07/c1/60144a2f2620abade1a78e0d91b298ac2d9b91bc08864493fa00451ef06e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e1460ebde1bcf6d496d80b191d854adedcc619f84ff17dc1c6d550f58c9efbba", size = 382407, upload-time = "2025-10-22T22:23:48.098Z" }, + { url = "https://files.pythonhosted.org/packages/45/ed/091a7bbdcf4038a60a461df50bc4c82a7ed6d5d5e27649aab61771c17585/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e3eb248f2feba84c692579257a043a7699e28a77d86c77b032c1d9fbb3f0219c", size = 518172, upload-time = "2025-10-22T22:23:50.16Z" }, + { url = "https://files.pythonhosted.org/packages/54/dd/02cc90c2fd9c2ef8016fd7813bfacd1c3a1325633ec8f244c47b449fc868/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3bbba5def70b16cd1c1d7255666aad3b290fbf8d0fe7f9f91abafb73611a91", size = 399020, upload-time = "2025-10-22T22:23:51.81Z" }, + { url = "https://files.pythonhosted.org/packages/ab/81/5d98cc0329bbb911ccecd0b9e19fbf7f3a5de8094b4cda5e71013b2dd77e/rpds_py-0.28.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3114f4db69ac5a1f32e7e4d1cbbe7c8f9cf8217f78e6e002cedf2d54c2a548ed", size = 377451, upload-time = "2025-10-22T22:23:53.711Z" }, + { url = "https://files.pythonhosted.org/packages/b4/07/4d5bcd49e3dfed2d38e2dcb49ab6615f2ceb9f89f5a372c46dbdebb4e028/rpds_py-0.28.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4b0cb8a906b1a0196b863d460c0222fb8ad0f34041568da5620f9799b83ccf0b", size = 390355, upload-time = "2025-10-22T22:23:55.299Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/79/9f14ba9010fee74e4f40bf578735cfcbb91d2e642ffd1abe429bb0b96364/rpds_py-0.28.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf681ac76a60b667106141e11a92a3330890257e6f559ca995fbb5265160b56e", size = 403146, upload-time = "2025-10-22T22:23:56.929Z" }, + { url = "https://files.pythonhosted.org/packages/39/4c/f08283a82ac141331a83a40652830edd3a4a92c34e07e2bbe00baaea2f5f/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1e8ee6413cfc677ce8898d9cde18cc3a60fc2ba756b0dec5b71eb6eb21c49fa1", size = 552656, upload-time = "2025-10-22T22:23:58.62Z" }, + { url = "https://files.pythonhosted.org/packages/61/47/d922fc0666f0dd8e40c33990d055f4cc6ecff6f502c2d01569dbed830f9b/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b3072b16904d0b5572a15eb9d31c1954e0d3227a585fc1351aa9878729099d6c", size = 576782, upload-time = "2025-10-22T22:24:00.312Z" }, + { url = "https://files.pythonhosted.org/packages/d3/0c/5bafdd8ccf6aa9d3bfc630cfece457ff5b581af24f46a9f3590f790e3df2/rpds_py-0.28.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b670c30fd87a6aec281c3c9896d3bae4b205fd75d79d06dc87c2503717e46092", size = 544671, upload-time = "2025-10-22T22:24:02.297Z" }, + { url = "https://files.pythonhosted.org/packages/2c/37/dcc5d8397caa924988693519069d0beea077a866128719351a4ad95e82fc/rpds_py-0.28.0-cp314-cp314t-win32.whl", hash = "sha256:8014045a15b4d2b3476f0a287fcc93d4f823472d7d1308d47884ecac9e612be3", size = 205749, upload-time = "2025-10-22T22:24:03.848Z" }, + { url = "https://files.pythonhosted.org/packages/d7/69/64d43b21a10d72b45939a28961216baeb721cc2a430f5f7c3bfa21659a53/rpds_py-0.28.0-cp314-cp314t-win_amd64.whl", hash = "sha256:7a4e59c90d9c27c561eb3160323634a9ff50b04e4f7820600a2beb0ac90db578", size = 216233, upload-time = "2025-10-22T22:24:05.471Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, +] + +[[package]] +name = "zipp" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, +] diff --git a/src/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py new file mode 100644 index 000000000..188ea3307 --- /dev/null +++ b/src/llama_stack_api/vector_io.py @@ -0,0 +1,934 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from typing import Annotated, Any, Literal, Protocol, runtime_checkable + +from fastapi import Body, Query +from pydantic import BaseModel, Field, field_validator + +from llama_stack_api.inference import InterleavedContent +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.vector_stores import VectorStore +from llama_stack_api.version import LLAMA_STACK_API_V1 + + +@json_schema_type +class ChunkMetadata(BaseModel): + """ + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that + will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` + is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. + Use `Chunk.metadata` for metadata that will be used in the context during inference. + :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content. + :param document_id: The ID of the document this chunk belongs to. + :param source: The source of the content, such as a URL, file path, or other identifier. + :param created_timestamp: An optional timestamp indicating when the chunk was created. + :param updated_timestamp: An optional timestamp indicating when the chunk was last updated. + :param chunk_window: The window of the chunk, which can be used to group related chunks together. + :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken. + :param chunk_embedding_model: The embedding model used to create the chunk's embedding. 
+    :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk.
+    :param content_token_count: The number of tokens in the content of the chunk.
+    :param metadata_token_count: The number of tokens in the metadata of the chunk.
+    """
+
+    chunk_id: str | None = None
+    document_id: str | None = None
+    source: str | None = None
+    created_timestamp: int | None = None
+    updated_timestamp: int | None = None
+    chunk_window: str | None = None
+    chunk_tokenizer: str | None = None
+    chunk_embedding_model: str | None = None
+    chunk_embedding_dimension: int | None = None
+    content_token_count: int | None = None
+    metadata_token_count: int | None = None
+
+
+@json_schema_type
+class Chunk(BaseModel):
+    """
+    A chunk of content that can be inserted into a vector database.
+    :param content: The content of the chunk, which can be interleaved text, images, or other types.
+    :param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
+    :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
+    :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
+    :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
+        The `chunk_metadata` is required for backend functionality.
+    """
+
+    content: InterleavedContent
+    chunk_id: str
+    metadata: dict[str, Any] = Field(default_factory=dict)
+    embedding: list[float] | None = None
+    chunk_metadata: ChunkMetadata | None = None
+
+    @property
+    def document_id(self) -> str | None:
+        """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
+        # Check metadata first (takes precedence)
+        doc_id = self.metadata.get("document_id")
+        if doc_id is not None:
+            if not isinstance(doc_id, str):
+                raise TypeError(f"metadata['document_id'] must be a string, got {type(doc_id).__name__}: {doc_id!r}")
+            return doc_id
+
+        # Fall back to chunk_metadata if available (Pydantic ensures type safety)
+        if self.chunk_metadata is not None:
+            return self.chunk_metadata.document_id
+
+        return None
+
+
+@json_schema_type
+class QueryChunksResponse(BaseModel):
+    """Response from querying chunks in a vector database.
+
+    :param chunks: List of content chunks returned from the query
+    :param scores: Relevance scores corresponding to each returned chunk
+    """
+
+    chunks: list[Chunk]
+    scores: list[float]
+
+
+@json_schema_type
+class VectorStoreFileCounts(BaseModel):
+    """File processing status counts for a vector store.
+
+    :param completed: Number of files that have been successfully processed
+    :param cancelled: Number of files that had their processing cancelled
+    :param failed: Number of files that failed to process
+    :param in_progress: Number of files currently being processed
+    :param total: Total number of files in the vector store
+    """
+
+    completed: int
+    cancelled: int
+    failed: int
+    in_progress: int
+    total: int
+
+
+# TODO: rename this as OpenAIVectorStore
+@json_schema_type
+class VectorStoreObject(BaseModel):
+    """OpenAI Vector Store object.
+
+    :param id: Unique identifier for the vector store
+    :param object: Object type identifier, always "vector_store"
+    :param created_at: Timestamp when the vector store was created
+    :param name: (Optional) Name of the vector store
+    :param usage_bytes: Storage space used by the vector store in bytes
+    :param file_counts: File processing status counts for the vector store
+    :param status: Current status of the vector store
+    :param expires_after: (Optional) Expiration policy for the vector store
+    :param expires_at: (Optional) Timestamp when the vector store will expire
+    :param last_active_at: (Optional) Timestamp of last activity on the vector store
+    :param metadata: Set of key-value pairs that can be attached to the vector store
+    """
+
+    id: str
+    object: str = "vector_store"
+    created_at: int
+    name: str | None = None
+    usage_bytes: int = 0
+    file_counts: VectorStoreFileCounts
+    status: str = "completed"
+    expires_after: dict[str, Any] | None = None
+    expires_at: int | None = None
+    last_active_at: int | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+@json_schema_type
+class VectorStoreCreateRequest(BaseModel):
+    """Request to create a vector store.
+
+    :param name: (Optional) Name for the vector store
+    :param file_ids: List of file IDs to include in the vector store
+    :param expires_after: (Optional) Expiration policy for the vector store
+    :param chunking_strategy: (Optional) Strategy for splitting files into chunks
+    :param metadata: Set of key-value pairs that can be attached to the vector store
+    """
+
+    name: str | None = None
+    file_ids: list[str] = Field(default_factory=list)
+    expires_after: dict[str, Any] | None = None
+    chunking_strategy: dict[str, Any] | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+@json_schema_type
+class VectorStoreModifyRequest(BaseModel):
+    """Request to modify a vector store.
+
+    :param name: (Optional) Updated name for the vector store
+    :param expires_after: (Optional) Updated expiration policy for the vector store
+    :param metadata: (Optional) Updated set of key-value pairs for the vector store
+    """
+
+    name: str | None = None
+    expires_after: dict[str, Any] | None = None
+    metadata: dict[str, Any] | None = None
+
+
+@json_schema_type
+class VectorStoreListResponse(BaseModel):
+    """Response from listing vector stores.
+
+    :param object: Object type identifier, always "list"
+    :param data: List of vector store objects
+    :param first_id: (Optional) ID of the first vector store in the list for pagination
+    :param last_id: (Optional) ID of the last vector store in the list for pagination
+    :param has_more: Whether there are more vector stores available beyond this page
+    """
+
+    object: str = "list"
+    data: list[VectorStoreObject]
+    first_id: str | None = None
+    last_id: str | None = None
+    has_more: bool = False
+
+
+@json_schema_type
+class VectorStoreSearchRequest(BaseModel):
+    """Request to search a vector store.
+
+    :param query: Search query as a string or list of strings
+    :param filters: (Optional) Filters based on file attributes to narrow search results
+    :param max_num_results: Maximum number of results to return, defaults to 10
+    :param ranking_options: (Optional) Options for ranking and filtering search results
+    :param rewrite_query: Whether to rewrite the query for better vector search performance
+    """
+
+    query: str | list[str]
+    filters: dict[str, Any] | None = None
+    max_num_results: int = 10
+    ranking_options: dict[str, Any] | None = None
+    rewrite_query: bool = False
+
+
+@json_schema_type
+class VectorStoreContent(BaseModel):
+    """Content item from a vector store file or search result.
+
+    :param type: Content type, currently only "text" is supported
+    :param text: The actual text content
+    :param embedding: Optional embedding vector for this content chunk
+    :param chunk_metadata: Optional chunk metadata
+    :param metadata: Optional user-defined metadata
+    """
+
+    type: Literal["text"]
+    text: str
+    embedding: list[float] | None = None
+    chunk_metadata: ChunkMetadata | None = None
+    metadata: dict[str, Any] | None = None
+
+
+@json_schema_type
+class VectorStoreSearchResponse(BaseModel):
+    """Response from searching a vector store.
+
+    :param file_id: Unique identifier of the file containing the result
+    :param filename: Name of the file containing the result
+    :param score: Relevance score for this search result
+    :param attributes: (Optional) Key-value attributes associated with the file
+    :param content: List of content items matching the search query
+    """
+
+    file_id: str
+    filename: str
+    score: float
+    attributes: dict[str, str | float | bool] | None = None
+    content: list[VectorStoreContent]
+
+
+@json_schema_type
+class VectorStoreSearchResponsePage(BaseModel):
+    """Paginated response from searching a vector store.
+
+    :param object: Object type identifier for the search results page
+    :param search_query: The original search query that was executed
+    :param data: List of search result objects
+    :param has_more: Whether there are more results available beyond this page
+    :param next_page: (Optional) Token for retrieving the next page of results
+    """
+
+    object: str = "vector_store.search_results.page"
+    search_query: list[str]
+    data: list[VectorStoreSearchResponse]
+    has_more: bool = False
+    next_page: str | None = None
+
+
+@json_schema_type
+class VectorStoreDeleteResponse(BaseModel):
+    """Response from deleting a vector store.
+
+    :param id: Unique identifier of the deleted vector store
+    :param object: Object type identifier for the deletion response
+    :param deleted: Whether the deletion operation was successful
+    """
+
+    id: str
+    object: str = "vector_store.deleted"
+    deleted: bool = True
+
+
+@json_schema_type
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.
+
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
+    """
+
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool = False
+    next_page: str | None = None
+
+
+@json_schema_type
+class VectorStoreChunkingStrategyAuto(BaseModel):
+    """Automatic chunking strategy for vector store files.
+
+    :param type: Strategy type, always "auto" for automatic chunking
+    """
+
+    type: Literal["auto"] = "auto"
+
+
+@json_schema_type
+class VectorStoreChunkingStrategyStaticConfig(BaseModel):
+    """Configuration for static chunking strategy.
+
+    :param chunk_overlap_tokens: Number of tokens to overlap between adjacent chunks
+    :param max_chunk_size_tokens: Maximum number of tokens per chunk, must be between 100 and 4096
+    """
+
+    chunk_overlap_tokens: int = 400
+    max_chunk_size_tokens: int = Field(800, ge=100, le=4096)
+
+
+@json_schema_type
+class VectorStoreChunkingStrategyStatic(BaseModel):
+    """Static chunking strategy with configurable parameters.
+
+    :param type: Strategy type, always "static" for static chunking
+    :param static: Configuration parameters for the static chunking strategy
+    """
+
+    type: Literal["static"] = "static"
+    static: VectorStoreChunkingStrategyStaticConfig
+
+
+VectorStoreChunkingStrategy = Annotated[
+    VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic,
+    Field(discriminator="type"),
+]
+register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")
+
+
+class SearchRankingOptions(BaseModel):
+    """Options for ranking and filtering search results.
+
+    :param ranker: (Optional) Name of the ranking algorithm to use
+    :param score_threshold: (Optional) Minimum relevance score threshold for results
+    """
+
+    ranker: str | None = None
+    # NOTE: OpenAI File Search Tool requires threshold to be between 0 and 1, however
+    # we don't guarantee that the score is between 0 and 1, so will leave this unconstrained
+    # and let the provider handle it
+    score_threshold: float | None = Field(default=0.0)
+
+
+@json_schema_type
+class VectorStoreFileLastError(BaseModel):
+    """Error information for failed vector store file processing.
+
+    :param code: Error code indicating the type of failure
+    :param message: Human-readable error message describing the failure
+    """
+
+    code: Literal["server_error"] | Literal["rate_limit_exceeded"]
+    message: str
+
+
+VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
+register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
+
+
+# VectorStoreFileAttributes type with OpenAPI constraints
+VectorStoreFileAttributes = Annotated[
+    dict[str, Annotated[str, Field(max_length=512)] | float | bool],
+    Field(
+        max_length=16,
+        json_schema_extra={
+            "propertyNames": {"type": "string", "maxLength": 64},
+            "x-oaiTypeLabel": "map",
+        },
+        description=(
+            "Set of 16 key-value pairs that can be attached to an object. This can be "
+            "useful for storing additional information about the object in a structured "
+            "format, and querying for objects via API or the dashboard. Keys are strings "
+            "with a maximum length of 64 characters. Values are strings with a maximum "
+            "length of 512 characters, booleans, or numbers."
+        ),
+    ),
+]
+
+
+def _sanitize_vector_store_attributes(metadata: dict[str, Any] | None) -> dict[str, str | float | bool]:
+    """
+    Sanitize metadata to VectorStoreFileAttributes spec (max 16 properties, primitives only).
+
+    Converts dict[str, Any] to dict[str, str | float | bool]:
+    - Preserves: str (truncated to 512 chars), bool, int/float (as float)
+    - Converts: list -> comma-separated string
+    - Filters: dict, None, other types
+    - Enforces: max 16 properties, max 64 char keys, max 512 char string values
+    """
+    if not metadata:
+        return {}
+
+    sanitized: dict[str, str | float | bool] = {}
+    for key, value in metadata.items():
+        # Enforce max 16 properties
+        if len(sanitized) >= 16:
+            break
+
+        # Enforce max 64 char keys
+        if len(key) > 64:
+            continue
+
+        # Convert to supported primitive types
+        if isinstance(value, bool):
+            sanitized[key] = value
+        elif isinstance(value, int | float):
+            sanitized[key] = float(value)
+        elif isinstance(value, str):
+            # Enforce max 512 char string values
+            sanitized[key] = value[:512] if len(value) > 512 else value
+        elif isinstance(value, list):
+            # Convert lists to comma-separated strings (max 512 chars)
+            list_str = ", ".join(str(item) for item in value)
+            sanitized[key] = list_str[:512] if len(list_str) > 512 else list_str
+
+    return sanitized
+
+
+@json_schema_type
+class VectorStoreFileObject(BaseModel):
+    """OpenAI Vector Store File object.
+
+    :param id: Unique identifier for the file
+    :param object: Object type identifier, always "vector_store.file"
+    :param attributes: Key-value attributes associated with the file
+    :param chunking_strategy: Strategy used for splitting the file into chunks
+    :param created_at: Timestamp when the file was added to the vector store
+    :param last_error: (Optional) Error information if file processing failed
+    :param status: Current processing status of the file
+    :param usage_bytes: Storage space used by this file in bytes
+    :param vector_store_id: ID of the vector store containing this file
+    """
+
+    id: str
+    object: str = "vector_store.file"
+    attributes: VectorStoreFileAttributes = Field(default_factory=dict)
+    chunking_strategy: VectorStoreChunkingStrategy
+    created_at: int
+    last_error: VectorStoreFileLastError | None = None
+    status: VectorStoreFileStatus
+    usage_bytes: int = 0
+    vector_store_id: str
+
+    @field_validator("attributes", mode="before")
+    @classmethod
+    def _validate_attributes(cls, v: dict[str, Any] | None) -> dict[str, str | float | bool]:
+        """Sanitize attributes to match VectorStoreFileAttributes OpenAPI spec."""
+        return _sanitize_vector_store_attributes(v)
+
+
+@json_schema_type
+class VectorStoreListFilesResponse(BaseModel):
+    """Response from listing files in a vector store.
+
+    :param object: Object type identifier, always "list"
+    :param data: List of vector store file objects
+    :param first_id: (Optional) ID of the first file in the list for pagination
+    :param last_id: (Optional) ID of the last file in the list for pagination
+    :param has_more: Whether there are more files available beyond this page
+    """
+
+    object: str = "list"
+    data: list[VectorStoreFileObject]
+    first_id: str | None = None
+    last_id: str | None = None
+    has_more: bool = False
+
+
+@json_schema_type
+class VectorStoreFileDeleteResponse(BaseModel):
+    """Response from deleting a vector store file.
+
+    :param id: Unique identifier of the deleted file
+    :param object: Object type identifier for the deletion response
+    :param deleted: Whether the deletion operation was successful
+    """
+
+    id: str
+    object: str = "vector_store.file.deleted"
+    deleted: bool = True
+
+
+@json_schema_type
+class VectorStoreFileBatchObject(BaseModel):
+    """OpenAI Vector Store File Batch object.
+
+    :param id: Unique identifier for the file batch
+    :param object: Object type identifier, always "vector_store.file_batch"
+    :param created_at: Timestamp when the file batch was created
+    :param vector_store_id: ID of the vector store containing the file batch
+    :param status: Current processing status of the file batch
+    :param file_counts: File processing status counts for the batch
+    """
+
+    id: str
+    object: str = "vector_store.file_batch"
+    created_at: int
+    vector_store_id: str
+    status: VectorStoreFileStatus
+    file_counts: VectorStoreFileCounts
+
+
+@json_schema_type
+class VectorStoreFilesListInBatchResponse(BaseModel):
+    """Response from listing files in a vector store file batch.
+
+    :param object: Object type identifier, always "list"
+    :param data: List of vector store file objects in the batch
+    :param first_id: (Optional) ID of the first file in the list for pagination
+    :param last_id: (Optional) ID of the last file in the list for pagination
+    :param has_more: Whether there are more files available beyond this page
+    """
+
+    object: str = "list"
+    data: list[VectorStoreFileObject]
+    first_id: str | None = None
+    last_id: str | None = None
+    has_more: bool = False
+
+
+# extra_body can be accessed via .model_extra
+@json_schema_type
+class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
+    """Request to create a vector store with extra_body support.
+
+    :param name: (Optional) A name for the vector store
+    :param file_ids: List of file IDs to include in the vector store
+    :param expires_after: (Optional) Expiration policy for the vector store
+    :param chunking_strategy: (Optional) Strategy for splitting files into chunks
+    :param metadata: Set of key-value pairs that can be attached to the vector store
+    """
+
+    name: str | None = None
+    file_ids: list[str] | None = None
+    expires_after: dict[str, Any] | None = None
+    chunking_strategy: VectorStoreChunkingStrategy | None = None
+    metadata: dict[str, Any] | None = None
+
+
+# extra_body can be accessed via .model_extra
+@json_schema_type
+class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"):
+    """Request to create a vector store file batch with extra_body support.
+
+    :param file_ids: A list of File IDs that the vector store should use
+    :param attributes: (Optional) Key-value attributes to store with the files
+    :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto
+    """
+
+    file_ids: list[str]
+    attributes: dict[str, Any] | None = None
+    chunking_strategy: VectorStoreChunkingStrategy | None = None
+
+
+class VectorStoreTable(Protocol):
+    def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
+
+
+@runtime_checkable
+class VectorIO(Protocol):
+    vector_store_table: VectorStoreTable | None = None
+
+    # this will just block now until chunks are inserted, but it should
+    # probably return a Job instance which can be polled for completion
+    # TODO: complete the rename from vector_db_id to vector_store_id once Stainless is working
+    @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
+    async def insert_chunks(
+        self,
+        vector_store_id: str,
+        chunks: list[Chunk],
+        ttl_seconds: int | None = None,
+    ) -> None:
+        """Insert chunks into a vector database.
+
+        :param vector_store_id: The identifier of the vector database to insert the chunks into.
+        :param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types.
+            `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional.
+            If `metadata` is provided, it configures how Llama Stack formats the chunk during generation.
+            If `embedding` is not provided, it will be computed later.
+        :param ttl_seconds: The time to live of the chunks.
+        """
+        ...
+
+    # TODO: complete the rename from vector_db_id to vector_store_id once Stainless is working
+    @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
+    async def query_chunks(
+        self,
+        vector_store_id: str,
+        query: InterleavedContent,
+        params: dict[str, Any] | None = None,
+    ) -> QueryChunksResponse:
+        """Query chunks from a vector database.
+
+        :param vector_store_id: The identifier of the vector database to query.
+        :param query: The query to search for.
+        :param params: The parameters of the query.
+        :returns: A QueryChunksResponse.
+        """
+        ...
+
+    # OpenAI Vector Stores API endpoints
+    @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
+    async def openai_create_vector_store(
+        self,
+        params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
+    ) -> VectorStoreObject:
+        """Creates a vector store.
+
+        Generates an OpenAI-compatible vector store with the given parameters.
+        :param params: The vector store creation request payload.
+        :returns: A VectorStoreObject representing the created vector store.
+        """
+        ...
+
+    @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
+    async def openai_list_vector_stores(
+        self,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+    ) -> VectorStoreListResponse:
+        """Returns a list of vector stores.
+
+        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
+        :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
+        :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list.
+        :param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list.
+        :returns: A VectorStoreListResponse containing the list of vector stores.
+        """
+        ...
+
+    @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
+    async def openai_retrieve_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreObject:
+        """Retrieves a vector store.
+
+        :param vector_store_id: The ID of the vector store to retrieve.
+        :returns: A VectorStoreObject representing the vector store.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_update_vector_store(
+        self,
+        vector_store_id: str,
+        name: str | None = None,
+        expires_after: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> VectorStoreObject:
+        """Updates a vector store.
+
+        :param vector_store_id: The ID of the vector store to update.
+        :param name: The name of the vector store.
+        :param expires_after: The expiration policy for a vector store.
+        :param metadata: Set of 16 key-value pairs that can be attached to an object.
+        :returns: A VectorStoreObject representing the updated vector store.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_delete_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreDeleteResponse:
+        """Delete a vector store.
+    @webmethod(
+        route="/vector_stores/{vector_store_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_delete_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreDeleteResponse:
+        """Delete a vector store.
+
+        :param vector_store_id: The ID of the vector store to delete.
+        :returns: A VectorStoreDeleteResponse indicating the deletion status.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/search",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_search_vector_store(
+        self,
+        vector_store_id: str,
+        query: str | list[str],
+        filters: dict[str, Any] | None = None,
+        max_num_results: int | None = 10,
+        ranking_options: SearchRankingOptions | None = None,
+        rewrite_query: bool | None = False,
+        search_mode: (
+            str | None
+        ) = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
+    ) -> VectorStoreSearchResponsePage:
+        """Search for chunks in a vector store.
+
+        Searches a vector store for relevant chunks based on a query and optional file attribute filters.
+
+        :param vector_store_id: The ID of the vector store to search.
+        :param query: The query string or array for performing the search.
+        :param filters: Filters based on file attributes to narrow the search results.
+        :param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
+        :param ranking_options: Ranking options for fine-tuning the search results.
+        :param rewrite_query: Whether to rewrite the natural language query for vector search (default false).
+        :param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector").
+        :returns: A VectorStoreSearchResponsePage containing the search results.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/files",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_attach_file_to_vector_store(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any] | None = None,
+        chunking_strategy: VectorStoreChunkingStrategy | None = None,
+    ) -> VectorStoreFileObject:
+        """Attach a file to a vector store.
+
+        :param vector_store_id: The ID of the vector store to attach the file to.
+        :param file_id: The ID of the file to attach to the vector store.
+        :param attributes: The key-value attributes stored with the file, which can be used for filtering.
+        :param chunking_strategy: The chunking strategy to use for the file.
+        :returns: A VectorStoreFileObject representing the attached file.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/files",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_list_files_in_vector_store(
+        self,
+        vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
+    ) -> VectorStoreListFilesResponse:
+        """List files in a vector store.
+
+        :param vector_store_id: The ID of the vector store to list files from.
+        :param limit: (Optional) A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
+        :param order: (Optional) Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
+        :param after: (Optional) A cursor for use in pagination. `after` is an object ID that defines your place in the list.
+        :param before: (Optional) A cursor for use in pagination. `before` is an object ID that defines your place in the list.
+        :param filter: (Optional) Filter by file status to only return files with the specified status.
+        :returns: A VectorStoreListFilesResponse containing the list of files.
+        """
+        ...
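File attachment and search compose with the lifecycle above. Another hedged sketch, with placeholder IDs and the same assumed base URL:

```python
# Placeholder IDs; routes and fields follow the definitions above.
import httpx

store_id = "vs_123"
with httpx.Client(base_url="http://localhost:8321/v1") as http:
    attached = http.post(
        f"/vector_stores/{store_id}/files",
        json={"file_id": "file_abc"},
    ).json()
    hits = http.post(
        f"/vector_stores/{store_id}/search",
        json={"query": "how do I configure chunking?", "max_num_results": 5},
    ).json()
    completed = http.get(
        f"/vector_stores/{store_id}/files", params={"filter": "completed"}
    ).json()
```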
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/files/{file_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_retrieve_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileObject:
+        """Retrieves a vector store file.
+
+        :param vector_store_id: The ID of the vector store containing the file to retrieve.
+        :param file_id: The ID of the file to retrieve.
+        :returns: A VectorStoreFileObject representing the file.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/files/{file_id}/content",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_retrieve_vector_store_file_contents(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        include_embeddings: Annotated[bool | None, Query()] = False,
+        include_metadata: Annotated[bool | None, Query()] = False,
+    ) -> VectorStoreFileContentResponse:
+        """Retrieves the contents of a vector store file.
+
+        :param vector_store_id: The ID of the vector store containing the file to retrieve.
+        :param file_id: The ID of the file to retrieve.
+        :param include_embeddings: Whether to include embedding vectors in the response.
+        :param include_metadata: Whether to include chunk metadata in the response.
+        :returns: File contents, optionally with embeddings and metadata based on query parameters.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/files/{file_id}",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_update_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any],
+    ) -> VectorStoreFileObject:
+        """Updates a vector store file.
+
+        :param vector_store_id: The ID of the vector store containing the file to update.
+        :param file_id: The ID of the file to update.
+        :param attributes: The updated key-value attributes to store with the file.
+        :returns: A VectorStoreFileObject representing the updated file.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/files/{file_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_delete_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileDeleteResponse:
+        """Delete a vector store file.
+
+        :param vector_store_id: The ID of the vector store containing the file to delete.
+        :param file_id: The ID of the file to delete.
+        :returns: A VectorStoreFileDeleteResponse indicating the deletion status.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/file_batches",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_create_vector_store_file_batch(
+        self,
+        vector_store_id: str,
+        params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
+    ) -> VectorStoreFileBatchObject:
+        """Create a vector store file batch.
+
+        Generate an OpenAI-compatible vector store file batch for the given vector store.
+
+        :param vector_store_id: The ID of the vector store to create the file batch for.
+        :param params: The request body describing the files to include in the batch.
+        :returns: A VectorStoreFileBatchObject representing the created file batch.
+        """
+        ...
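File batches process asynchronously, so a caller typically creates a batch and then polls it via the retrieve endpoint defined next until the status settles. A hedged sketch, again with placeholder IDs and an assumed base URL:

```python
# Placeholder IDs; the status values mirror VectorStoreFileStatus.
import time

import httpx

store_id = "vs_123"
with httpx.Client(base_url="http://localhost:8321/v1") as http:
    batch = http.post(
        f"/vector_stores/{store_id}/file_batches",
        json={"file_ids": ["file_a", "file_b"]},
    ).json()
    while batch["status"] == "in_progress":
        time.sleep(1)
        batch = http.get(
            f"/vector_stores/{store_id}/file_batches/{batch['id']}"
        ).json()
```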
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_retrieve_vector_store_file_batch(
+        self,
+        batch_id: str,
+        vector_store_id: str,
+    ) -> VectorStoreFileBatchObject:
+        """Retrieve a vector store file batch.
+
+        :param batch_id: The ID of the file batch to retrieve.
+        :param vector_store_id: The ID of the vector store containing the file batch.
+        :returns: A VectorStoreFileBatchObject representing the file batch.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_list_files_in_vector_store_file_batch(
+        self,
+        batch_id: str,
+        vector_store_id: str,
+        after: str | None = None,
+        before: str | None = None,
+        filter: str | None = None,
+        limit: int | None = 20,
+        order: str | None = "desc",
+    ) -> VectorStoreFilesListInBatchResponse:
+        """Returns a list of vector store files in a batch.
+
+        :param batch_id: The ID of the file batch to list files from.
+        :param vector_store_id: The ID of the vector store containing the file batch.
+        :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list.
+        :param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list.
+        :param filter: Filter by file status. One of in_progress, completed, failed, cancelled.
+        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
+        :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
+        :returns: A VectorStoreFilesListInBatchResponse containing the list of files in the batch.
+        """
+        ...
+
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+    )
+    async def openai_cancel_vector_store_file_batch(
+        self,
+        batch_id: str,
+        vector_store_id: str,
+    ) -> VectorStoreFileBatchObject:
+        """Cancels a vector store file batch.
+
+        :param batch_id: The ID of the file batch to cancel.
+        :param vector_store_id: The ID of the vector store containing the file batch.
+        :returns: A VectorStoreFileBatchObject representing the cancelled file batch.
+        """
+        ...
diff --git a/src/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py
new file mode 100644
index 000000000..0a1e6c53c
--- /dev/null
+++ b/src/llama_stack_api/vector_stores.py
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Literal
+
+from pydantic import BaseModel
+
+from llama_stack_api.resource import Resource, ResourceType
+
+
+# Internal resource type for storing the vector store routing and other information
+class VectorStore(Resource):
+    """Vector store resource for storing and querying vector embeddings.
+
+    :param type: Type of resource, always 'vector_store' for vector stores
+    :param embedding_model: Name of the embedding model to use for vector generation
+    :param embedding_dimension: Dimension of the embedding vectors
+    :param vector_store_name: (Optional) Human-readable name for the vector store
+    """
+
+    type: Literal[ResourceType.vector_store] = ResourceType.vector_store
+
+    embedding_model: str
+    embedding_dimension: int
+    vector_store_name: str | None = None
+
+    @property
+    def vector_store_id(self) -> str:
+        return self.identifier
+
+    @property
+    def provider_vector_store_id(self) -> str | None:
+        return self.provider_resource_id
+
+
+class VectorStoreInput(BaseModel):
+    """Input parameters for creating or configuring a vector store.
+
+    :param vector_store_id: Unique identifier for the vector store
+    :param embedding_model: Name of the embedding model to use for vector generation
+    :param embedding_dimension: Dimension of the embedding vectors
+    :param provider_id: (Optional) ID of the provider that hosts the vector store
+    :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
+    """
+
+    vector_store_id: str
+    embedding_model: str
+    embedding_dimension: int
+    provider_id: str | None = None
+    provider_vector_store_id: str | None = None
diff --git a/llama_stack/apis/version.py b/src/llama_stack_api/version.py
similarity index 100%
rename from llama_stack/apis/version.py
rename to src/llama_stack_api/version.py
diff --git a/src/llama_stack_ui/.dockerignore b/src/llama_stack_ui/.dockerignore
new file mode 100644
index 000000000..e3d1daae6
--- /dev/null
+++ b/src/llama_stack_ui/.dockerignore
@@ -0,0 +1,20 @@
+.git
+.gitignore
+.env.local
+.env.*.local
+.next
+node_modules
+npm-debug.log
+*.md
+.DS_Store
+.vscode
+.idea
+playwright-report
+e2e
+jest.config.ts
+jest.setup.ts
+eslint.config.mjs
+.prettierrc
+.prettierignore
+.nvmrc
+playwright.config.ts
diff --git a/llama_stack/ui/.gitignore b/src/llama_stack_ui/.gitignore
similarity index 100%
rename from llama_stack/ui/.gitignore
rename to src/llama_stack_ui/.gitignore
diff --git a/llama_stack/ui/.nvmrc b/src/llama_stack_ui/.nvmrc
similarity index 100%
rename from llama_stack/ui/.nvmrc
rename to src/llama_stack_ui/.nvmrc
diff --git a/llama_stack/ui/.prettierignore b/src/llama_stack_ui/.prettierignore
similarity index 100%
rename from llama_stack/ui/.prettierignore
rename to src/llama_stack_ui/.prettierignore
diff --git a/llama_stack/ui/.prettierrc b/src/llama_stack_ui/.prettierrc
similarity index 100%
rename from llama_stack/ui/.prettierrc
rename to src/llama_stack_ui/.prettierrc
diff --git a/src/llama_stack_ui/Containerfile b/src/llama_stack_ui/Containerfile
new file mode 100644
index 000000000..6aea3dbfd
--- /dev/null
+++ b/src/llama_stack_ui/Containerfile
@@ -0,0 +1,18 @@
+FROM node:22.5.1-alpine
+
+ENV NODE_ENV=production
+
+# Install dumb-init for proper signal handling
+RUN apk add --no-cache dumb-init
+
+# Create non-root user for security
+RUN addgroup --system --gid 1001 nodejs
+RUN adduser --system --uid 1001 nextjs
+
+# Install llama-stack-ui from npm
+RUN npm install -g llama-stack-ui
+
+USER nextjs
+
+ENTRYPOINT ["dumb-init", "--"]
+CMD ["llama-stack-ui"]
diff --git a/llama_stack/ui/README.md b/src/llama_stack_ui/README.md
similarity index 100%
rename from llama_stack/ui/README.md
rename to src/llama_stack_ui/README.md
diff --git a/llama_stack/ui/app/api/auth/[...nextauth]/route.ts b/src/llama_stack_ui/app/api/auth/[...nextauth]/route.ts
similarity index 100%
rename from llama_stack/ui/app/api/auth/[...nextauth]/route.ts
rename to src/llama_stack_ui/app/api/auth/[...nextauth]/route.ts
diff --git a/llama_stack/ui/app/api/v1/[...path]/route.ts b/src/llama_stack_ui/app/api/v1/[...path]/route.ts
similarity index 91%
rename from llama_stack/ui/app/api/v1/[...path]/route.ts
rename to src/llama_stack_ui/app/api/v1/[...path]/route.ts
index 51c1f8004..d1aa31014 100644
--- a/llama_stack/ui/app/api/v1/[...path]/route.ts
+++ b/src/llama_stack_ui/app/api/v1/[...path]/route.ts
@@ -51,10 +51,14 @@ async function proxyRequest(request: NextRequest, method: string) {
   );
 
   // Create response with same status and headers
-  const proxyResponse = new NextResponse(responseText, {
-    status: response.status,
-    statusText: response.statusText,
-  });
+  // Handle 204 No Content responses specially
+  const proxyResponse =
response.status === 204 + ? new NextResponse(null, { status: 204 }) + : new NextResponse(responseText, { + status: response.status, + statusText: response.statusText, + }); // Copy response headers (except problematic ones) response.headers.forEach((value, key) => { diff --git a/llama_stack/ui/app/auth/signin/page.tsx b/src/llama_stack_ui/app/auth/signin/page.tsx similarity index 100% rename from llama_stack/ui/app/auth/signin/page.tsx rename to src/llama_stack_ui/app/auth/signin/page.tsx diff --git a/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx b/src/llama_stack_ui/app/chat-playground/chunk-processor.test.tsx similarity index 100% rename from llama_stack/ui/app/chat-playground/chunk-processor.test.tsx rename to src/llama_stack_ui/app/chat-playground/chunk-processor.test.tsx diff --git a/llama_stack/ui/app/chat-playground/page.test.tsx b/src/llama_stack_ui/app/chat-playground/page.test.tsx similarity index 100% rename from llama_stack/ui/app/chat-playground/page.test.tsx rename to src/llama_stack_ui/app/chat-playground/page.test.tsx diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/src/llama_stack_ui/app/chat-playground/page.tsx similarity index 100% rename from llama_stack/ui/app/chat-playground/page.tsx rename to src/llama_stack_ui/app/chat-playground/page.tsx diff --git a/llama_stack/ui/app/globals.css b/src/llama_stack_ui/app/globals.css similarity index 100% rename from llama_stack/ui/app/globals.css rename to src/llama_stack_ui/app/globals.css diff --git a/llama_stack/ui/app/layout.tsx b/src/llama_stack_ui/app/layout.tsx similarity index 100% rename from llama_stack/ui/app/layout.tsx rename to src/llama_stack_ui/app/layout.tsx diff --git a/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx b/src/llama_stack_ui/app/logs/chat-completions/[id]/page.tsx similarity index 100% rename from llama_stack/ui/app/logs/chat-completions/[id]/page.tsx rename to src/llama_stack_ui/app/logs/chat-completions/[id]/page.tsx diff --git a/llama_stack/ui/app/logs/chat-completions/layout.tsx b/src/llama_stack_ui/app/logs/chat-completions/layout.tsx similarity index 100% rename from llama_stack/ui/app/logs/chat-completions/layout.tsx rename to src/llama_stack_ui/app/logs/chat-completions/layout.tsx diff --git a/llama_stack/ui/app/logs/chat-completions/page.tsx b/src/llama_stack_ui/app/logs/chat-completions/page.tsx similarity index 100% rename from llama_stack/ui/app/logs/chat-completions/page.tsx rename to src/llama_stack_ui/app/logs/chat-completions/page.tsx diff --git a/llama_stack/ui/app/logs/responses/[id]/page.tsx b/src/llama_stack_ui/app/logs/responses/[id]/page.tsx similarity index 100% rename from llama_stack/ui/app/logs/responses/[id]/page.tsx rename to src/llama_stack_ui/app/logs/responses/[id]/page.tsx diff --git a/llama_stack/ui/app/logs/responses/layout.tsx b/src/llama_stack_ui/app/logs/responses/layout.tsx similarity index 100% rename from llama_stack/ui/app/logs/responses/layout.tsx rename to src/llama_stack_ui/app/logs/responses/layout.tsx diff --git a/llama_stack/ui/app/logs/responses/page.tsx b/src/llama_stack_ui/app/logs/responses/page.tsx similarity index 100% rename from llama_stack/ui/app/logs/responses/page.tsx rename to src/llama_stack_ui/app/logs/responses/page.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx b/src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx similarity index 100% rename from 
llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx rename to src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx b/src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx similarity index 100% rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx rename to src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx b/src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx similarity index 100% rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx rename to src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx b/src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx similarity index 100% rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx rename to src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx b/src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx similarity index 100% rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx rename to src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx b/src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx similarity index 100% rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx rename to src/llama_stack_ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx b/src/llama_stack_ui/app/logs/vector-stores/[id]/page.tsx similarity index 100% rename from llama_stack/ui/app/logs/vector-stores/[id]/page.tsx rename to src/llama_stack_ui/app/logs/vector-stores/[id]/page.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/layout.tsx b/src/llama_stack_ui/app/logs/vector-stores/layout.tsx similarity index 100% rename from llama_stack/ui/app/logs/vector-stores/layout.tsx rename to src/llama_stack_ui/app/logs/vector-stores/layout.tsx diff --git a/src/llama_stack_ui/app/logs/vector-stores/page.tsx b/src/llama_stack_ui/app/logs/vector-stores/page.tsx new file mode 100644 index 000000000..84680e01a --- /dev/null +++ b/src/llama_stack_ui/app/logs/vector-stores/page.tsx @@ -0,0 +1,406 @@ +"use client"; + +import React from "react"; +import type { + ListVectorStoresResponse, + VectorStore, +} from "llama-stack-client/resources/vector-stores/vector-stores"; +import { useRouter } from "next/navigation"; +import { usePagination } from "@/hooks/use-pagination"; +import { Button } from "@/components/ui/button"; +import { Plus, Trash2, Search, Edit, X } from "lucide-react"; +import { useState } from "react"; +import { Input } from "@/components/ui/input"; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; +import { Skeleton } from "@/components/ui/skeleton"; +import { useAuthClient } from 
"@/hooks/use-auth-client"; +import { + VectorStoreEditor, + VectorStoreFormData, +} from "@/components/vector-stores/vector-store-editor"; + +export default function VectorStoresPage() { + const router = useRouter(); + const client = useAuthClient(); + const [deletingStores, setDeletingStores] = useState>(new Set()); + const [searchTerm, setSearchTerm] = useState(""); + const [showVectorStoreModal, setShowVectorStoreModal] = useState(false); + const [editingStore, setEditingStore] = useState(null); + const [modalError, setModalError] = useState(null); + const [showSuccessState, setShowSuccessState] = useState(false); + const { + data: stores, + status, + hasMore, + error, + loadMore, + } = usePagination({ + limit: 20, + order: "desc", + fetchFunction: async (client, params) => { + const response = await client.vectorStores.list({ + after: params.after, + limit: params.limit, + order: params.order, + } as Parameters[0]); + return response as ListVectorStoresResponse; + }, + errorMessagePrefix: "vector stores", + }); + + // Auto-load all pages for infinite scroll behavior (like Responses) + React.useEffect(() => { + if (status === "idle" && hasMore) { + loadMore(); + } + }, [status, hasMore, loadMore]); + + // Handle ESC key to close modal + React.useEffect(() => { + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape" && showVectorStoreModal) { + handleCancel(); + } + }; + + document.addEventListener("keydown", handleEscape); + return () => document.removeEventListener("keydown", handleEscape); + }, [showVectorStoreModal]); + + const handleDeleteVectorStore = async (storeId: string) => { + if ( + !confirm( + "Are you sure you want to delete this vector store? This action cannot be undone." + ) + ) { + return; + } + + setDeletingStores(prev => new Set([...prev, storeId])); + + try { + await client.vectorStores.delete(storeId); + // Reload the data to reflect the deletion + window.location.reload(); + } catch (err: unknown) { + console.error("Failed to delete vector store:", err); + const errorMessage = err instanceof Error ? err.message : "Unknown error"; + alert(`Failed to delete vector store: ${errorMessage}`); + } finally { + setDeletingStores(prev => { + const newSet = new Set(prev); + newSet.delete(storeId); + return newSet; + }); + } + }; + + const handleSaveVectorStore = async (formData: VectorStoreFormData) => { + try { + setModalError(null); + + if (editingStore) { + // Update existing vector store + const updateParams: { + name?: string; + extra_body?: Record; + } = {}; + + // Only include fields that have changed or are provided + if (formData.name && formData.name !== editingStore.name) { + updateParams.name = formData.name; + } + + // Add all parameters to extra_body (except provider_id which can't be changed) + const extraBody: Record = {}; + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + updateParams.extra_body = extraBody; + } + + await client.vectorStores.update(editingStore.id, updateParams); + + // Show success state with close button + setShowSuccessState(true); + setModalError( + "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes." 
+        );
+        return;
+      }
+
+      const createParams: {
+        name?: string;
+        provider_id?: string;
+        extra_body?: Record<string, unknown>;
+      } = {
+        name: formData.name || undefined,
+      };
+
+      // Extract provider_id to top-level (like the Python client does)
+      if (formData.provider_id) {
+        createParams.provider_id = formData.provider_id;
+      }
+
+      // Add remaining parameters to extra_body
+      const extraBody: Record<string, unknown> = {};
+      if (formData.provider_id) {
+        extraBody.provider_id = formData.provider_id;
+      }
+      if (formData.embedding_model) {
+        extraBody.embedding_model = formData.embedding_model;
+      }
+      if (formData.embedding_dimension) {
+        extraBody.embedding_dimension = formData.embedding_dimension;
+      }
+
+      if (Object.keys(extraBody).length > 0) {
+        createParams.extra_body = extraBody;
+      }
+
+      await client.vectorStores.create(createParams);
+
+      // Show success state with close button
+      setShowSuccessState(true);
+      setModalError(
+        "✅ Vector store created successfully! You can close this modal and refresh the page to see changes."
+      );
+    } catch (err: unknown) {
+      console.error("Failed to create vector store:", err);
+      const errorMessage =
+        err instanceof Error ? err.message : "Failed to create vector store";
+      setModalError(errorMessage);
+    }
+  };
+
+  const handleEditVectorStore = (store: VectorStore) => {
+    setEditingStore(store);
+    setShowVectorStoreModal(true);
+    setModalError(null);
+  };
+
+  const handleCancel = () => {
+    setShowVectorStoreModal(false);
+    setEditingStore(null);
+    setModalError(null);
+    setShowSuccessState(false);
+  };
+
+  const renderContent = () => {
+    if (status === "loading") {
+      return (
+ + + +
+ ); + } + + if (status === "error") { + return
Error: {error?.message}
; + } + + if (!stores || stores.length === 0) { + return

No vector stores found.

; + } + + // Filter stores based on search term + const filteredStores = stores.filter(store => { + if (!searchTerm) return true; + + const searchLower = searchTerm.toLowerCase(); + return ( + store.id.toLowerCase().includes(searchLower) || + (store.name && store.name.toLowerCase().includes(searchLower)) || + (store.metadata?.provider_id && + String(store.metadata.provider_id) + .toLowerCase() + .includes(searchLower)) || + (store.metadata?.provider_vector_db_id && + String(store.metadata.provider_vector_db_id) + .toLowerCase() + .includes(searchLower)) + ); + }); + + return ( +
+ {/* Search Bar */} +
+ + setSearchTerm(e.target.value)} + className="pl-10" + /> +
+ +
+ + + + ID + Name + Created + Completed + Cancelled + Failed + In Progress + Total + Usage Bytes + Provider ID + Provider Vector DB ID + Actions + + + + {filteredStores.map(store => { + const fileCounts = store.file_counts; + const metadata = store.metadata || {}; + const providerId = metadata.provider_id ?? ""; + const providerDbId = metadata.provider_vector_db_id ?? ""; + + return ( + + router.push(`/logs/vector-stores/${store.id}`) + } + className="cursor-pointer hover:bg-muted/50" + > + + + + {store.name} + + {new Date(store.created_at * 1000).toLocaleString()} + + {fileCounts.completed} + {fileCounts.cancelled} + {fileCounts.failed} + {fileCounts.in_progress} + {fileCounts.total} + {store.usage_bytes} + {providerId} + {providerDbId} + +
+ + +
+
+
+ ); + })} +
+
+
+
+ ); + }; + + return ( +
+
+

Vector Stores

+ +
+ {renderContent()} + + {/* Create Vector Store Modal */} + {showVectorStoreModal && ( +
+
+
+

+ {editingStore ? "Edit Vector Store" : "Create New Vector Store"} +

+ +
+
+ +
+
+
+ )} +
+  );
+}
diff --git a/llama_stack/ui/app/page.tsx b/src/llama_stack_ui/app/page.tsx
similarity index 100%
rename from llama_stack/ui/app/page.tsx
rename to src/llama_stack_ui/app/page.tsx
diff --git a/src/llama_stack_ui/app/prompts/page.tsx b/src/llama_stack_ui/app/prompts/page.tsx
new file mode 100644
index 000000000..30106a056
--- /dev/null
+++ b/src/llama_stack_ui/app/prompts/page.tsx
@@ -0,0 +1,5 @@
+import { PromptManagement } from "@/components/prompts";
+
+export default function PromptsPage() {
+  return <PromptManagement />;
+}
diff --git a/src/llama_stack_ui/bin/cli.js b/src/llama_stack_ui/bin/cli.js
new file mode 100755
index 000000000..6069d2f22
--- /dev/null
+++ b/src/llama_stack_ui/bin/cli.js
@@ -0,0 +1,34 @@
+#!/usr/bin/env node
+
+const { spawn } = require('child_process');
+const path = require('path');
+
+const port = process.env.LLAMA_STACK_UI_PORT || 8322;
+const uiDir = path.resolve(__dirname, '..');
+const serverPath = path.join(uiDir, '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'server.js');
+const serverDir = path.dirname(serverPath);
+
+console.log(`Starting Llama Stack UI on http://localhost:${port}`);
+
+const child = spawn(process.execPath, [serverPath], {
+  cwd: serverDir,
+  stdio: 'inherit',
+  env: {
+    ...process.env,
+    PORT: port,
+  },
+});
+
+process.on('SIGINT', () => {
+  child.kill('SIGINT');
+  process.exit(0);
+});
+
+process.on('SIGTERM', () => {
+  child.kill('SIGTERM');
+  process.exit(0);
+});
+
+child.on('exit', (code) => {
+  process.exit(code);
+});
diff --git a/llama_stack/ui/components.json b/src/llama_stack_ui/components.json
similarity index 100%
rename from llama_stack/ui/components.json
rename to src/llama_stack_ui/components.json
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx b/src/llama_stack_ui/components/chat-completions/chat-completion-detail.test.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx
rename to src/llama_stack_ui/components/chat-completions/chat-completion-detail.test.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx b/src/llama_stack_ui/components/chat-completions/chat-completion-detail.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-detail.tsx
rename to src/llama_stack_ui/components/chat-completions/chat-completion-detail.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx b/src/llama_stack_ui/components/chat-completions/chat-completion-table.test.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx
rename to src/llama_stack_ui/components/chat-completions/chat-completion-table.test.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completions-table.tsx b/src/llama_stack_ui/components/chat-completions/chat-completions-table.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completions-table.tsx
rename to src/llama_stack_ui/components/chat-completions/chat-completions-table.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx b/src/llama_stack_ui/components/chat-completions/chat-messasge-item.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-messasge-item.tsx
rename to src/llama_stack_ui/components/chat-completions/chat-messasge-item.tsx
diff --git a/llama_stack/ui/components/chat-playground/chat-message.tsx
b/src/llama_stack_ui/components/chat-playground/chat-message.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/chat-message.tsx rename to src/llama_stack_ui/components/chat-playground/chat-message.tsx diff --git a/llama_stack/ui/components/chat-playground/chat.tsx b/src/llama_stack_ui/components/chat-playground/chat.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/chat.tsx rename to src/llama_stack_ui/components/chat-playground/chat.tsx diff --git a/llama_stack/ui/components/chat-playground/conversations.test.tsx b/src/llama_stack_ui/components/chat-playground/conversations.test.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/conversations.test.tsx rename to src/llama_stack_ui/components/chat-playground/conversations.test.tsx diff --git a/llama_stack/ui/components/chat-playground/conversations.tsx b/src/llama_stack_ui/components/chat-playground/conversations.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/conversations.tsx rename to src/llama_stack_ui/components/chat-playground/conversations.tsx diff --git a/llama_stack/ui/components/chat-playground/interrupt-prompt.tsx b/src/llama_stack_ui/components/chat-playground/interrupt-prompt.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/interrupt-prompt.tsx rename to src/llama_stack_ui/components/chat-playground/interrupt-prompt.tsx diff --git a/llama_stack/ui/components/chat-playground/markdown-renderer.tsx b/src/llama_stack_ui/components/chat-playground/markdown-renderer.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/markdown-renderer.tsx rename to src/llama_stack_ui/components/chat-playground/markdown-renderer.tsx diff --git a/llama_stack/ui/components/chat-playground/message-components.tsx b/src/llama_stack_ui/components/chat-playground/message-components.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/message-components.tsx rename to src/llama_stack_ui/components/chat-playground/message-components.tsx diff --git a/llama_stack/ui/components/chat-playground/message-input.tsx b/src/llama_stack_ui/components/chat-playground/message-input.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/message-input.tsx rename to src/llama_stack_ui/components/chat-playground/message-input.tsx diff --git a/llama_stack/ui/components/chat-playground/message-list.tsx b/src/llama_stack_ui/components/chat-playground/message-list.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/message-list.tsx rename to src/llama_stack_ui/components/chat-playground/message-list.tsx diff --git a/llama_stack/ui/components/chat-playground/prompt-suggestions.tsx b/src/llama_stack_ui/components/chat-playground/prompt-suggestions.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/prompt-suggestions.tsx rename to src/llama_stack_ui/components/chat-playground/prompt-suggestions.tsx diff --git a/llama_stack/ui/components/chat-playground/typing-indicator.tsx b/src/llama_stack_ui/components/chat-playground/typing-indicator.tsx similarity index 100% rename from llama_stack/ui/components/chat-playground/typing-indicator.tsx rename to src/llama_stack_ui/components/chat-playground/typing-indicator.tsx diff --git a/llama_stack/ui/components/chat-playground/vector-db-creator.tsx b/src/llama_stack_ui/components/chat-playground/vector-db-creator.tsx similarity index 100% rename from 
llama_stack/ui/components/chat-playground/vector-db-creator.tsx rename to src/llama_stack_ui/components/chat-playground/vector-db-creator.tsx diff --git a/llama_stack/ui/components/layout/app-sidebar.tsx b/src/llama_stack_ui/components/layout/app-sidebar.tsx similarity index 97% rename from llama_stack/ui/components/layout/app-sidebar.tsx rename to src/llama_stack_ui/components/layout/app-sidebar.tsx index 373f0c5ae..a5df60aef 100644 --- a/llama_stack/ui/components/layout/app-sidebar.tsx +++ b/src/llama_stack_ui/components/layout/app-sidebar.tsx @@ -8,6 +8,7 @@ import { MessageCircle, Settings2, Compass, + FileText, } from "lucide-react"; import Link from "next/link"; import { usePathname } from "next/navigation"; @@ -50,6 +51,11 @@ const manageItems = [ url: "/logs/vector-stores", icon: Database, }, + { + title: "Prompts", + url: "/prompts", + icon: FileText, + }, { title: "Documentation", url: "https://llama-stack.readthedocs.io/en/latest/references/api_reference/index.html", diff --git a/llama_stack/ui/components/layout/detail-layout.tsx b/src/llama_stack_ui/components/layout/detail-layout.tsx similarity index 100% rename from llama_stack/ui/components/layout/detail-layout.tsx rename to src/llama_stack_ui/components/layout/detail-layout.tsx diff --git a/llama_stack/ui/components/layout/logs-layout.tsx b/src/llama_stack_ui/components/layout/logs-layout.tsx similarity index 100% rename from llama_stack/ui/components/layout/logs-layout.tsx rename to src/llama_stack_ui/components/layout/logs-layout.tsx diff --git a/llama_stack/ui/components/layout/page-breadcrumb.tsx b/src/llama_stack_ui/components/layout/page-breadcrumb.tsx similarity index 100% rename from llama_stack/ui/components/layout/page-breadcrumb.tsx rename to src/llama_stack_ui/components/layout/page-breadcrumb.tsx diff --git a/llama_stack/ui/components/logs/logs-table-scroll.test.tsx b/src/llama_stack_ui/components/logs/logs-table-scroll.test.tsx similarity index 100% rename from llama_stack/ui/components/logs/logs-table-scroll.test.tsx rename to src/llama_stack_ui/components/logs/logs-table-scroll.test.tsx diff --git a/llama_stack/ui/components/logs/logs-table.test.tsx b/src/llama_stack_ui/components/logs/logs-table.test.tsx similarity index 100% rename from llama_stack/ui/components/logs/logs-table.test.tsx rename to src/llama_stack_ui/components/logs/logs-table.test.tsx diff --git a/llama_stack/ui/components/logs/logs-table.tsx b/src/llama_stack_ui/components/logs/logs-table.tsx similarity index 100% rename from llama_stack/ui/components/logs/logs-table.tsx rename to src/llama_stack_ui/components/logs/logs-table.tsx diff --git a/src/llama_stack_ui/components/prompts/index.ts b/src/llama_stack_ui/components/prompts/index.ts new file mode 100644 index 000000000..d190c5eb6 --- /dev/null +++ b/src/llama_stack_ui/components/prompts/index.ts @@ -0,0 +1,4 @@ +export { PromptManagement } from "./prompt-management"; +export { PromptList } from "./prompt-list"; +export { PromptEditor } from "./prompt-editor"; +export * from "./types"; diff --git a/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx b/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx new file mode 100644 index 000000000..70e0e4e66 --- /dev/null +++ b/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx @@ -0,0 +1,309 @@ +import React from "react"; +import { render, screen, fireEvent } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { PromptEditor } from "./prompt-editor"; +import type { Prompt } from "./types"; + 
+describe("PromptEditor", () => { + const mockOnSave = jest.fn(); + const mockOnCancel = jest.fn(); + const mockOnDelete = jest.fn(); + + const defaultProps = { + onSave: mockOnSave, + onCancel: mockOnCancel, + onDelete: mockOnDelete, + }; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + describe("Create Mode", () => { + test("renders create form correctly", () => { + render(); + + expect(screen.getByLabelText("Prompt Content *")).toBeInTheDocument(); + expect(screen.getByText("Variables")).toBeInTheDocument(); + expect(screen.getByText("Preview")).toBeInTheDocument(); + expect(screen.getByText("Create Prompt")).toBeInTheDocument(); + expect(screen.getByText("Cancel")).toBeInTheDocument(); + }); + + test("shows preview placeholder when no content", () => { + render(); + + expect( + screen.getByText("Enter content to preview the compiled prompt") + ).toBeInTheDocument(); + }); + + test("submits form with correct data", () => { + render(); + + const promptInput = screen.getByLabelText("Prompt Content *"); + fireEvent.change(promptInput, { + target: { value: "Hello {{name}}, welcome!" }, + }); + + fireEvent.click(screen.getByText("Create Prompt")); + + expect(mockOnSave).toHaveBeenCalledWith({ + prompt: "Hello {{name}}, welcome!", + variables: [], + }); + }); + + test("prevents submission with empty prompt", () => { + render(); + + fireEvent.click(screen.getByText("Create Prompt")); + + expect(mockOnSave).not.toHaveBeenCalled(); + }); + }); + + describe("Edit Mode", () => { + const mockPrompt: Prompt = { + prompt_id: "prompt_123", + prompt: "Hello {{name}}, how is {{weather}}?", + version: 1, + variables: ["name", "weather"], + is_default: true, + }; + + test("renders edit form with existing data", () => { + render(); + + expect( + screen.getByDisplayValue("Hello {{name}}, how is {{weather}}?") + ).toBeInTheDocument(); + expect(screen.getAllByText("name")).toHaveLength(2); // One in variables, one in preview + expect(screen.getAllByText("weather")).toHaveLength(2); // One in variables, one in preview + expect(screen.getByText("Update Prompt")).toBeInTheDocument(); + expect(screen.getByText("Delete Prompt")).toBeInTheDocument(); + }); + + test("submits updated data correctly", () => { + render(); + + const promptInput = screen.getByLabelText("Prompt Content *"); + fireEvent.change(promptInput, { + target: { value: "Updated: Hello {{name}}!" }, + }); + + fireEvent.click(screen.getByText("Update Prompt")); + + expect(mockOnSave).toHaveBeenCalledWith({ + prompt: "Updated: Hello {{name}}!", + variables: ["name", "weather"], + }); + }); + }); + + describe("Variables Management", () => { + test("adds new variable", () => { + render(); + + const variableInput = screen.getByPlaceholderText( + "Add variable name (e.g. user_name, topic)" + ); + fireEvent.change(variableInput, { target: { value: "testVar" } }); + fireEvent.click(screen.getByText("Add")); + + expect(screen.getByText("testVar")).toBeInTheDocument(); + }); + + test("prevents adding duplicate variables", () => { + render(); + + const variableInput = screen.getByPlaceholderText( + "Add variable name (e.g. 
user_name, topic)" + ); + + // Add first variable + fireEvent.change(variableInput, { target: { value: "test" } }); + fireEvent.click(screen.getByText("Add")); + + // Try to add same variable again + fireEvent.change(variableInput, { target: { value: "test" } }); + + // Button should be disabled + expect(screen.getByText("Add")).toBeDisabled(); + }); + + test("removes variable", () => { + const mockPrompt: Prompt = { + prompt_id: "prompt_123", + prompt: "Hello {{name}}", + version: 1, + variables: ["name", "location"], + is_default: true, + }; + + render(); + + // Check that both variables are present initially + expect(screen.getAllByText("name").length).toBeGreaterThan(0); + expect(screen.getAllByText("location").length).toBeGreaterThan(0); + + // Remove the location variable by clicking the X button with the specific title + const removeLocationButton = screen.getByTitle( + "Remove location variable" + ); + fireEvent.click(removeLocationButton); + + // Name should still be there, location should be gone from the variables section + expect(screen.getAllByText("name").length).toBeGreaterThan(0); + expect( + screen.queryByTitle("Remove location variable") + ).not.toBeInTheDocument(); + }); + + test("adds variable on Enter key", () => { + render(); + + const variableInput = screen.getByPlaceholderText( + "Add variable name (e.g. user_name, topic)" + ); + fireEvent.change(variableInput, { target: { value: "enterVar" } }); + + // Simulate Enter key press + fireEvent.keyPress(variableInput, { + key: "Enter", + code: "Enter", + charCode: 13, + preventDefault: jest.fn(), + }); + + // Check if the variable was added by looking for the badge + expect(screen.getAllByText("enterVar").length).toBeGreaterThan(0); + }); + }); + + describe("Preview Functionality", () => { + test("shows live preview with variables", () => { + render(); + + // Add prompt content + const promptInput = screen.getByLabelText("Prompt Content *"); + fireEvent.change(promptInput, { + target: { value: "Hello {{name}}, welcome to {{place}}!" }, + }); + + // Add variables + const variableInput = screen.getByPlaceholderText( + "Add variable name (e.g. 
user_name, topic)" + ); + fireEvent.change(variableInput, { target: { value: "name" } }); + fireEvent.click(screen.getByText("Add")); + + fireEvent.change(variableInput, { target: { value: "place" } }); + fireEvent.click(screen.getByText("Add")); + + // Check that preview area shows the content + expect(screen.getByText("Compiled Prompt")).toBeInTheDocument(); + }); + + test("shows variable value inputs in preview", () => { + const mockPrompt: Prompt = { + prompt_id: "prompt_123", + prompt: "Hello {{name}}", + version: 1, + variables: ["name"], + is_default: true, + }; + + render(); + + expect(screen.getByText("Variable Values")).toBeInTheDocument(); + expect( + screen.getByPlaceholderText("Enter value for name") + ).toBeInTheDocument(); + }); + + test("shows color legend for variable states", () => { + render(); + + // Add content to show preview + const promptInput = screen.getByLabelText("Prompt Content *"); + fireEvent.change(promptInput, { + target: { value: "Hello {{name}}" }, + }); + + expect(screen.getByText("Used")).toBeInTheDocument(); + expect(screen.getByText("Unused")).toBeInTheDocument(); + expect(screen.getByText("Undefined")).toBeInTheDocument(); + }); + }); + + describe("Error Handling", () => { + test("displays error message", () => { + const errorMessage = "Prompt contains undeclared variables"; + render(); + + expect(screen.getByText(errorMessage)).toBeInTheDocument(); + }); + }); + + describe("Delete Functionality", () => { + const mockPrompt: Prompt = { + prompt_id: "prompt_123", + prompt: "Hello {{name}}", + version: 1, + variables: ["name"], + is_default: true, + }; + + test("shows delete button in edit mode", () => { + render(); + + expect(screen.getByText("Delete Prompt")).toBeInTheDocument(); + }); + + test("hides delete button in create mode", () => { + render(); + + expect(screen.queryByText("Delete Prompt")).not.toBeInTheDocument(); + }); + + test("calls onDelete with confirmation", () => { + const originalConfirm = window.confirm; + window.confirm = jest.fn(() => true); + + render(); + + fireEvent.click(screen.getByText("Delete Prompt")); + + expect(window.confirm).toHaveBeenCalledWith( + "Are you sure you want to delete this prompt? This action cannot be undone." 
+      expect(mockOnDelete).toHaveBeenCalledWith("prompt_123");
+
+      window.confirm = originalConfirm;
+    });
+
+    test("does not delete when confirmation is cancelled", () => {
+      const originalConfirm = window.confirm;
+      window.confirm = jest.fn(() => false);
+
+      render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
+
+      fireEvent.click(screen.getByText("Delete Prompt"));
+
+      expect(mockOnDelete).not.toHaveBeenCalled();
+
+      window.confirm = originalConfirm;
+    });
+  });
+
+  describe("Cancel Functionality", () => {
+    test("calls onCancel when cancel button is clicked", () => {
+      render(<PromptEditor {...defaultProps} />);
+
+      fireEvent.click(screen.getByText("Cancel"));
+
+      expect(mockOnCancel).toHaveBeenCalled();
+    });
+  });
+});
diff --git a/src/llama_stack_ui/components/prompts/prompt-editor.tsx b/src/llama_stack_ui/components/prompts/prompt-editor.tsx
new file mode 100644
index 000000000..efa76f757
--- /dev/null
+++ b/src/llama_stack_ui/components/prompts/prompt-editor.tsx
@@ -0,0 +1,346 @@
+"use client";
+
+import { useState, useEffect } from "react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
+import { Textarea } from "@/components/ui/textarea";
+import { Badge } from "@/components/ui/badge";
+import {
+  Card,
+  CardContent,
+  CardDescription,
+  CardHeader,
+  CardTitle,
+} from "@/components/ui/card";
+import { Separator } from "@/components/ui/separator";
+import { X, Plus, Save, Trash2 } from "lucide-react";
+import { Prompt, PromptFormData } from "./types";
+
+interface PromptEditorProps {
+  prompt?: Prompt;
+  onSave: (prompt: PromptFormData) => void;
+  onCancel: () => void;
+  onDelete?: (promptId: string) => void;
+  error?: string | null;
+}
+
+export function PromptEditor({
+  prompt,
+  onSave,
+  onCancel,
+  onDelete,
+  error,
+}: PromptEditorProps) {
+  const [formData, setFormData] = useState<PromptFormData>({
+    prompt: "",
+    variables: [],
+  });
+
+  const [newVariable, setNewVariable] = useState("");
+  const [variableValues, setVariableValues] = useState<Record<string, string>>(
+    {}
+  );
+
+  useEffect(() => {
+    if (prompt) {
+      setFormData({
+        prompt: prompt.prompt || "",
+        variables: prompt.variables || [],
+      });
+    }
+  }, [prompt]);
+
+  const handleSubmit = (e: React.FormEvent) => {
+    e.preventDefault();
+    if (!formData.prompt.trim()) {
+      return;
+    }
+    onSave(formData);
+  };
+
+  const addVariable = () => {
+    if (
+      newVariable.trim() &&
+      !formData.variables.includes(newVariable.trim())
+    ) {
+      setFormData(prev => ({
+        ...prev,
+        variables: [...prev.variables, newVariable.trim()],
+      }));
+      setNewVariable("");
+    }
+  };
+
+  const removeVariable = (variableToRemove: string) => {
+    setFormData(prev => ({
+      ...prev,
+      variables: prev.variables.filter(
+        variable => variable !== variableToRemove
+      ),
+    }));
+  };
+
+  const renderPreview = () => {
+    const text = formData.prompt;
+    if (!text) return text;
+
+    // Split text by variable patterns and process each part
+    const parts = text.split(/(\{\{\s*\w+\s*\}\})/g);
+
+    return parts.map((part, index) => {
+      const variableMatch = part.match(/\{\{\s*(\w+)\s*\}\}/);
+      if (variableMatch) {
+        const variableName = variableMatch[1];
+        const isDefined = formData.variables.includes(variableName);
+        const value = variableValues[variableName];
+
+        if (!isDefined) {
+          // Variable not in variables list - likely a typo/bug (RED)
+          return (
+            <span key={index}>
+              {part}
+            </span>
+          );
+        } else if (value && value.trim()) {
+          // Variable defined and has value - show the value (GREEN)
+          return (
+            <span key={index}>
+              {value}
+            </span>
+          );
+        } else {
+          // Variable defined but empty (YELLOW)
+          return (
+            <span key={index}>
+              {part}
+            </span>
+          );
+        }
+ } + return part; + }); + }; + + const updateVariableValue = (variable: string, value: string) => { + setVariableValues(prev => ({ + ...prev, + [variable]: value, + })); + }; + + return ( +
+ {error && ( +
+

{error}

+
+ )} +
+ {/* Form Section */} +
+
+ +