Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 12:07:34 +00:00)

Merge branch 'main' into fix/vector-db-mandatory-provider-id

Commit 4374da02f3: 243 changed files with 21774 additions and 17408 deletions

.github/actions/run-and-record-tests/action.yml (60 lines changed)

@@ -2,26 +2,28 @@ name: 'Run and Record Tests'
 description: 'Run integration tests and handle recording/artifact upload'
 
 inputs:
-  test-subdirs:
-    description: 'Comma-separated list of test subdirectories to run'
-    required: true
-  test-pattern:
-    description: 'Regex pattern to pass to pytest -k'
-    required: false
-    default: ''
   stack-config:
     description: 'Stack configuration to use'
     required: true
-  provider:
-    description: 'Provider to use for tests'
-    required: true
+  setup:
+    description: 'Setup to use for tests (e.g., ollama, gpt, vllm)'
+    required: false
+    default: ''
   inference-mode:
     description: 'Inference mode (record or replay)'
    required: true
-  run-vision-tests:
-    description: 'Whether to run vision tests'
-    required: false
-    default: 'false'
+  suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
+    required: false
+    default: ''
+  subdirs:
+    description: 'Comma-separated list of test subdirectories to run; overrides suite'
+    required: false
+    default: ''
+  pattern:
+    description: 'Regex pattern to pass to pytest -k'
+    required: false
+    default: ''
 
 runs:
   using: 'composite'

@@ -36,14 +38,23 @@ runs:
     - name: Run Integration Tests
       shell: bash
       run: |
-        uv run --no-sync ./scripts/integration-tests.sh \
-          --stack-config '${{ inputs.stack-config }}' \
-          --provider '${{ inputs.provider }}' \
-          --test-subdirs '${{ inputs.test-subdirs }}' \
-          --test-pattern '${{ inputs.test-pattern }}' \
-          --inference-mode '${{ inputs.inference-mode }}' \
-          ${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }} \
-          | tee pytest-${{ inputs.inference-mode }}.log
+        SCRIPT_ARGS="--stack-config ${{ inputs.stack-config }} --inference-mode ${{ inputs.inference-mode }}"
+
+        # Add optional arguments only if they are provided
+        if [ -n '${{ inputs.setup }}' ]; then
+          SCRIPT_ARGS="$SCRIPT_ARGS --setup ${{ inputs.setup }}"
+        fi
+        if [ -n '${{ inputs.suite }}' ]; then
+          SCRIPT_ARGS="$SCRIPT_ARGS --suite ${{ inputs.suite }}"
+        fi
+        if [ -n '${{ inputs.subdirs }}' ]; then
+          SCRIPT_ARGS="$SCRIPT_ARGS --subdirs ${{ inputs.subdirs }}"
+        fi
+        if [ -n '${{ inputs.pattern }}' ]; then
+          SCRIPT_ARGS="$SCRIPT_ARGS --pattern ${{ inputs.pattern }}"
+        fi
+
+        uv run --no-sync ./scripts/integration-tests.sh $SCRIPT_ARGS | tee pytest-${{ inputs.inference-mode }}.log
 
     - name: Commit and push recordings

@@ -57,12 +68,7 @@ runs:
           echo "New recordings detected, committing and pushing"
           git add tests/integration/recordings/
 
-          if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
-            git commit -m "Recordings update from CI (vision)"
-          else
-            git commit -m "Recordings update from CI"
-          fi
+          git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
 
           git fetch origin ${{ github.ref_name }}
           git rebase origin/${{ github.ref_name }}
           echo "Rebased successfully"

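For orientation, here is a minimal sketch of the command the rewritten step ends up running once the optional inputs are folded into SCRIPT_ARGS. The flag names come from the action above; the concrete values (server:ci-tests, replay, ollama, base) are illustrative only:

```bash
# Illustrative expansion of the SCRIPT_ARGS logic above; values are examples, not defaults.
uv run --no-sync ./scripts/integration-tests.sh \
  --stack-config server:ci-tests \
  --inference-mode replay \
  --setup ollama \
  --suite base | tee pytest-replay.log
```
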
.github/actions/setup-ollama/action.yml (8 lines changed)

@@ -1,17 +1,17 @@
 name: Setup Ollama
 description: Start Ollama
 inputs:
-  run-vision-tests:
-    description: 'Run vision tests: "true" or "false"'
+  suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
     required: false
-    default: 'false'
+    default: ''
 runs:
   using: "composite"
   steps:
     - name: Start Ollama
       shell: bash
       run: |
-        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+        if [ "${{ inputs.suite }}" == "vision" ]; then
          image="ollama-with-vision-model"
        else
          image="ollama-with-models"

@@ -8,14 +8,14 @@ inputs:
   client-version:
     description: 'Client version (latest or published)'
     required: true
-  provider:
-    description: 'Provider to setup (ollama or vllm)'
-    required: true
-    default: 'ollama'
-  run-vision-tests:
-    description: 'Whether to setup provider for vision tests'
-    required: false
-    default: 'false'
+  setup:
+    description: 'Setup to configure (ollama, vllm, gpt, etc.)'
+    required: false
+    default: 'ollama'
+  suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
+    required: false
+    default: ''
   inference-mode:
     description: 'Inference mode (record or replay)'
     required: true

@@ -30,13 +30,13 @@ runs:
       client-version: ${{ inputs.client-version }}
 
   - name: Setup ollama
-    if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
+    if: ${{ (inputs.setup == 'ollama' || inputs.setup == 'ollama-vision') && inputs.inference-mode == 'record' }}
     uses: ./.github/actions/setup-ollama
     with:
-      run-vision-tests: ${{ inputs.run-vision-tests }}
+      suite: ${{ inputs.suite }}
 
   - name: Setup vllm
-    if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
+    if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
     uses: ./.github/actions/setup-vllm
 
   - name: Build Llama Stack

.github/workflows/README.md (3 lines changed)

@@ -5,10 +5,11 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Name | File | Purpose |
 | ---- | ---- | ------- |
 | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
+| API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
 | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
 | SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
-| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
+| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |

.github/workflows/conformance.yml (new file, 57 lines)

@@ -0,0 +1,57 @@
+# API Conformance Tests
+# This workflow ensures that API changes maintain backward compatibility and don't break existing integrations
+# It runs schema validation and OpenAPI diff checks to catch breaking changes early
+
+name: API Conformance Tests
+
+run-name: Run the API Conformance test suite on the changes.
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    types: [opened, synchronize, reopened]
+    paths:
+      - 'llama_stack/**'
+      - '!llama_stack/ui/**'
+      - 'tests/**'
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - '.github/workflows/conformance.yml' # This workflow itself
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
+  # Cancel in-progress runs when new commits are pushed to avoid wasting CI resources
+  cancel-in-progress: true
+
+jobs:
+  # Job to check if API schema changes maintain backward compatibility
+  check-schema-compatibility:
+    runs-on: ubuntu-latest
+    steps:
+      # Using specific version 4.1.7 because 5.0.0 fails when trying to run this locally using `act`
+      # This ensures consistent behavior between local testing and CI
+      - name: Checkout PR Code
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+
+      # Checkout the base branch to compare against (usually main)
+      # This allows us to diff the current changes against the previous state
+      - name: Checkout Base Branch
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          ref: ${{ github.event.pull_request.base.ref }}
+          path: 'base'
+
+      # Install oasdiff: https://github.com/oasdiff/oasdiff, a tool for detecting breaking changes in OpenAPI specs.
+      - name: Install oasdiff
+        run: |
+          curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh
+
+      # Run oasdiff to detect breaking changes in the API specification
+      # This step will fail if incompatible changes are detected, preventing breaking changes from being merged
+      - name: Run OpenAPI Breaking Change Diff
+        run: |
+          oasdiff breaking --fail-on ERR base/docs/_static/llama-stack-spec.yaml docs/_static/llama-stack-spec.yaml --match-path '^/v1/openai/v1' \
+            --match-path '^/v1/vector-io' \
+            --match-path '^/v1/vector-dbs'

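The same breaking-change check can be reproduced outside CI. A minimal sketch, assuming oasdiff has been installed with the install script above and the base branch's spec has already been checked out under base/ (both paths and all flags are taken from the workflow):

```bash
# Install oasdiff the same way the workflow does
curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh

# Fail on breaking changes, restricted to the path prefixes the workflow checks
oasdiff breaking --fail-on ERR \
  base/docs/_static/llama-stack-spec.yaml docs/_static/llama-stack-spec.yaml \
  --match-path '^/v1/openai/v1' \
  --match-path '^/v1/vector-io' \
  --match-path '^/v1/vector-dbs'
```
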
.github/workflows/integration-tests.yml (32 lines changed)

@@ -1,6 +1,6 @@
 name: Integration Tests (Replay)
 
-run-name: Run the integration test suite from tests/integration in replay mode
+run-name: Run the integration test suites from tests/integration in replay mode
 
 on:
   push:

@@ -28,18 +28,10 @@ on:
       description: 'Test against both the latest and published versions'
       type: boolean
       default: false
-    test-provider:
-      description: 'Test against a specific provider'
+    test-setup:
+      description: 'Test against a specific setup'
       type: string
       default: 'ollama'
-    test-subdirs:
-      description: 'Comma-separated list of test subdirectories to run'
-      type: string
-      default: ''
-    test-pattern:
-      description: 'Regex pattern to pass to pytest -k'
-      type: string
-      default: ''
 
 concurrency:
   # Skip concurrency for pushes to main - each commit should be tested independently

@@ -50,18 +42,18 @@ jobs:
 
   run-replay-mode-tests:
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
 
     strategy:
       fail-fast: false
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
-        provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
+        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
+        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        run-vision-tests: [true, false]
+        suite: [base, vision]
 
     steps:
       - name: Checkout repository

@@ -72,16 +64,14 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
-          provider: ${{ matrix.provider }}
-          run-vision-tests: ${{ matrix.run-vision-tests }}
+          setup: ${{ matrix.setup }}
+          suite: ${{ matrix.suite }}
           inference-mode: 'replay'
 
       - name: Run tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-subdirs: ${{ inputs.test-subdirs }}
-          test-pattern: ${{ inputs.test-pattern }}
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
-          provider: ${{ matrix.provider }}
+          setup: ${{ matrix.setup }}
           inference-mode: 'replay'
-          run-vision-tests: ${{ matrix.run-vision-tests }}
+          suite: ${{ matrix.suite }}

.github/workflows/pre-commit.yml (5 lines changed)

@@ -28,7 +28,7 @@ jobs:
           fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }}
 
       - name: Set up Python
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
         with:
           python-version: '3.12'
           cache: pip

@@ -37,7 +37,7 @@ jobs:
             .pre-commit-config.yaml
 
       - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+        uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
         with:
           node-version: '20'
           cache: 'npm'

@@ -48,7 +48,6 @@ jobs:
         working-directory: llama_stack/ui
 
       - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
-        continue-on-error: true
         env:
           SKIP: no-commit-to-branch
           RUFF_OUTPUT_FORMAT: github

.github/workflows/python-build-test.yml (2 lines changed)

@@ -24,7 +24,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
 
       - name: Install uv
-        uses: astral-sh/setup-uv@4959332f0f014c5280e7eac8b70c90cb574c9f9b # v6.6.0
+        uses: astral-sh/setup-uv@557e51de59eb14aaaba2ed9621916900a91d50c6 # v6.6.1
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true

.github/workflows/record-integration-tests.yml (42 lines changed)

@@ -10,19 +10,19 @@ run-name: Run the integration test suite from tests/integration
 on:
   workflow_dispatch:
     inputs:
-      test-subdirs:
-        description: 'Comma-separated list of test subdirectories to run'
-        type: string
-        default: ''
-      test-provider:
-        description: 'Test against a specific provider'
+      test-setup:
+        description: 'Test against a specific setup'
         type: string
         default: 'ollama'
-      run-vision-tests:
-        description: 'Whether to run vision tests'
-        type: boolean
-        default: false
-      test-pattern:
+      suite:
+        description: 'Test suite to use: base, responses, vision, etc.'
+        type: string
+        default: ''
+      subdirs:
+        description: 'Comma-separated list of test subdirectories to run; overrides suite'
+        type: string
+        default: ''
+      pattern:
         description: 'Regex pattern to pass to pytest -k'
         type: string
         default: ''

@@ -38,11 +38,11 @@ jobs:
       - name: Echo workflow inputs
         run: |
           echo "::group::Workflow Inputs"
-          echo "test-subdirs: ${{ inputs.test-subdirs }}"
-          echo "test-provider: ${{ inputs.test-provider }}"
-          echo "run-vision-tests: ${{ inputs.run-vision-tests }}"
-          echo "test-pattern: ${{ inputs.test-pattern }}"
           echo "branch: ${{ github.ref_name }}"
+          echo "test-setup: ${{ inputs.test-setup }}"
+          echo "suite: ${{ inputs.suite }}"
+          echo "subdirs: ${{ inputs.subdirs }}"
+          echo "pattern: ${{ inputs.pattern }}"
           echo "::endgroup::"
 
       - name: Checkout repository

@@ -55,16 +55,16 @@ jobs:
         with:
           python-version: "3.12" # Use single Python version for recording
           client-version: "latest"
-          provider: ${{ inputs.test-provider || 'ollama' }}
-          run-vision-tests: ${{ inputs.run-vision-tests }}
+          setup: ${{ inputs.test-setup || 'ollama' }}
+          suite: ${{ inputs.suite }}
           inference-mode: 'record'
 
       - name: Run and record tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-pattern: ${{ inputs.test-pattern }}
-          test-subdirs: ${{ inputs.test-subdirs }}
           stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
-          provider: ${{ inputs.test-provider || 'ollama' }}
+          setup: ${{ inputs.test-setup || 'ollama' }}
           inference-mode: 'record'
-          run-vision-tests: ${{ inputs.run-vision-tests }}
+          suite: ${{ inputs.suite }}
+          subdirs: ${{ inputs.subdirs }}
+          pattern: ${{ inputs.pattern }}

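Because the recording workflow is triggered via workflow_dispatch, a run can also be started from the command line. A minimal sketch, assuming an authenticated GitHub CLI; the input names come from the workflow above and the values are illustrative:

```bash
# Record the vision suite against the ollama setup (illustrative values)
gh workflow run record-integration-tests.yml \
  -f test-setup=ollama \
  -f suite=vision
```
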
.github/workflows/stale_bot.yml (2 lines changed)

@@ -24,7 +24,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Stale Action
-        uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
+        uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
         with:
           stale-issue-label: 'stale'
           stale-issue-message: >

.github/workflows/ui-unit-tests.yml (2 lines changed)

@@ -29,7 +29,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
 
       - name: Setup Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+        uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'

.gitignore (2 lines changed)

@@ -26,5 +26,7 @@ venv/
 pytest-report.xml
 .coverage
 .python-version
+AGENTS.md
+server.log
 CLAUDE.md
 .claude/

@@ -86,7 +86,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+        files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
       - id: provider-codegen
         name: Provider Codegen
         additional_dependencies:

CHANGELOG.md (98 lines changed)

@@ -1,5 +1,103 @@
 # Changelog
 
+# v0.2.20
+Published on: 2025-08-29T22:25:32Z
+
+Here are some key changes that are coming as part of this release.
+
+### Build and Environment
+
+- Environment improvements: fixed env var replacement to preserve types.
+- Docker stability: fixed container startup failures for Fireworks AI provider.
+- Removed absolute paths in build for better portability.
+
+### Features
+
+- UI Enhancements: Implemented file upload and VectorDB creation/configuration directly in UI.
+- Vector Store Improvements: Added keyword, vector, and hybrid search inside vector store.
+- Added S3 authorization support for file providers.
+- SQL Store: Added inequality support to where clause.
+
+### Documentation
+
+- Fixed post-training docs.
+- Added Contributor Guidelines for creating Internal vs. External providers.
+
+### Fixes
+
+- Removed unsupported bfcl scoring function.
+- Multiple reliability and configuration fixes for providers and environment handling.
+
+### Engineering / Chores
+
+- Cleaner internal development setup with consistent paths.
+- Incremental improvements to provider integration and vector store behavior.
+
+### New Contributors
+- @omertuc made their first contribution in #3270
+- @r3v5 made their first contribution in vector store hybrid search
+
+---
+
+# v0.2.19
+Published on: 2025-08-26T22:06:55Z
+
+## Highlights
+* feat: Add CORS configuration support for server by @skamenan7 in https://github.com/llamastack/llama-stack/pull/3201
+* feat(api): introduce /rerank by @ehhuang in https://github.com/llamastack/llama-stack/pull/2940
+* feat: Add S3 Files Provider by @mattf in https://github.com/llamastack/llama-stack/pull/3202
+
+---
+
+# v0.2.18
+Published on: 2025-08-20T01:09:27Z
+
+## Highlights
+* Add moderations create API
+* Hybrid search in Milvus
+* Numerous Responses API improvements
+* Documentation updates
+
+---
+
+# v0.2.17
+Published on: 2025-08-05T01:51:14Z
+
+## Highlights
+
+* feat(tests): introduce inference record/replay to increase test reliability by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2941
+* fix(library_client): improve initialization error handling and prevent AttributeError by @mattf in https://github.com/meta-llama/llama-stack/pull/2944
+* fix: use OLLAMA_URL to activate Ollama provider in starter by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2963
+* feat(UI): adding MVP playground UI by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2828
+* Standardization of errors (@nathan-weinberg)
+* feat: Enable DPO training with HuggingFace inline provider by @Nehanth in https://github.com/meta-llama/llama-stack/pull/2825
+* chore: rename templates to distributions by @ashwinb in https://github.com/meta-llama/llama-stack/pull/3035
+
+---
+
+# v0.2.16
+Published on: 2025-07-28T23:35:23Z
+
+## Highlights
+
+* Automatic model registration for self-hosted providers (ollama and vllm currently). No need for `INFERENCE_MODEL` environment variables which need to be updated, etc.
+* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [run.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/run.yaml) for more details.
+* All tests migrated to pytest now (thanks @Elbehery)
+* DPO implementation in the post-training provider (thanks @Nehanth)
+* (Huge!) Support for external APIs and providers thereof (thanks @leseb, @cdoern and others). This is a really big deal -- you can now add more APIs completely out of tree and experiment with them before (optionally) wanting to contribute back.
+* `inline::vllm` provider is gone thank you very much
+* several improvements to OpenAI inference implementations and LiteLLM backend (thanks @mattf)
+* Chroma now supports Vector Store API (thanks @franciscojavierarceo).
+* Authorization improvements: Vector Store/File APIs now supports access control (thanks @franciscojavierarceo); Telemetry read APIs are gated according to logged-in user's roles.
+
+---
+
 # v0.2.15
 Published on: 2025-07-16T03:30:01Z
 

@@ -34,13 +34,12 @@ This data enables data-driven architectural decisions and performance optimizati
 
 **1. Deploy base k8s infrastructure:**
 ```bash
-cd ../k8s
+cd ../../docs/source/distributions/k8s
 ./apply.sh
 ```
 
 **2. Deploy benchmark components:**
 ```bash
-cd ../k8s-benchmark
 ./apply.sh
 ```
 

@@ -56,7 +55,6 @@ kubectl get pods
 
 **Benchmark Llama Stack (default):**
 ```bash
-cd docs/source/distributions/k8s-benchmark/
 ./run-benchmark.sh
 ```
 

@@ -14,7 +14,7 @@ import os
 import random
 import statistics
 import time
-from typing import Tuple
 
 import aiohttp
 

@@ -57,17 +57,9 @@ class BenchmarkStats:
         success_rate = (self.success_count / self.total_requests) * 100
 
         print(f"\n{'=' * 60}")
-        print(f"BENCHMARK RESULTS")
-        print(f"{'='*60}")
-        print(f"Total time: {total_time:.2f}s")
-        print(f"Concurrent users: {self.concurrent_users}")
-        print(f"Total requests: {self.total_requests}")
-        print(f"Successful requests: {self.success_count}")
-        print(f"Failed requests: {len(self.errors)}")
-        print(f"Success rate: {success_rate:.1f}%")
-        print(f"Requests per second: {self.success_count / total_time:.2f}")
+        print("BENCHMARK RESULTS")
 
-        print(f"\nResponse Time Statistics:")
+        print("\nResponse Time Statistics:")
         print(f"  Mean: {statistics.mean(self.response_times):.3f}s")
         print(f"  Median: {statistics.median(self.response_times):.3f}s")
         print(f"  Min: {min(self.response_times):.3f}s")

@@ -78,14 +70,14 @@ class BenchmarkStats:
 
         percentiles = [50, 90, 95, 99]
         sorted_times = sorted(self.response_times)
-        print(f"\nPercentiles:")
+        print("\nPercentiles:")
         for p in percentiles:
             idx = int(len(sorted_times) * p / 100) - 1
             idx = max(0, min(idx, len(sorted_times) - 1))
             print(f"  P{p}: {sorted_times[idx]:.3f}s")
 
         if self.ttft_times:
-            print(f"\nTime to First Token (TTFT) Statistics:")
+            print("\nTime to First Token (TTFT) Statistics:")
             print(f"  Mean: {statistics.mean(self.ttft_times):.3f}s")
             print(f"  Median: {statistics.median(self.ttft_times):.3f}s")
             print(f"  Min: {min(self.ttft_times):.3f}s")

@@ -95,26 +87,35 @@ class BenchmarkStats:
             print(f"  Std Dev: {statistics.stdev(self.ttft_times):.3f}s")
 
             sorted_ttft = sorted(self.ttft_times)
-            print(f"\nTTFT Percentiles:")
+            print("\nTTFT Percentiles:")
             for p in percentiles:
                 idx = int(len(sorted_ttft) * p / 100) - 1
                 idx = max(0, min(idx, len(sorted_ttft) - 1))
                 print(f"  P{p}: {sorted_ttft[idx]:.3f}s")
 
         if self.chunks_received:
-            print(f"\nStreaming Statistics:")
+            print("\nStreaming Statistics:")
             print(f"  Mean chunks per response: {statistics.mean(self.chunks_received):.1f}")
             print(f"  Total chunks received: {sum(self.chunks_received)}")
 
+        print(f"{'=' * 60}")
+        print(f"Total time: {total_time:.2f}s")
+        print(f"Concurrent users: {self.concurrent_users}")
+        print(f"Total requests: {self.total_requests}")
+        print(f"Successful requests: {self.success_count}")
+        print(f"Failed requests: {len(self.errors)}")
+        print(f"Success rate: {success_rate:.1f}%")
+        print(f"Requests per second: {self.success_count / total_time:.2f}")
 
         if self.errors:
-            print(f"\nErrors (showing first 5):")
+            print("\nErrors (showing first 5):")
             for error in self.errors[:5]:
                 print(f"  {error}")
 
 
 class LlamaStackBenchmark:
     def __init__(self, base_url: str, model_id: str):
-        self.base_url = base_url.rstrip('/')
+        self.base_url = base_url.rstrip("/")
         self.model_id = model_id
         self.headers = {"Content-Type": "application/json"}
         self.test_messages = [

@@ -125,20 +126,14 @@ class LlamaStackBenchmark:
             [
                 {"role": "user", "content": "What is machine learning?"},
                 {"role": "assistant", "content": "Machine learning is a subset of AI..."},
-                {"role": "user", "content": "Can you give me a practical example?"}
-            ]
+                {"role": "user", "content": "Can you give me a practical example?"},
+            ],
         ]
 
-    async def make_async_streaming_request(self) -> Tuple[float, int, float | None, str | None]:
+    async def make_async_streaming_request(self) -> tuple[float, int, float | None, str | None]:
         """Make a single async streaming chat completion request."""
         messages = random.choice(self.test_messages)
-        payload = {
-            "model": self.model_id,
-            "messages": messages,
-            "stream": True,
-            "max_tokens": 100
-        }
+        payload = {"model": self.model_id, "messages": messages, "stream": True, "max_tokens": 100}
 
         start_time = time.time()
         chunks_received = 0

@@ -152,17 +147,17 @@ class LlamaStackBenchmark:
                 f"{self.base_url}/chat/completions",
                 headers=self.headers,
                 json=payload,
-                timeout=aiohttp.ClientTimeout(total=30)
+                timeout=aiohttp.ClientTimeout(total=30),
             ) as response:
                 if response.status == 200:
                     async for line in response.content:
                         if line:
-                            line_str = line.decode('utf-8').strip()
-                            if line_str.startswith('data: '):
+                            line_str = line.decode("utf-8").strip()
+                            if line_str.startswith("data: "):
                                 chunks_received += 1
                                 if ttft is None:
                                     ttft = time.time() - start_time
-                                if line_str == 'data: [DONE]':
+                                if line_str == "data: [DONE]":
                                     break
 
                     if chunks_received == 0:

@@ -179,7 +174,6 @@ class LlamaStackBenchmark:
         response_time = time.time() - start_time
         return response_time, chunks_received, ttft, error
 
-
     async def run_benchmark(self, duration: int, concurrent_users: int) -> BenchmarkStats:
         """Run benchmark using async requests for specified duration."""
         stats = BenchmarkStats()

@@ -191,7 +185,7 @@ class LlamaStackBenchmark:
         print(f"Model: {self.model_id}")
 
         connector = aiohttp.TCPConnector(limit=concurrent_users)
-        async with aiohttp.ClientSession(connector=connector) as session:
+        async with aiohttp.ClientSession(connector=connector):
 
             async def worker(worker_id: int):
                 """Worker that sends requests sequentially until canceled."""

@@ -215,7 +209,9 @@ class LlamaStackBenchmark:
                     await asyncio.sleep(1)  # Report every second
                     if time.time() >= last_report_time + 10:  # Report every 10 seconds
                         elapsed = time.time() - stats.start_time
-                        print(f"Completed: {stats.total_requests} requests in {elapsed:.1f}s")
+                        print(
+                            f"Completed: {stats.total_requests} requests in {elapsed:.1f}s, RPS: {stats.total_requests / elapsed:.1f}"
+                        )
                         last_report_time = time.time()
                 except asyncio.CancelledError:
                     break

@@ -240,14 +236,16 @@
 
 def main():
     parser = argparse.ArgumentParser(description="Llama Stack Benchmark Tool")
-    parser.add_argument("--base-url", default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
-                        help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)")
-    parser.add_argument("--model", default=os.getenv("INFERENCE_MODEL", "test-model"),
-                        help="Model ID to use for requests")
-    parser.add_argument("--duration", type=int, default=60,
-                        help="Duration in seconds to run benchmark (default: 60)")
-    parser.add_argument("--concurrent", type=int, default=10,
-                        help="Number of concurrent users (default: 10)")
+    parser.add_argument(
+        "--base-url",
+        default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
+        help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)",
+    )
+    parser.add_argument(
+        "--model", default=os.getenv("INFERENCE_MODEL", "test-model"), help="Model ID to use for requests"
+    )
+    parser.add_argument("--duration", type=int, default=60, help="Duration in seconds to run benchmark (default: 60)")
+    parser.add_argument("--concurrent", type=int, default=10, help="Number of concurrent users (default: 10)")
 
     args = parser.parse_args()
 

@@ -11,16 +11,18 @@ OpenAI-compatible mock server that returns:
 - Valid OpenAI-formatted chat completion responses with dynamic content
 """
 
-from flask import Flask, request, jsonify, Response
-import time
-import random
-import uuid
-import json
 import argparse
+import json
 import os
+import random
+import time
+import uuid
+
+from flask import Flask, Response, jsonify, request
 
 app = Flask(__name__)
 
 
 # Models from environment variables
 def get_models():
     models_str = os.getenv("MOCK_MODELS", "meta-llama/Llama-3.2-3B-Instruct")

@@ -29,40 +31,72 @@ def get_models():
     return {
         "object": "list",
         "data": [
-            {
-                "id": model_id,
-                "object": "model",
-                "created": 1234567890,
-                "owned_by": "vllm"
-            }
-            for model_id in model_ids
-        ]
+            {"id": model_id, "object": "model", "created": 1234567890, "owned_by": "vllm"} for model_id in model_ids
+        ],
     }
 
 
 def generate_random_text(length=50):
     """Generate random but coherent text for responses."""
     words = [
-        "Hello", "there", "I'm", "an", "AI", "assistant", "ready", "to", "help", "you",
-        "with", "your", "questions", "and", "tasks", "today", "Let", "me","know", "what",
-        "you'd", "like", "to", "discuss", "or", "explore", "together", "I", "can", "assist",
-        "with", "various", "topics", "including", "coding", "writing", "analysis", "and", "more"
+        "Hello",
+        "there",
+        "I'm",
+        "an",
+        "AI",
+        "assistant",
+        "ready",
+        "to",
+        "help",
+        "you",
+        "with",
+        "your",
+        "questions",
+        "and",
+        "tasks",
+        "today",
+        "Let",
+        "me",
+        "know",
+        "what",
+        "you'd",
+        "like",
+        "to",
+        "discuss",
+        "or",
+        "explore",
+        "together",
+        "I",
+        "can",
+        "assist",
+        "with",
+        "various",
+        "topics",
+        "including",
+        "coding",
+        "writing",
+        "analysis",
+        "and",
+        "more",
     ]
     return " ".join(random.choices(words, k=length))
 
-@app.route('/v1/models', methods=['GET'])
+
+@app.route("/v1/models", methods=["GET"])
 def list_models():
     models = get_models()
     print(f"[MOCK] Returning models: {[m['id'] for m in models['data']]}")
     return jsonify(models)
 
-@app.route('/v1/chat/completions', methods=['POST'])
+
+@app.route("/v1/chat/completions", methods=["POST"])
 def chat_completions():
     """Return OpenAI-formatted chat completion responses."""
     data = request.get_json()
-    default_model = get_models()['data'][0]['id']
-    model = data.get('model', default_model)
-    messages = data.get('messages', [])
-    stream = data.get('stream', False)
+    default_model = get_models()["data"][0]["id"]
+    model = data.get("model", default_model)
+    messages = data.get("messages", [])
+    stream = data.get("stream", False)
 
     print(f"[MOCK] Chat completion request - model: {model}, stream: {stream}")
 

@@ -71,11 +105,12 @@ def chat_completions():
     else:
         return handle_non_streaming_completion(model, messages)
 
 
 def handle_non_streaming_completion(model, messages):
     response_text = generate_random_text(random.randint(20, 80))
 
     # Calculate realistic token counts
-    prompt_tokens = sum(len(str(msg.get('content', '')).split()) for msg in messages)
+    prompt_tokens = sum(len(str(msg.get("content", "")).split()) for msg in messages)
     completion_tokens = len(response_text.split())
 
     response = {

@@ -83,25 +118,17 @@ def handle_non_streaming_completion(model, messages):
         "object": "chat.completion",
         "created": int(time.time()),
         "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": response_text
-                },
-                "finish_reason": "stop"
-            }
-        ],
+        "choices": [{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
         "usage": {
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
-            "total_tokens": prompt_tokens + completion_tokens
-        }
+            "total_tokens": prompt_tokens + completion_tokens,
+        },
     }
 
     return jsonify(response)
 
 
 def handle_streaming_completion(model, messages):
     def generate_stream():
         # Generate response text

@@ -114,12 +141,7 @@ def handle_streaming_completion(model, messages):
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"role": "assistant", "content": ""}
-                }
-            ]
+            "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}}],
         }
         yield f"data: {json.dumps(initial_chunk)}\n\n"
 

@@ -130,12 +152,7 @@ def handle_streaming_completion(model, messages):
                 "object": "chat.completion.chunk",
                 "created": int(time.time()),
                 "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": f"{word} " if i < len(words) - 1 else word}
-                    }
-                ]
+                "choices": [{"index": 0, "delta": {"content": f"{word} " if i < len(words) - 1 else word}}],
             }
             yield f"data: {json.dumps(chunk)}\n\n"
             # Configurable delay to simulate realistic streaming

@@ -148,35 +165,30 @@ def handle_streaming_completion(model, messages):
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"content": ""},
-                    "finish_reason": "stop"
-                }
-            ]
+            "choices": [{"index": 0, "delta": {"content": ""}, "finish_reason": "stop"}],
         }
         yield f"data: {json.dumps(final_chunk)}\n\n"
         yield "data: [DONE]\n\n"
 
     return Response(
         generate_stream(),
-        mimetype='text/event-stream',
+        mimetype="text/event-stream",
         headers={
-            'Cache-Control': 'no-cache',
-            'Connection': 'keep-alive',
-            'Access-Control-Allow-Origin': '*',
-        }
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Access-Control-Allow-Origin": "*",
+        },
     )
 
-@app.route('/health', methods=['GET'])
+
+@app.route("/health", methods=["GET"])
 def health():
     return jsonify({"status": "healthy", "type": "openai-mock"})
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='OpenAI-compatible mock server')
-    parser.add_argument('--port', type=int, default=8081,
-                        help='Port to run the server on (default: 8081)')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="OpenAI-compatible mock server")
+    parser.add_argument("--port", type=int, default=8081, help="Port to run the server on (default: 8081)")
    args = parser.parse_args()
 
     port = args.port

@@ -187,4 +199,4 @@ if __name__ == '__main__':
     print("- OpenAI-formatted chat/completion responses with dynamic content")
     print("- Streaming support with valid SSE format")
     print(f"- Listening on: http://0.0.0.0:{port}")
-    app.run(host='0.0.0.0', port=port, debug=False)
+    app.run(host="0.0.0.0", port=port, debug=False)

@@ -6,6 +6,7 @@ data:
     apis:
     - agents
     - inference
+    - files
     - safety
     - telemetry
     - tool_runtime

@@ -19,13 +20,6 @@ data:
         max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
         api_token: ${env.VLLM_API_TOKEN:=fake}
         tls_verify: ${env.VLLM_TLS_VERIFY:=true}
-      - provider_id: vllm-safety
-        provider_type: remote::vllm
-        config:
-          url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
-          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
-          api_token: ${env.VLLM_API_TOKEN:=fake}
-          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
       - provider_id: sentence-transformers
         provider_type: inline::sentence-transformers
         config: {}

@@ -41,6 +35,14 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
+      files:
+      - provider_id: meta-reference-files
+        provider_type: inline::localfs
+        config:
+          storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+          metadata_store:
+            type: sqlite
+            db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
       safety:
       - provider_id: llama-guard
         provider_type: inline::llama-guard

@@ -111,9 +113,6 @@ data:
     - model_id: ${env.INFERENCE_MODEL}
       provider_id: vllm-inference
      model_type: llm
-    - model_id: ${env.SAFETY_MODEL}
-      provider_id: vllm-safety
-      model_type: llm
     shields:
     - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
     vector_dbs: []

|
||||||
image_name: kubernetes-benchmark-demo
|
image_name: kubernetes-benchmark-demo
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- files
|
||||||
- inference
|
- inference
|
||||||
|
- files
|
||||||
|
- safety
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
- vector_io
|
- vector_io
|
||||||
|
@ -18,6 +21,14 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
|
files:
|
||||||
|
- provider_id: meta-reference-files
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config:
|
||||||
|
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
|
||||||
|
metadata_store:
|
||||||
|
type: sqlite
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
||||||
vector_io:
|
vector_io:
|
||||||
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
|
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
|
||||||
provider_type: remote::chromadb
|
provider_type: remote::chromadb
|
||||||
|
@ -30,6 +41,19 @@ providers:
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
|
files:
|
||||||
|
- provider_id: meta-reference-files
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config:
|
||||||
|
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
|
||||||
|
metadata_store:
|
||||||
|
type: sqlite
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
||||||
|
safety:
|
||||||
|
- provider_id: llama-guard
|
||||||
|
provider_type: inline::llama-guard
|
||||||
|
config:
|
||||||
|
excluded_categories: []
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
|
@ -95,6 +119,8 @@ models:
|
||||||
- model_id: ${env.INFERENCE_MODEL}
|
- model_id: ${env.INFERENCE_MODEL}
|
||||||
provider_id: vllm-inference
|
provider_id: vllm-inference
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
shields:
|
||||||
|
- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
|
||||||
vector_dbs: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
101 docs/_static/css/my_theme.css vendored
@ -1,5 +1,106 @@
@import url("theme.css");

/* Horizontal Navigation Bar */
.horizontal-nav {
  background-color: #ffffff;
  border-bottom: 1px solid #e5e5e5;
  padding: 0;
  position: fixed;
  top: 0;
  left: 0;
  right: 0;
  z-index: 1050;
  height: 50px;
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

[data-theme="dark"] .horizontal-nav {
  background-color: #1a1a1a;
  border-bottom: 1px solid #333;
}

.horizontal-nav .nav-container {
  max-width: 1200px;
  margin: 0 auto;
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 0 20px;
  height: 100%;
}

.horizontal-nav .nav-brand {
  font-size: 18px;
  font-weight: 600;
  color: #333;
  text-decoration: none;
}

[data-theme="dark"] .horizontal-nav .nav-brand {
  color: #fff;
}

.horizontal-nav .nav-links {
  display: flex;
  align-items: center;
  gap: 30px;
  list-style: none;
  margin: 0;
  padding: 0;
}

.horizontal-nav .nav-links a {
  color: #666;
  text-decoration: none;
  font-size: 14px;
  font-weight: 500;
  padding: 8px 12px;
  border-radius: 6px;
  transition: all 0.2s ease;
}

.horizontal-nav .nav-links a:hover,
.horizontal-nav .nav-links a.active {
  color: #333;
  background-color: #f5f5f5;
}

.horizontal-nav .nav-links a.active {
  font-weight: 600;
}

[data-theme="dark"] .horizontal-nav .nav-links a {
  color: #ccc;
}

[data-theme="dark"] .horizontal-nav .nav-links a:hover,
[data-theme="dark"] .horizontal-nav .nav-links a.active {
  color: #fff;
  background-color: #333;
}

.horizontal-nav .nav-links .github-link {
  display: flex;
  align-items: center;
  gap: 6px;
}

.horizontal-nav .nav-links .github-icon {
  width: 16px;
  height: 16px;
  fill: currentColor;
}

/* Adjust main content to account for fixed nav */
.wy-nav-side {
  top: 50px;
  height: calc(100vh - 50px);
}

.wy-nav-content-wrap {
  margin-top: 50px;
}

.wy-nav-content {
  max-width: 90%;
}
44 docs/_static/js/horizontal_nav.js vendored Normal file
@ -0,0 +1,44 @@
// Horizontal Navigation Bar for Llama Stack Documentation
document.addEventListener('DOMContentLoaded', function() {
    // Create the horizontal navigation HTML
    const navHTML = `
        <nav class="horizontal-nav">
            <div class="nav-container">
                <a href="/" class="nav-brand">Llama Stack</a>
                <ul class="nav-links">
                    <li><a href="/">Docs</a></li>
                    <li><a href="/references/api_reference/">API Reference</a></li>
                    <li><a href="https://github.com/meta-llama/llama-stack" target="_blank" class="github-link">
                        <svg class="github-icon" viewBox="0 0 16 16" aria-hidden="true">
                            <path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/>
                        </svg>
                        GitHub
                    </a></li>
                </ul>
            </div>
        </nav>
    `;

    // Insert the navigation at the beginning of the body
    document.body.insertAdjacentHTML('afterbegin', navHTML);

    // Update navigation links based on current page
    updateActiveNav();
});

function updateActiveNav() {
    const currentPath = window.location.pathname;
    const navLinks = document.querySelectorAll('.horizontal-nav .nav-links a');

    navLinks.forEach(link => {
        // Remove any existing active classes
        link.classList.remove('active');

        // Add active class based on current path
        if (currentPath === '/' && link.getAttribute('href') === '/') {
            link.classList.add('active');
        } else if (currentPath.includes('/references/api_reference/') && link.getAttribute('href').includes('api_reference')) {
            link.classList.add('active');
        }
    });
}
457 docs/_static/llama-stack-spec.html vendored
@ -633,6 +633,80 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/prompts": {
|
||||||
|
"get": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "A ListPromptsResponse containing all prompts.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ListPromptsResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "List all prompts.",
|
||||||
|
"parameters": []
|
||||||
|
},
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "The created Prompt resource.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Create a new prompt.",
|
||||||
|
"parameters": [],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/CreatePromptRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/agents/{agent_id}": {
|
"/v1/agents/{agent_id}": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -901,6 +975,143 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/prompts/{prompt_id}": {
|
||||||
|
"get": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "A Prompt resource.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Get a prompt by its identifier and optional version.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt to get.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "version",
|
||||||
|
"in": "query",
|
||||||
|
"description": "The version of the prompt to get (defaults to latest).",
|
||||||
|
"required": false,
|
||||||
|
"schema": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "The updated Prompt resource with incremented version.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Update an existing prompt (increments version).",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt to update.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/UpdatePromptRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"delete": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK"
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Delete a prompt.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt to delete.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/inference/embeddings": {
|
"/v1/inference/embeddings": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -2836,6 +3047,49 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/prompts/{prompt_id}/versions": {
|
||||||
|
"get": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "A ListPromptsResponse containing all versions of the prompt.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ListPromptsResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "List all versions of a specific prompt.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt to list versions for.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/providers": {
|
"/v1/providers": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -5007,6 +5261,59 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/prompts/{prompt_id}/set-default-version": {
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "The prompt with the specified version now set as default.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/SetDefaultVersionRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/post-training/supervised-fine-tune": {
|
"/v1/post-training/supervised-fine-tune": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -9670,6 +9977,65 @@
|
||||||
],
|
],
|
||||||
"title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
|
"title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
|
||||||
},
|
},
|
||||||
|
"CreatePromptRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The prompt text content with variable placeholders."
|
||||||
|
},
|
||||||
|
"variables": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "List of variable names that can be used in the prompt template."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"prompt"
|
||||||
|
],
|
||||||
|
"title": "CreatePromptRequest"
|
||||||
|
},
|
||||||
|
"Prompt": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The system prompt text with variable placeholders. Variables are only supported when using the Responses API."
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Version (integer starting at 1, incremented on save)"
|
||||||
|
},
|
||||||
|
"prompt_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Unique identifier formatted as 'pmpt_<48-digit-hash>'"
|
||||||
|
},
|
||||||
|
"variables": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "List of prompt variable names that can be used in the prompt template"
|
||||||
|
},
|
||||||
|
"is_default": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false,
|
||||||
|
"description": "Boolean indicating whether this version is the default version for this prompt"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"version",
|
||||||
|
"prompt_id",
|
||||||
|
"variables",
|
||||||
|
"is_default"
|
||||||
|
],
|
||||||
|
"title": "Prompt",
|
||||||
|
"description": "A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack."
|
||||||
|
},
|
||||||
"OpenAIDeleteResponseObject": {
|
"OpenAIDeleteResponseObject": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -10296,7 +10662,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "benchmark",
|
"const": "benchmark",
|
||||||
"default": "benchmark",
|
"default": "benchmark",
|
||||||
|
@ -10923,7 +11290,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "dataset",
|
"const": "dataset",
|
||||||
"default": "dataset",
|
"default": "dataset",
|
||||||
|
@ -11073,7 +11441,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "model",
|
"const": "model",
|
||||||
"default": "model",
|
"default": "model",
|
||||||
|
@ -11338,7 +11707,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "scoring_function",
|
"const": "scoring_function",
|
||||||
"default": "scoring_function",
|
"default": "scoring_function",
|
||||||
|
@ -11446,7 +11816,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "shield",
|
"const": "shield",
|
||||||
"default": "shield",
|
"default": "shield",
|
||||||
|
@ -11691,7 +12062,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "tool",
|
"const": "tool",
|
||||||
"default": "tool",
|
"default": "tool",
|
||||||
|
@ -11773,7 +12145,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "tool_group",
|
"const": "tool_group",
|
||||||
"default": "tool_group",
|
"default": "tool_group",
|
||||||
|
@ -12067,7 +12440,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "vector_db",
|
"const": "vector_db",
|
||||||
"default": "vector_db",
|
"default": "vector_db",
|
||||||
|
@ -12882,6 +13256,23 @@
|
||||||
"title": "OpenAIResponseObjectWithInput",
|
"title": "OpenAIResponseObjectWithInput",
|
||||||
"description": "OpenAI response object extended with input context information."
|
"description": "OpenAI response object extended with input context information."
|
||||||
},
|
},
|
||||||
|
"ListPromptsResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"data"
|
||||||
|
],
|
||||||
|
"title": "ListPromptsResponse",
|
||||||
|
"description": "Response model to list prompts."
|
||||||
|
},
|
||||||
"ListProvidersResponse": {
|
"ListProvidersResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -17129,6 +17520,20 @@
|
||||||
"title": "ScoreBatchResponse",
|
"title": "ScoreBatchResponse",
|
||||||
"description": "Response from batch scoring operations on datasets."
|
"description": "Response from batch scoring operations on datasets."
|
||||||
},
|
},
|
||||||
|
"SetDefaultVersionRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"version": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "The version to set as default."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"version"
|
||||||
|
],
|
||||||
|
"title": "SetDefaultVersionRequest"
|
||||||
|
},
|
||||||
"AlgorithmConfig": {
|
"AlgorithmConfig": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
{
|
{
|
||||||
|
@ -17413,6 +17818,37 @@
|
||||||
"title": "SyntheticDataGenerationResponse",
|
"title": "SyntheticDataGenerationResponse",
|
||||||
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
||||||
},
|
},
|
||||||
|
"UpdatePromptRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The updated prompt text content."
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "The current version of the prompt being updated."
|
||||||
|
},
|
||||||
|
"variables": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "Updated list of variable names that can be used in the prompt template."
|
||||||
|
},
|
||||||
|
"set_as_default": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Set the new version as the default (default=True)."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"prompt",
|
||||||
|
"version",
|
||||||
|
"set_as_default"
|
||||||
|
],
|
||||||
|
"title": "UpdatePromptRequest"
|
||||||
|
},
|
||||||
"VersionInfo": {
|
"VersionInfo": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -17538,6 +17974,10 @@
|
||||||
{
|
{
|
||||||
"name": "PostTraining (Coming Soon)"
|
"name": "PostTraining (Coming Soon)"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Prompts",
|
||||||
|
"x-displayName": "Protocol for prompt management operations."
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Providers",
|
"name": "Providers",
|
||||||
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
||||||
|
@ -17588,6 +18028,7 @@
|
||||||
"Inspect",
|
"Inspect",
|
||||||
"Models",
|
"Models",
|
||||||
"PostTraining (Coming Soon)",
|
"PostTraining (Coming Soon)",
|
||||||
|
"Prompts",
|
||||||
"Providers",
|
"Providers",
|
||||||
"Safety",
|
"Safety",
|
||||||
"Scoring",
|
"Scoring",
|
||||||
|
|
332 docs/_static/llama-stack-spec.yaml vendored
@ -427,6 +427,58 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1/prompts:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
A ListPromptsResponse containing all prompts.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ListPromptsResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: List all prompts.
|
||||||
|
parameters: []
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: The created Prompt resource.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: Create a new prompt.
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/CreatePromptRequest'
|
||||||
|
required: true
|
||||||
/v1/agents/{agent_id}:
|
/v1/agents/{agent_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
@ -616,6 +668,103 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
/v1/prompts/{prompt_id}:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: A Prompt resource.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: >-
|
||||||
|
Get a prompt by its identifier and optional version.
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: The identifier of the prompt to get.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: version
|
||||||
|
in: query
|
||||||
|
description: >-
|
||||||
|
The version of the prompt to get (defaults to latest).
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: integer
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
The updated Prompt resource with incremented version.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: >-
|
||||||
|
Update an existing prompt (increments version).
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: The identifier of the prompt to update.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/UpdatePromptRequest'
|
||||||
|
required: true
|
||||||
|
delete:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: Delete a prompt.
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: The identifier of the prompt to delete.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
/v1/inference/embeddings:
|
/v1/inference/embeddings:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -1983,6 +2132,37 @@ paths:
|
||||||
required: false
|
required: false
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/Order'
|
$ref: '#/components/schemas/Order'
|
||||||
|
/v1/prompts/{prompt_id}/versions:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
A ListPromptsResponse containing all versions of the prompt.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ListPromptsResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: List all versions of a specific prompt.
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: >-
|
||||||
|
The identifier of the prompt to list versions for.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
/v1/providers:
|
/v1/providers:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
@ -3546,6 +3726,43 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/ScoreBatchRequest'
|
$ref: '#/components/schemas/ScoreBatchRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1/prompts/{prompt_id}/set-default-version:
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
The prompt with the specified version now set as default.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: >-
|
||||||
|
Set which version of a prompt should be the default in get_prompt (latest).
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: The identifier of the prompt.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/SetDefaultVersionRequest'
|
||||||
|
required: true
|
||||||
/v1/post-training/supervised-fine-tune:
|
/v1/post-training/supervised-fine-tune:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -7148,6 +7365,61 @@ components:
|
||||||
- type
|
- type
|
||||||
title: >-
|
title: >-
|
||||||
OpenAIResponseObjectStreamResponseWebSearchCallSearching
|
OpenAIResponseObjectStreamResponseWebSearchCallSearching
|
||||||
|
CreatePromptRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
prompt:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The prompt text content with variable placeholders.
|
||||||
|
variables:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
List of variable names that can be used in the prompt template.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- prompt
|
||||||
|
title: CreatePromptRequest
|
||||||
|
Prompt:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
prompt:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The system prompt text with variable placeholders. Variables are only
|
||||||
|
supported when using the Responses API.
|
||||||
|
version:
|
||||||
|
type: integer
|
||||||
|
description: >-
|
||||||
|
Version (integer starting at 1, incremented on save)
|
||||||
|
prompt_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
Unique identifier formatted as 'pmpt_<48-digit-hash>'
|
||||||
|
variables:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
List of prompt variable names that can be used in the prompt template
|
||||||
|
is_default:
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
description: >-
|
||||||
|
Boolean indicating whether this version is the default version for this
|
||||||
|
prompt
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- version
|
||||||
|
- prompt_id
|
||||||
|
- variables
|
||||||
|
- is_default
|
||||||
|
title: Prompt
|
||||||
|
description: >-
|
||||||
|
A prompt resource representing a stored OpenAI Compatible prompt template
|
||||||
|
in Llama Stack.
|
||||||
OpenAIDeleteResponseObject:
|
OpenAIDeleteResponseObject:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7621,6 +7893,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: benchmark
|
const: benchmark
|
||||||
default: benchmark
|
default: benchmark
|
||||||
description: The resource type, always benchmark
|
description: The resource type, always benchmark
|
||||||
|
@ -8107,6 +8380,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: dataset
|
const: dataset
|
||||||
default: dataset
|
default: dataset
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -8219,6 +8493,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: model
|
const: model
|
||||||
default: model
|
default: model
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -8410,6 +8685,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: scoring_function
|
const: scoring_function
|
||||||
default: scoring_function
|
default: scoring_function
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -8486,6 +8762,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: shield
|
const: shield
|
||||||
default: shield
|
default: shield
|
||||||
description: The resource type, always shield
|
description: The resource type, always shield
|
||||||
|
@ -8665,6 +8942,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: tool
|
const: tool
|
||||||
default: tool
|
default: tool
|
||||||
description: Type of resource, always 'tool'
|
description: Type of resource, always 'tool'
|
||||||
|
@ -8723,6 +9001,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: tool_group
|
const: tool_group
|
||||||
default: tool_group
|
default: tool_group
|
||||||
description: Type of resource, always 'tool_group'
|
description: Type of resource, always 'tool_group'
|
||||||
|
@ -8951,6 +9230,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: vector_db
|
const: vector_db
|
||||||
default: vector_db
|
default: vector_db
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -9577,6 +9857,18 @@ components:
|
||||||
title: OpenAIResponseObjectWithInput
|
title: OpenAIResponseObjectWithInput
|
||||||
description: >-
|
description: >-
|
||||||
OpenAI response object extended with input context information.
|
OpenAI response object extended with input context information.
|
||||||
|
ListPromptsResponse:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
data:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- data
|
||||||
|
title: ListPromptsResponse
|
||||||
|
description: Response model to list prompts.
|
||||||
ListProvidersResponse:
|
ListProvidersResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -12723,6 +13015,16 @@ components:
|
||||||
title: ScoreBatchResponse
|
title: ScoreBatchResponse
|
||||||
description: >-
|
description: >-
|
||||||
Response from batch scoring operations on datasets.
|
Response from batch scoring operations on datasets.
|
||||||
|
SetDefaultVersionRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
version:
|
||||||
|
type: integer
|
||||||
|
description: The version to set as default.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- version
|
||||||
|
title: SetDefaultVersionRequest
|
||||||
AlgorithmConfig:
|
AlgorithmConfig:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
||||||
|
@ -12919,6 +13221,32 @@ components:
|
||||||
description: >-
|
description: >-
|
||||||
Response from the synthetic data generation. Batch of (prompt, response, score)
|
Response from the synthetic data generation. Batch of (prompt, response, score)
|
||||||
tuples that pass the threshold.
|
tuples that pass the threshold.
|
||||||
|
UpdatePromptRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
prompt:
|
||||||
|
type: string
|
||||||
|
description: The updated prompt text content.
|
||||||
|
version:
|
||||||
|
type: integer
|
||||||
|
description: >-
|
||||||
|
The current version of the prompt being updated.
|
||||||
|
variables:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
Updated list of variable names that can be used in the prompt template.
|
||||||
|
set_as_default:
|
||||||
|
type: boolean
|
||||||
|
description: >-
|
||||||
|
Set the new version as the default (default=True).
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- prompt
|
||||||
|
- version
|
||||||
|
- set_as_default
|
||||||
|
title: UpdatePromptRequest
|
||||||
VersionInfo:
|
VersionInfo:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -13030,6 +13358,9 @@ tags:
|
||||||
- name: Inspect
|
- name: Inspect
|
||||||
- name: Models
|
- name: Models
|
||||||
- name: PostTraining (Coming Soon)
|
- name: PostTraining (Coming Soon)
|
||||||
|
- name: Prompts
|
||||||
|
x-displayName: >-
|
||||||
|
Protocol for prompt management operations.
|
||||||
- name: Providers
|
- name: Providers
|
||||||
x-displayName: >-
|
x-displayName: >-
|
||||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||||
|
@ -13057,6 +13388,7 @@ x-tagGroups:
|
||||||
- Inspect
|
- Inspect
|
||||||
- Models
|
- Models
|
||||||
- PostTraining (Coming Soon)
|
- PostTraining (Coming Soon)
|
||||||
|
- Prompts
|
||||||
- Providers
|
- Providers
|
||||||
- Safety
|
- Safety
|
||||||
- Scoring
|
- Scoring
|
||||||
|
|
|
@ -131,6 +131,7 @@ html_static_path = ["../_static"]
def setup(app):
    app.add_css_file("css/my_theme.css")
    app.add_js_file("js/detect_theme.js")
    app.add_js_file("js/horizontal_nav.js")
    app.add_js_file("js/keyboard_shortcuts.js")


def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
@ -35,5 +35,5 @@ testing/record-replay

### Benchmarking

```{include} ../../../benchmarking/k8s-benchmark/README.md
```
@ -40,18 +40,15 @@ The system patches OpenAI and Ollama client methods to intercept calls before th

### Storage Architecture

Recordings are stored as JSON files in the recording directory. They are looked up by their request hash.

```
recordings/
└── responses/
    ├── abc123def456.json  # Individual response files
    └── def789ghi012.json
```

**JSON files** store complete request/response pairs in human-readable format for debugging.

## Recording Modes

@ -166,8 +163,8 @@ This preserves type safety - when replayed, you get the same Pydantic objects wi

Control recording behavior globally:

```bash
export LLAMA_STACK_TEST_INFERENCE_MODE=replay  # this is the default
export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings  # default is tests/integration/recordings
pytest tests/integration/
```
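A minimal sketch of the complementary flow, assuming the same variable also accepts a `record` value as the record/replay naming in this section suggests (treat the exact value as an assumption):

```bash
# Illustrative only: switch from replay to recording and regenerate fixtures.
# The `record` value is assumed from the mode names used in this section.
export LLAMA_STACK_TEST_INFERENCE_MODE=record
export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings
pytest tests/integration/
```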
@ -354,6 +354,47 @@ You can easily validate a request by running:
curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
```

#### Kubernetes Authentication Provider

The server can be configured to use the Kubernetes SelfSubjectReview API to validate tokens directly against the Kubernetes API server:

```yaml
server:
  auth:
    provider_config:
      type: "kubernetes"
      api_server_url: "https://kubernetes.default.svc"
      claims_mapping:
        username: "roles"
        groups: "roles"
        uid: "uid_attr"
      verify_tls: true
      tls_cafile: "/path/to/ca.crt"
```

Configuration options:
- `api_server_url`: The Kubernetes API server URL (e.g., https://kubernetes.default.svc:6443)
- `verify_tls`: Whether to verify TLS certificates (default: true)
- `tls_cafile`: Path to the CA certificate file for TLS verification
- `claims_mapping`: Mapping of Kubernetes user claims to access attributes

The provider validates tokens by sending a SelfSubjectReview request to the Kubernetes API server at `/apis/authentication.k8s.io/v1/selfsubjectreviews` and extracts user information from the response:
- Username from the `userInfo.username` field
- Groups from the `userInfo.groups` field
- UID from the `userInfo.uid` field

To obtain a token for testing:
```bash
kubectl create namespace llama-stack
kubectl create serviceaccount llama-stack-auth -n llama-stack
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
```

You can validate a request by running:
```bash
curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
```
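For orientation, here is a hedged sketch of the kind of SelfSubjectReview call the provider issues under the hood. The request shape follows the standard `authentication.k8s.io/v1` API; the exact fields Llama Stack sends are an assumption in this illustration:

```bash
# Illustrative only: ask the API server to identify the bearer of this token.
curl -s -X POST \
  -H "Authorization: Bearer $(cat llama-stack-auth-token)" \
  -H "Content-Type: application/json" \
  -d '{"apiVersion": "authentication.k8s.io/v1", "kind": "SelfSubjectReview"}' \
  https://kubernetes.default.svc/apis/authentication.k8s.io/v1/selfsubjectreviews
# The response's status.userInfo carries username, groups, and uid, which is the
# information the claims_mapping above translates into access attributes.
```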
#### GitHub Token Provider

Validates GitHub personal access tokens or OAuth tokens directly:
```yaml
@ -1,137 +1,55 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
data:
|
data:
|
||||||
stack_run_config.yaml: |
|
stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n-
|
||||||
version: '2'
|
inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n
|
||||||
image_name: kubernetes-demo
|
\ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n
|
||||||
apis:
|
\ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens:
|
||||||
- agents
|
${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify:
|
||||||
- inference
|
${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type:
|
||||||
- safety
|
remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n
|
||||||
- telemetry
|
\ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n
|
||||||
- tool_runtime
|
\ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n
|
||||||
- vector_io
|
\ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n
|
||||||
providers:
|
\ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n
|
||||||
inference:
|
\ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n
|
||||||
- provider_id: vllm-inference
|
\ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n
|
||||||
provider_type: remote::vllm
|
\ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n
|
||||||
config:
|
\ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id:
|
||||||
url: ${env.VLLM_URL:=http://localhost:8000/v1}
|
meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir:
|
||||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n
|
||||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
\ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
||||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
\ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n
|
||||||
- provider_id: vllm-safety
|
\ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n
|
||||||
provider_type: remote::vllm
|
\ provider_type: inline::meta-reference\n config:\n persistence_store:\n
|
||||||
config:
|
\ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port:
|
||||||
url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
|
${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
|
||||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
|
||||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
\ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
|
||||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
\ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
|
||||||
- provider_id: sentence-transformers
|
\ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
|
||||||
provider_type: inline::sentence-transformers
|
\ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n
|
||||||
config: {}
|
\ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks:
|
||||||
vector_io:
|
${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n
|
||||||
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
|
\ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n
|
||||||
provider_type: remote::chromadb
|
\ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n
|
||||||
config:
|
\ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results:
|
||||||
url: ${env.CHROMADB_URL:=}
|
3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config:
|
||||||
kvstore:
|
{}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n
|
||||||
type: postgres
|
\ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
\ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
\ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host:
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
\ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n-
|
||||||
safety:
|
metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id:
|
||||||
- provider_id: llama-guard
|
sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n
|
||||||
provider_type: inline::llama-guard
|
\ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id:
|
||||||
config:
|
${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n
|
||||||
excluded_categories: []
|
\ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs:
|
||||||
agents:
|
[]\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id:
|
||||||
- provider_id: meta-reference
|
builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n
|
||||||
provider_type: inline::meta-reference
|
\ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n
|
||||||
config:
|
\ type: github_token\n"
|
||||||
persistence_store:
|
|
||||||
type: postgres
|
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
||||||
responses_store:
|
|
||||||
type: postgres
|
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
||||||
telemetry:
|
|
||||||
- provider_id: meta-reference
|
|
||||||
provider_type: inline::meta-reference
|
|
||||||
config:
|
|
||||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
|
||||||
sinks: ${env.TELEMETRY_SINKS:=console}
|
|
||||||
tool_runtime:
|
|
||||||
- provider_id: brave-search
|
|
||||||
provider_type: remote::brave-search
|
|
||||||
config:
|
|
||||||
api_key: ${env.BRAVE_SEARCH_API_KEY:+}
|
|
||||||
max_results: 3
|
|
||||||
- provider_id: tavily-search
|
|
||||||
provider_type: remote::tavily-search
|
|
||||||
config:
|
|
||||||
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
|
|
||||||
max_results: 3
|
|
||||||
- provider_id: rag-runtime
|
|
||||||
provider_type: inline::rag-runtime
|
|
||||||
config: {}
|
|
||||||
- provider_id: model-context-protocol
|
|
||||||
provider_type: remote::model-context-protocol
|
|
||||||
config: {}
|
|
||||||
metadata_store:
|
|
||||||
type: postgres
|
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
||||||
table_name: llamastack_kvstore
|
|
||||||
inference_store:
|
|
||||||
type: postgres
|
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
||||||
models:
|
|
||||||
- metadata:
|
|
||||||
embedding_dimension: 384
|
|
||||||
model_id: all-MiniLM-L6-v2
|
|
||||||
provider_id: sentence-transformers
|
|
||||||
model_type: embedding
|
|
||||||
- metadata: {}
|
|
||||||
model_id: ${env.INFERENCE_MODEL}
|
|
||||||
provider_id: vllm-inference
|
|
||||||
model_type: llm
|
|
||||||
- metadata: {}
|
|
||||||
model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
|
|
||||||
provider_id: vllm-safety
|
|
||||||
model_type: llm
|
|
||||||
shields:
|
|
||||||
- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
|
|
||||||
vector_dbs: []
|
|
||||||
datasets: []
|
|
||||||
scoring_fns: []
|
|
||||||
benchmarks: []
|
|
||||||
tool_groups:
|
|
||||||
- toolgroup_id: builtin::websearch
|
|
||||||
provider_id: tavily-search
|
|
||||||
- toolgroup_id: builtin::rag
|
|
||||||
provider_id: rag-runtime
|
|
||||||
server:
|
|
||||||
port: 8321
|
|
||||||
auth:
|
|
||||||
provider_config:
|
|
||||||
type: github_token
|
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
creationTimestamp: null
|
creationTimestamp: null
|
||||||
|
|
|
@ -3,6 +3,7 @@ image_name: kubernetes-demo
apis:
- agents
- inference
- files
- safety
- telemetry
- tool_runtime

@ -38,6 +39,14 @@ providers:
      db: ${env.POSTGRES_DB:=llamastack}
      user: ${env.POSTGRES_USER:=llamastack}
      password: ${env.POSTGRES_PASSWORD:=llamastack}
  files:
  - provider_id: meta-reference-files
    provider_type: inline::localfs
    config:
      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
      metadata_store:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
@@ -18,12 +18,13 @@ embedding_model_id = (
 ).identifier
 embedding_dimension = em.metadata["embedding_dimension"]

-_ = client.vector_dbs.register(
+vector_db = client.vector_dbs.register(
     vector_db_id=vector_db_id,
     embedding_model=embedding_model_id,
     embedding_dimension=embedding_dimension,
     provider_id="faiss",
 )
+vector_db_id = vector_db.identifier
 source = "https://www.paulgraham.com/greatwork.html"
 print("rag_tool> Ingesting document:", source)
 document = RAGDocument(
@@ -35,7 +36,7 @@ document = RAGDocument(
 client.tool_runtime.rag_tool.insert(
     documents=[document],
     vector_db_id=vector_db_id,
-    chunk_size_in_tokens=50,
+    chunk_size_in_tokens=100,
 )
 agent = Agent(
     client,
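Not part of the diff: a minimal sketch of the registration pattern the updated demo script relies on. The server may assign its own vector store identifier, so the script now reads it back from the returned resource instead of assuming the requested ID was kept. The client setup, model name, and document are illustrative assumptions.

    from llama_stack_client import LlamaStackClient, RAGDocument

    client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local stack
    document = RAGDocument(document_id="doc-1", content="hello world", mime_type="text/plain", metadata={})

    vector_db = client.vector_dbs.register(
        vector_db_id="my-docs",              # requested ID; may be replaced server-side
        embedding_model="all-MiniLM-L6-v2",  # illustrative embedding model
        embedding_dimension=384,
        provider_id="faiss",
    )
    vector_db_id = vector_db.identifier      # authoritative ID for all later calls

    client.tool_runtime.rag_tool.insert(
        documents=[document],
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=100,
    )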
@@ -8,3 +8,4 @@ Here's a list of known external providers that you can use with Llama Stack:
 | KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
 | RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
 | TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
+| MongoDB | VectorIO with MongoDB | Vector_IO | Remote | [mongodb-llama-stack](https://github.com/mongodb-partners/mongodb-llama-stack) |
@@ -15,8 +15,8 @@ AWS Bedrock inference provider for accessing various AI models through AWS's managed service.
 | `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
 | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
 | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
-| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
+| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
-| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
+| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
 | `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |

 ## Sample Configuration
@@ -15,8 +15,8 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
 | `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
 | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
 | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
-| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
+| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
-| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
+| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
 | `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |

 ## Sample Configuration
@@ -79,3 +79,10 @@ class ConflictError(ValueError):

     def __init__(self, message: str) -> None:
         super().__init__(message)
+
+
+class TokenValidationError(ValueError):
+    """raised when token validation fails during authentication"""
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
@@ -102,6 +102,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     :cvar benchmarks: Benchmark suite management
     :cvar tool_groups: Tool group organization
     :cvar files: File storage and management
+    :cvar prompts: Prompt versions and management
     :cvar inspect: Built-in system inspection and introspection
     """

@@ -127,6 +128,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     benchmarks = "benchmarks"
     tool_groups = "tool_groups"
     files = "files"
+    prompts = "prompts"

     # built-in API
     inspect = "inspect"
llama_stack/apis/prompts/__init__.py (new file, 9 lines)
@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .prompts import ListPromptsResponse, Prompt, Prompts

__all__ = ["Prompt", "Prompts", "ListPromptsResponse"]
llama_stack/apis/prompts/prompts.py (new file, 189 lines)
@@ -0,0 +1,189 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import re
import secrets
from typing import Protocol, runtime_checkable

from pydantic import BaseModel, Field, field_validator, model_validator

from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod


@json_schema_type
class Prompt(BaseModel):
    """A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack.

    :param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API.
    :param version: Version (integer starting at 1, incremented on save)
    :param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>'
    :param variables: List of prompt variable names that can be used in the prompt template
    :param is_default: Boolean indicating whether this version is the default version for this prompt
    """

    prompt: str | None = Field(default=None, description="The system prompt with variable placeholders")
    version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1)
    prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'")
    variables: list[str] = Field(
        default_factory=list, description="List of variable names that can be used in the prompt template"
    )
    is_default: bool = Field(
        default=False, description="Boolean indicating whether this version is the default version"
    )

    @field_validator("prompt_id")
    @classmethod
    def validate_prompt_id(cls, prompt_id: str) -> str:
        if not isinstance(prompt_id, str):
            raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'")

        if not prompt_id.startswith("pmpt_"):
            raise ValueError("prompt_id must start with 'pmpt_' prefix")

        hex_part = prompt_id[5:]
        if len(hex_part) != 48:
            raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)")

        for char in hex_part:
            if char not in "0123456789abcdef":
                raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]")

        return prompt_id

    @field_validator("version")
    @classmethod
    def validate_version(cls, prompt_version: int) -> int:
        if prompt_version < 1:
            raise ValueError("version must be >= 1")
        return prompt_version

    @model_validator(mode="after")
    def validate_prompt_variables(self):
        """Validate that all variables used in the prompt are declared in the variables list."""
        if not self.prompt:
            return self

        prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt))
        declared_variables = set(self.variables)

        undeclared = prompt_variables - declared_variables
        if undeclared:
            raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}")

        return self

    @classmethod
    def generate_prompt_id(cls) -> str:
        # Generate 48 hex characters (24 bytes)
        random_bytes = secrets.token_bytes(24)
        hex_string = random_bytes.hex()
        return f"pmpt_{hex_string}"


class ListPromptsResponse(BaseModel):
    """Response model to list prompts."""

    data: list[Prompt]


@runtime_checkable
@trace_protocol
class Prompts(Protocol):
    """Protocol for prompt management operations."""

    @webmethod(route="/prompts", method="GET")
    async def list_prompts(self) -> ListPromptsResponse:
        """List all prompts.

        :returns: A ListPromptsResponse containing all prompts.
        """
        ...

    @webmethod(route="/prompts/{prompt_id}/versions", method="GET")
    async def list_prompt_versions(
        self,
        prompt_id: str,
    ) -> ListPromptsResponse:
        """List all versions of a specific prompt.

        :param prompt_id: The identifier of the prompt to list versions for.
        :returns: A ListPromptsResponse containing all versions of the prompt.
        """
        ...

    @webmethod(route="/prompts/{prompt_id}", method="GET")
    async def get_prompt(
        self,
        prompt_id: str,
        version: int | None = None,
    ) -> Prompt:
        """Get a prompt by its identifier and optional version.

        :param prompt_id: The identifier of the prompt to get.
        :param version: The version of the prompt to get (defaults to latest).
        :returns: A Prompt resource.
        """
        ...

    @webmethod(route="/prompts", method="POST")
    async def create_prompt(
        self,
        prompt: str,
        variables: list[str] | None = None,
    ) -> Prompt:
        """Create a new prompt.

        :param prompt: The prompt text content with variable placeholders.
        :param variables: List of variable names that can be used in the prompt template.
        :returns: The created Prompt resource.
        """
        ...

    @webmethod(route="/prompts/{prompt_id}", method="PUT")
    async def update_prompt(
        self,
        prompt_id: str,
        prompt: str,
        version: int,
        variables: list[str] | None = None,
        set_as_default: bool = True,
    ) -> Prompt:
        """Update an existing prompt (increments version).

        :param prompt_id: The identifier of the prompt to update.
        :param prompt: The updated prompt text content.
        :param version: The current version of the prompt being updated.
        :param variables: Updated list of variable names that can be used in the prompt template.
        :param set_as_default: Set the new version as the default (default=True).
        :returns: The updated Prompt resource with incremented version.
        """
        ...

    @webmethod(route="/prompts/{prompt_id}", method="DELETE")
    async def delete_prompt(
        self,
        prompt_id: str,
    ) -> None:
        """Delete a prompt.

        :param prompt_id: The identifier of the prompt to delete.
        """
        ...

    @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT")
    async def set_default_version(
        self,
        prompt_id: str,
        version: int,
    ) -> Prompt:
        """Set which version of a prompt should be the default in get_prompt (latest).

        :param prompt_id: The identifier of the prompt.
        :param version: The version to set as default.
        :returns: The prompt with the specified version now set as default.
        """
        ...
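Not part of the diff: a short sketch exercising the Prompt model defined above, generating an identifier and letting the model validator reject an undeclared {{ variable }}. Only names introduced in the new file are used.

    from llama_stack.apis.prompts import Prompt

    prompt_id = Prompt.generate_prompt_id()  # "pmpt_" followed by 48 lowercase hex chars
    ok = Prompt(
        prompt_id=prompt_id,
        prompt="Summarize {{ topic }} in one paragraph.",
        version=1,
        variables=["topic"],  # every template variable must be declared
    )

    try:
        Prompt(prompt_id=prompt_id, prompt="Hello {{ name }}", version=1, variables=[])
    except ValueError as e:
        print(e)  # Prompt contains undeclared variables: ['name']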
@@ -19,6 +19,7 @@ class ResourceType(StrEnum):
     benchmark = "benchmark"
     tool = "tool"
     tool_group = "tool_group"
+    prompt = "prompt"


 class Resource(BaseModel):
@@ -45,6 +45,7 @@ from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.exec import formulate_run_args, run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
+from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

 DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"

@@ -294,6 +295,12 @@ def _generate_run_config(
         if build_config.external_providers_dir
         else EXTERNAL_PROVIDERS_DIR,
     )
+    if not run_config.inference_store:
+        run_config.inference_store = SqliteSqlStoreConfig(
+            **SqliteSqlStoreConfig.sample_run_config(
+                __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
+            )
+        )
     # build providers dict
     provider_registry = get_provider_registry(build_config)
     for api in apis:
@@ -7,6 +7,7 @@
 from enum import StrEnum
 from pathlib import Path
 from typing import Annotated, Any, Literal, Self
+from urllib.parse import urlparse

 from pydantic import BaseModel, Field, field_validator, model_validator

@@ -212,6 +213,7 @@ class AuthProviderType(StrEnum):
     OAUTH2_TOKEN = "oauth2_token"
     GITHUB_TOKEN = "github_token"
     CUSTOM = "custom"
+    KUBERNETES = "kubernetes"


 class OAuth2TokenAuthConfig(BaseModel):
@@ -282,8 +284,45 @@ class GitHubTokenAuthConfig(BaseModel):
     )


+class KubernetesAuthProviderConfig(BaseModel):
+    """Configuration for Kubernetes authentication provider."""
+
+    type: Literal[AuthProviderType.KUBERNETES] = AuthProviderType.KUBERNETES
+    api_server_url: str = Field(
+        default="https://kubernetes.default.svc",
+        description="Kubernetes API server URL (e.g., https://api.cluster.domain:6443)",
+    )
+    verify_tls: bool = Field(default=True, description="Whether to verify TLS certificates")
+    tls_cafile: Path | None = Field(default=None, description="Path to CA certificate file for TLS verification")
+    claims_mapping: dict[str, str] = Field(
+        default_factory=lambda: {
+            "username": "roles",
+            "groups": "roles",
+        },
+        description="Mapping of Kubernetes user claims to access attributes",
+    )
+
+    @field_validator("api_server_url")
+    @classmethod
+    def validate_api_server_url(cls, v):
+        parsed = urlparse(v)
+        if not parsed.scheme or not parsed.netloc:
+            raise ValueError(f"api_server_url must be a valid URL with scheme and host: {v}")
+        if parsed.scheme not in ["http", "https"]:
+            raise ValueError(f"api_server_url scheme must be http or https: {v}")
+        return v
+
+    @field_validator("claims_mapping")
+    @classmethod
+    def validate_claims_mapping(cls, v):
+        for key, value in v.items():
+            if not value:
+                raise ValueError(f"claims_mapping value cannot be empty: {key}")
+        return v
+
+
 AuthProviderConfig = Annotated[
-    OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig,
+    OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig | KubernetesAuthProviderConfig,
     Field(discriminator="type"),
 ]

@@ -392,6 +431,12 @@ class ServerConfig(BaseModel):
     )


+class InferenceStoreConfig(BaseModel):
+    sql_store_config: SqlStoreConfig
+    max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
+    num_writers: int = Field(default=4, description="Number of concurrent background writers")
+
+
 class StackRunConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION

@@ -425,11 +470,12 @@ Configuration for the persistence store used by the distribution registry. If not specified,
 a default SQLite store will be used.""",
     )

-    inference_store: SqlStoreConfig | None = Field(
+    inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
         default=None,
         description="""
-Configuration for the persistence store used by the inference API. If not specified,
-a default SQLite store will be used.""",
+Configuration for the persistence store used by the inference API. Can be either a
+InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
+If not specified, a default SQLite store will be used.""",
     )

     # registry of "resources" in the distribution
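Not part of the diff: a minimal sketch constructing the two new config models above, to show their defaults and validators. The URL, database path, and import of SqliteSqlStoreConfig are illustrative assumptions for the sketch.

    from llama_stack.core.datatypes import InferenceStoreConfig, KubernetesAuthProviderConfig
    from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

    k8s_auth = KubernetesAuthProviderConfig(
        api_server_url="https://api.cluster.example:6443",  # placeholder; must be http(s) with a host
        verify_tls=True,
    )
    print(k8s_auth.claims_mapping)  # defaults to {'username': 'roles', 'groups': 'roles'}

    store = InferenceStoreConfig(
        sql_store_config=SqliteSqlStoreConfig(db_path="/tmp/inference_store.db"),  # placeholder path
        max_write_queue_size=10000,  # default
        num_writers=4,               # default
    )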
@@ -10,7 +10,6 @@ import json
 import logging  # allow-direct-logging
 import os
 import sys
-from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from io import BytesIO
 from pathlib import Path
@@ -148,7 +147,6 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
         self.async_client = AsyncLlamaStackAsLibraryClient(
             config_path_or_distro_name, custom_provider_registry, provider_data, skip_logger_removal
         )
-        self.pool_executor = ThreadPoolExecutor(max_workers=4)
         self.provider_data = provider_data

         self.loop = asyncio.new_event_loop()
llama_stack/core/prompts/prompts.py (new file, 233 lines)
@@ -0,0 +1,233 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json
from typing import Any

from pydantic import BaseModel

from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig


class PromptServiceConfig(BaseModel):
    """Configuration for the built-in prompt service.

    :param run_config: Stack run configuration containing distribution info
    """

    run_config: StackRunConfig


async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
    """Get the prompt service implementation."""
    impl = PromptServiceImpl(config, deps)
    await impl.initialize()
    return impl


class PromptServiceImpl(Prompts):
    """Built-in prompt service implementation using KVStore."""

    def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
        self.config = config
        self.deps = deps
        self.kvstore: KVStore

    async def initialize(self) -> None:
        kvstore_config = SqliteKVStoreConfig(
            db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix()
        )
        self.kvstore = await kvstore_impl(kvstore_config)

    def _get_default_key(self, prompt_id: str) -> str:
        """Get the KVStore key that stores the default version number."""
        return f"prompts:v1:{prompt_id}:default"

    async def _get_prompt_key(self, prompt_id: str, version: int | None = None) -> str:
        """Get the KVStore key for prompt data, returning default version if applicable."""
        if version:
            return self._get_version_key(prompt_id, str(version))

        default_key = self._get_default_key(prompt_id)
        resolved_version = await self.kvstore.get(default_key)
        if resolved_version is None:
            raise ValueError(f"Prompt {prompt_id}:default not found")
        return self._get_version_key(prompt_id, resolved_version)

    def _get_version_key(self, prompt_id: str, version: str) -> str:
        """Get the KVStore key for a specific prompt version."""
        return f"prompts:v1:{prompt_id}:{version}"

    def _get_list_key_prefix(self) -> str:
        """Get the key prefix for listing prompts."""
        return "prompts:v1:"

    def _serialize_prompt(self, prompt: Prompt) -> str:
        """Serialize a prompt to JSON string for storage."""
        return json.dumps(
            {
                "prompt_id": prompt.prompt_id,
                "prompt": prompt.prompt,
                "version": prompt.version,
                "variables": prompt.variables or [],
                "is_default": prompt.is_default,
            }
        )

    def _deserialize_prompt(self, data: str) -> Prompt:
        """Deserialize a prompt from JSON string."""
        obj = json.loads(data)
        return Prompt(
            prompt_id=obj["prompt_id"],
            prompt=obj["prompt"],
            version=obj["version"],
            variables=obj.get("variables", []),
            is_default=obj.get("is_default", False),
        )

    async def list_prompts(self) -> ListPromptsResponse:
        """List all prompts (default versions only)."""
        prefix = self._get_list_key_prefix()
        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")

        prompts = []
        for key in keys:
            if key.endswith(":default"):
                try:
                    default_version = await self.kvstore.get(key)
                    if default_version:
                        prompt_id = key.replace(prefix, "").replace(":default", "")
                        version_key = self._get_version_key(prompt_id, default_version)
                        data = await self.kvstore.get(version_key)
                        if data:
                            prompt = self._deserialize_prompt(data)
                            prompts.append(prompt)
                except (json.JSONDecodeError, KeyError):
                    continue

        prompts.sort(key=lambda p: p.prompt_id or "", reverse=True)
        return ListPromptsResponse(data=prompts)

    async def get_prompt(self, prompt_id: str, version: int | None = None) -> Prompt:
        """Get a prompt by its identifier and optional version."""
        key = await self._get_prompt_key(prompt_id, version)
        data = await self.kvstore.get(key)
        if data is None:
            raise ValueError(f"Prompt {prompt_id}:{version if version else 'default'} not found")
        return self._deserialize_prompt(data)

    async def create_prompt(
        self,
        prompt: str,
        variables: list[str] | None = None,
    ) -> Prompt:
        """Create a new prompt."""
        if variables is None:
            variables = []

        prompt_obj = Prompt(
            prompt_id=Prompt.generate_prompt_id(),
            prompt=prompt,
            version=1,
            variables=variables,
        )

        version_key = self._get_version_key(prompt_obj.prompt_id, str(prompt_obj.version))
        data = self._serialize_prompt(prompt_obj)
        await self.kvstore.set(version_key, data)

        default_key = self._get_default_key(prompt_obj.prompt_id)
        await self.kvstore.set(default_key, str(prompt_obj.version))

        return prompt_obj

    async def update_prompt(
        self,
        prompt_id: str,
        prompt: str,
        version: int,
        variables: list[str] | None = None,
        set_as_default: bool = True,
    ) -> Prompt:
        """Update an existing prompt (increments version)."""
        if version < 1:
            raise ValueError("Version must be >= 1")
        if variables is None:
            variables = []

        prompt_versions = await self.list_prompt_versions(prompt_id)
        latest_prompt = max(prompt_versions.data, key=lambda x: int(x.version))

        if version and latest_prompt.version != version:
            raise ValueError(
                f"'{version}' is not the latest prompt version for prompt_id='{prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
            )

        current_version = latest_prompt.version if version is None else version
        new_version = current_version + 1

        updated_prompt = Prompt(prompt_id=prompt_id, prompt=prompt, version=new_version, variables=variables)

        version_key = self._get_version_key(prompt_id, str(new_version))
        data = self._serialize_prompt(updated_prompt)
        await self.kvstore.set(version_key, data)

        if set_as_default:
            await self.set_default_version(prompt_id, new_version)

        return updated_prompt

    async def delete_prompt(self, prompt_id: str) -> None:
        """Delete a prompt and all its versions."""
        await self.get_prompt(prompt_id)

        prefix = f"prompts:v1:{prompt_id}:"
        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")

        for key in keys:
            await self.kvstore.delete(key)

    async def list_prompt_versions(self, prompt_id: str) -> ListPromptsResponse:
        """List all versions of a specific prompt."""
        prefix = f"prompts:v1:{prompt_id}:"
        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")

        default_version = None
        prompts = []

        for key in keys:
            data = await self.kvstore.get(key)
            if key.endswith(":default"):
                default_version = data
            else:
                if data:
                    prompt_obj = self._deserialize_prompt(data)
                    prompts.append(prompt_obj)

        if not prompts:
            raise ValueError(f"Prompt {prompt_id} not found")

        for prompt in prompts:
            prompt.is_default = str(prompt.version) == default_version

        prompts.sort(key=lambda x: x.version)
        return ListPromptsResponse(data=prompts)

    async def set_default_version(self, prompt_id: str, version: int) -> Prompt:
        """Set which version of a prompt should be the default, If not set. the default is the latest."""
        version_key = self._get_version_key(prompt_id, str(version))
        data = await self.kvstore.get(version_key)
        if data is None:
            raise ValueError(f"Prompt {prompt_id} version {version} not found")

        default_key = self._get_default_key(prompt_id)
        await self.kvstore.set(default_key, str(version))

        return self._deserialize_prompt(data)
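Not part of the diff: a rough sketch of the service lifecycle implemented above, covering create, update, and pinning a default version. It assumes a StackRunConfig for the running distribution is available; only methods defined in the new file are called.

    from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl


    async def demo(run_config) -> None:  # run_config: a StackRunConfig for this distribution (assumed)
        svc = PromptServiceImpl(PromptServiceConfig(run_config=run_config), deps={})
        await svc.initialize()  # opens prompts.db under the distribution directory

        p1 = await svc.create_prompt("Summarize {{ topic }}.", variables=["topic"])  # version 1
        p2 = await svc.update_prompt(
            p1.prompt_id, "Summarize {{ topic }} briefly.", version=p1.version, variables=["topic"]
        )  # version 2 becomes the default
        await svc.set_default_version(p1.prompt_id, 1)  # pin version 1 again
        versions = await svc.list_prompt_versions(p1.prompt_id)
        print([(p.version, p.is_default) for p in versions.data])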
@@ -19,6 +19,7 @@ from llama_stack.apis.inference import Inference, InferenceProvider
 from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.models import Models
 from llama_stack.apis.post_training import PostTraining
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.providers import Providers as ProvidersAPI
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
@@ -93,6 +94,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
         Api.tool_groups: ToolGroups,
         Api.tool_runtime: ToolRuntime,
         Api.files: Files,
+        Api.prompts: Prompts,
     }

     if external_apis:
@@ -284,7 +286,15 @@ async def instantiate_providers(
         if provider.provider_id is None:
             continue

-        deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        try:
+            deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        except KeyError as e:
+            missing_api = e.args[0]
+            raise RuntimeError(
+                f"Failed to resolve '{provider.spec.api.value}' provider '{provider.provider_id}' of type '{provider.spec.provider_type}': "
+                f"required dependency '{missing_api.value}' is not available. "
+                f"Please add a '{missing_api.value}' provider to your configuration or check if the provider is properly configured."
+            ) from e
         for a in provider.spec.optional_api_dependencies:
             if a in impls:
                 deps[a] = impls[a]
@@ -78,7 +78,10 @@ async def get_auto_router_impl(

     # TODO: move pass configs to routers instead
     if api == Api.inference and run_config.inference_store:
-        inference_store = InferenceStore(run_config.inference_store, policy)
+        inference_store = InferenceStore(
+            config=run_config.inference_store,
+            policy=policy,
+        )
         await inference_store.initialize()
         api_to_dep_impl["store"] = inference_store
@@ -63,7 +63,7 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.telemetry.tracing import get_current_span
+from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span

 logger = get_logger(name=__name__, category="core::routers")

@@ -90,6 +90,11 @@ class InferenceRouter(Inference):

     async def shutdown(self) -> None:
         logger.debug("InferenceRouter.shutdown")
+        if self.store:
+            try:
+                await self.store.shutdown()
+            except Exception as e:
+                logger.warning(f"Error during InferenceStore shutdown: {e}")

     async def register_model(
         self,
@@ -160,7 +165,7 @@ class InferenceRouter(Inference):
         metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
         if self.telemetry:
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)
         return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]

     async def _count_tokens(
@@ -431,7 +436,7 @@ class InferenceRouter(Inference):
                 model=model_obj,
             )
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)

             # these metrics will show up in the client response.
             response.metrics = (
@@ -527,7 +532,7 @@ class InferenceRouter(Inference):

         # Store the response with the ID that will be returned to the client
         if self.store:
-            await self.store.store_chat_completion(response, messages)
+            asyncio.create_task(self.store.store_chat_completion(response, messages))

         if self.telemetry:
             metrics = self._construct_metrics(
@@ -537,7 +542,7 @@ class InferenceRouter(Inference):
                 model=model_obj,
             )
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)
             # these metrics will show up in the client response.
             response.metrics = (
                 metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
@@ -664,7 +669,7 @@ class InferenceRouter(Inference):
                     "completion_tokens",
                     "total_tokens",
                 ]:  # Only log completion and total tokens
-                    await self.telemetry.log_event(metric)
+                    enqueue_event(metric)

             # Return metrics in response
             async_metrics = [
@@ -710,7 +715,7 @@ class InferenceRouter(Inference):
             )
             for metric in completion_metrics:
                 if metric.metric in ["completion_tokens", "total_tokens"]:  # Only log completion and total tokens
-                    await self.telemetry.log_event(metric)
+                    enqueue_event(metric)

             # Return metrics in response
             return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics]
@@ -755,7 +760,7 @@ class InferenceRouter(Inference):
                     choices_data[idx] = {
                         "content_parts": [],
                         "tool_calls_builder": {},
-                        "finish_reason": None,
+                        "finish_reason": "stop",
                         "logprobs_content_parts": [],
                     }
                 current_choice_data = choices_data[idx]
@@ -806,7 +811,7 @@ class InferenceRouter(Inference):
                 model=model,
             )
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)

             yield chunk
     finally:
@@ -855,4 +860,4 @@ class InferenceRouter(Inference):
             object="chat.completion",
         )
         logger.debug(f"InferenceRouter.completion_response: {final_response}")
-        await self.store.store_chat_completion(final_response, messages)
+        asyncio.create_task(self.store.store_chat_completion(final_response, messages))
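Not part of the diff: a tiny, generic sketch of the pattern the router now uses, scheduling the store write with asyncio.create_task so the caller is not blocked on persistence, at the cost of best-effort durability. The store and response objects are placeholders.

    import asyncio


    async def handle_request(store, response, messages):
        # Before: the handler awaited the write, so the caller waited on the database.
        # await store.store_chat_completion(response, messages)

        # After: schedule the write in the background and return immediately.
        asyncio.create_task(store.store_chat_completion(response, messages))
        return response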
@@ -53,6 +53,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
         vector_db_name: str | None = None,
     ) -> VectorDB:
         provider_vector_db_id = provider_vector_db_id or vector_db_id
+
         model = await lookup_model(self, embedding_model)
         if model is None:
             raise ModelNotFoundError(embedding_model)
@@ -60,14 +61,33 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
             raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
         if "embedding_dimension" not in model.metadata:
             raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
+
+        provider = self.impls_by_provider_id[provider_id]
+        logger.warning(
+            "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
+        )
+        vector_store = await provider.openai_create_vector_store(
+            name=vector_db_name or vector_db_id,
+            embedding_model=embedding_model,
+            embedding_dimension=model.metadata["embedding_dimension"],
+            provider_id=provider_id,
+            provider_vector_db_id=provider_vector_db_id,
+        )
+
+        vector_store_id = vector_store.id
+        actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
+        logger.warning(
+            f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
+        )
+
         vector_db_data = {
-            "identifier": vector_db_id,
+            "identifier": vector_store_id,
             "type": ResourceType.vector_db.value,
             "provider_id": provider_id,
-            "provider_resource_id": provider_vector_db_id,
+            "provider_resource_id": actual_provider_vector_db_id,
             "embedding_model": embedding_model,
             "embedding_dimension": model.metadata["embedding_dimension"],
-            "vector_db_name": vector_db_name,
+            "vector_db_name": vector_store.name,
         }
         vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
         await self.register_object(vector_db)
@@ -8,16 +8,18 @@ import ssl
 import time
 from abc import ABC, abstractmethod
 from asyncio import Lock
-from urllib.parse import parse_qs, urlparse
+from urllib.parse import parse_qs, urljoin, urlparse

 import httpx
 from jose import jwt
 from pydantic import BaseModel, Field

+from llama_stack.apis.common.errors import TokenValidationError
 from llama_stack.core.datatypes import (
     AuthenticationConfig,
     CustomAuthConfig,
     GitHubTokenAuthConfig,
+    KubernetesAuthProviderConfig,
     OAuth2TokenAuthConfig,
     User,
 )
@@ -162,7 +164,7 @@ class OAuth2TokenAuthProvider(AuthProvider):
                     auth=auth,
                     timeout=10.0,  # Add a reasonable timeout
                 )
-                if response.status_code != 200:
+                if response.status_code != httpx.codes.OK:
                     logger.warning(f"Token introspection failed with status code: {response.status_code}")
                     raise ValueError(f"Token introspection failed: {response.status_code}")

@@ -272,7 +274,7 @@ class CustomAuthProvider(AuthProvider):
                     json=auth_request.model_dump(),
                     timeout=10.0,  # Add a reasonable timeout
                 )
-                if response.status_code != 200:
+                if response.status_code != httpx.codes.OK:
                     logger.warning(f"Authentication failed with status code: {response.status_code}")
                     raise ValueError(f"Authentication failed: {response.status_code}")

@@ -374,6 +376,89 @@ async def _get_github_user_info(access_token: str, github_api_base_url: str) ->
     }


+class KubernetesAuthProvider(AuthProvider):
+    """
+    Kubernetes authentication provider that validates tokens using the Kubernetes SelfSubjectReview API.
+    This provider integrates with Kubernetes API server by using the
+    /apis/authentication.k8s.io/v1/selfsubjectreviews endpoint to validate tokens and extract user information.
+    """
+
+    def __init__(self, config: KubernetesAuthProviderConfig):
+        self.config = config
+
+    def _httpx_verify_value(self) -> bool | str:
+        """
+        Build the value for httpx's `verify` parameter.
+        - False disables verification.
+        - Path string points to a CA bundle.
+        - True uses system defaults.
+        """
+        if not self.config.verify_tls:
+            return False
+        if self.config.tls_cafile:
+            return self.config.tls_cafile.as_posix()
+        return True
+
+    async def validate_token(self, token: str, scope: dict | None = None) -> User:
+        """Validate a token using Kubernetes SelfSubjectReview API endpoint."""
+        # Build the Kubernetes SelfSubjectReview API endpoint URL
+        review_api_url = urljoin(self.config.api_server_url, "/apis/authentication.k8s.io/v1/selfsubjectreviews")
+
+        # Create SelfSubjectReview request body
+        review_request = {"apiVersion": "authentication.k8s.io/v1", "kind": "SelfSubjectReview"}
+        verify = self._httpx_verify_value()
+
+        try:
+            async with httpx.AsyncClient(verify=verify, timeout=10.0) as client:
+                response = await client.post(
+                    review_api_url,
+                    json=review_request,
+                    headers={
+                        "Authorization": f"Bearer {token}",
+                        "Content-Type": "application/json",
+                    },
+                )
+
+                if response.status_code == httpx.codes.UNAUTHORIZED:
+                    raise TokenValidationError("Invalid token")
+                if response.status_code != httpx.codes.CREATED:
+                    logger.warning(f"Kubernetes SelfSubjectReview API failed with status code: {response.status_code}")
+                    raise TokenValidationError(f"Token validation failed: {response.status_code}")
+
+                review_response = response.json()
+                # Extract user information from SelfSubjectReview response
+                status = review_response.get("status", {})
+                if not status:
+                    raise ValueError("No status found in SelfSubjectReview response")
+
+                user_info = status.get("userInfo", {})
+                if not user_info:
+                    raise ValueError("No userInfo found in SelfSubjectReview response")
+
+                username = user_info.get("username")
+                if not username:
+                    raise ValueError("No username found in SelfSubjectReview response")
+
+                # Build user attributes from Kubernetes user info
+                user_attributes = get_attributes_from_claims(user_info, self.config.claims_mapping)
+
+                return User(
+                    principal=username,
+                    attributes=user_attributes,
+                )
+
+        except httpx.TimeoutException:
+            logger.warning("Kubernetes SelfSubjectReview API request timed out")
+            raise ValueError("Token validation timeout") from None
+        except Exception as e:
+            logger.warning(f"Error during token validation: {str(e)}")
+            raise ValueError(f"Token validation error: {str(e)}") from e
+
+    async def close(self):
+        """Close any resources."""
+        pass
+
+
 def create_auth_provider(config: AuthenticationConfig) -> AuthProvider:
     """Factory function to create the appropriate auth provider."""
     provider_config = config.provider_config
@@ -384,5 +469,7 @@ def create_auth_provider(config: AuthenticationConfig) -> AuthProvider:
         return OAuth2TokenAuthProvider(provider_config)
     elif isinstance(provider_config, GitHubTokenAuthConfig):
         return GitHubTokenAuthProvider(provider_config)
+    elif isinstance(provider_config, KubernetesAuthProviderConfig):
+        return KubernetesAuthProvider(provider_config)
     else:
         raise ValueError(f"Unknown authentication provider config type: {type(provider_config)}")
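Not part of the diff: the shape of the SelfSubjectReview exchange the provider above relies on, shown as plain data. The response body is an illustrative example of what the authentication.k8s.io/v1 API typically returns; exact usernames and groups will differ per cluster.

    # Request body posted to /apis/authentication.k8s.io/v1/selfsubjectreviews
    review_request = {"apiVersion": "authentication.k8s.io/v1", "kind": "SelfSubjectReview"}

    # Illustrative 201 Created response; the provider reads status.userInfo.
    review_response = {
        "apiVersion": "authentication.k8s.io/v1",
        "kind": "SelfSubjectReview",
        "status": {
            "userInfo": {
                "username": "system:serviceaccount:default:my-app",
                "groups": ["system:serviceaccounts", "system:authenticated"],
            }
        },
    }

    user_info = review_response["status"]["userInfo"]
    principal = user_info["username"]  # becomes User.principal
    # user_info is then passed to get_attributes_from_claims(...) with the configured claims_mapping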
@@ -132,15 +132,17 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
             },
         )
     elif isinstance(exc, ConflictError):
-        return HTTPException(status_code=409, detail=str(exc))
+        return HTTPException(status_code=httpx.codes.CONFLICT, detail=str(exc))
     elif isinstance(exc, ResourceNotFoundError):
-        return HTTPException(status_code=404, detail=str(exc))
+        return HTTPException(status_code=httpx.codes.NOT_FOUND, detail=str(exc))
     elif isinstance(exc, ValueError):
         return HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=f"Invalid value: {str(exc)}")
     elif isinstance(exc, BadRequestError):
         return HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=str(exc))
     elif isinstance(exc, PermissionError | AccessDeniedError):
         return HTTPException(status_code=httpx.codes.FORBIDDEN, detail=f"Permission denied: {str(exc)}")
+    elif isinstance(exc, ConnectionError | httpx.ConnectError):
+        return HTTPException(status_code=httpx.codes.BAD_GATEWAY, detail=str(exc))
     elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
         return HTTPException(status_code=httpx.codes.GATEWAY_TIMEOUT, detail=f"Operation timed out: {str(exc)}")
     elif isinstance(exc, NotImplementedError):
@@ -513,6 +515,7 @@ def main(args: argparse.Namespace | None = None):

         apis_to_serve.add("inspect")
         apis_to_serve.add("providers")
+        apis_to_serve.add("prompts")
     for api_str in apis_to_serve:
         api = Api(api_str)
@ -24,6 +24,7 @@ from llama_stack.apis.inference import Inference
|
||||||
from llama_stack.apis.inspect import Inspect
|
from llama_stack.apis.inspect import Inspect
|
||||||
from llama_stack.apis.models import Models
|
from llama_stack.apis.models import Models
|
||||||
from llama_stack.apis.post_training import PostTraining
|
from llama_stack.apis.post_training import PostTraining
|
||||||
|
from llama_stack.apis.prompts import Prompts
|
||||||
from llama_stack.apis.providers import Providers
|
from llama_stack.apis.providers import Providers
|
||||||
from llama_stack.apis.safety import Safety
|
from llama_stack.apis.safety import Safety
|
||||||
from llama_stack.apis.scoring import Scoring
|
from llama_stack.apis.scoring import Scoring
|
||||||
|
@ -37,6 +38,7 @@ from llama_stack.apis.vector_io import VectorIO
|
||||||
from llama_stack.core.datatypes import Provider, StackRunConfig
|
from llama_stack.core.datatypes import Provider, StackRunConfig
|
||||||
from llama_stack.core.distribution import get_provider_registry
|
from llama_stack.core.distribution import get_provider_registry
|
||||||
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
|
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
|
||||||
|
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
|
||||||
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
|
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
|
||||||
from llama_stack.core.resolver import ProviderRegistry, resolve_impls
|
from llama_stack.core.resolver import ProviderRegistry, resolve_impls
|
||||||
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
|
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
|
||||||
|
@ -72,6 +74,7 @@ class LlamaStack(
|
||||||
ToolRuntime,
|
ToolRuntime,
|
||||||
RAGToolRuntime,
|
RAGToolRuntime,
|
||||||
Files,
|
Files,
|
||||||
|
Prompts,
|
||||||
):
|
):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -305,6 +308,12 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
|
||||||
)
|
)
|
||||||
impls[Api.providers] = providers_impl
|
impls[Api.providers] = providers_impl
|
||||||
|
|
||||||
|
prompts_impl = PromptServiceImpl(
|
||||||
|
PromptServiceConfig(run_config=run_config),
|
||||||
|
deps=impls,
|
||||||
|
)
|
||||||
|
impls[Api.prompts] = prompts_impl
|
||||||
|
|
||||||
|
|
||||||
# Produces a stack of providers for the given run config. Not all APIs may be
|
# Produces a stack of providers for the given run config. Not all APIs may be
|
||||||
# asked for in the run config.
|
# asked for in the run config.
|
||||||
|
@ -329,6 +338,9 @@ async def construct_stack(
|
||||||
# Add internal implementations after all other providers are resolved
|
# Add internal implementations after all other providers are resolved
|
||||||
add_internal_implementations(impls, run_config)
|
add_internal_implementations(impls, run_config)
|
||||||
|
|
||||||
|
if Api.prompts in impls:
|
||||||
|
await impls[Api.prompts].initialize()
|
||||||
|
|
||||||
await register_resources(run_config, impls)
|
await register_resources(run_config, impls)
|
||||||
|
|
||||||
await refresh_registry_once(impls)
|
await refresh_registry_once(impls)
|
||||||
|
|
|
@ -11,9 +11,7 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
|
||||||
|
|
||||||
|
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
template = get_starter_distribution_template()
|
template = get_starter_distribution_template(name="ci-tests")
|
||||||
name = "ci-tests"
|
|
||||||
template.name = name
|
|
||||||
template.description = "CI tests for Llama Stack"
|
template.description = "CI tests for Llama Stack"
|
||||||
|
|
||||||
return template
|
return template
|
||||||
|
|
|
@ -89,28 +89,28 @@ providers:
|
||||||
config:
|
config:
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db
|
||||||
- provider_id: sqlite-vec
|
- provider_id: sqlite-vec
|
||||||
provider_type: inline::sqlite-vec
|
provider_type: inline::sqlite-vec
|
||||||
config:
|
config:
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db
|
||||||
- provider_id: ${env.MILVUS_URL:+milvus}
|
- provider_id: ${env.MILVUS_URL:+milvus}
|
||||||
provider_type: inline::milvus
|
provider_type: inline::milvus
|
||||||
config:
|
config:
|
||||||
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
|
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db
|
||||||
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
||||||
provider_type: remote::chromadb
|
provider_type: remote::chromadb
|
||||||
config:
|
config:
|
||||||
url: ${env.CHROMADB_URL:=}
|
url: ${env.CHROMADB_URL:=}
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db
|
||||||
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
||||||
provider_type: remote::pgvector
|
provider_type: remote::pgvector
|
||||||
config:
|
config:
|
||||||
|
@ -121,15 +121,15 @@ providers:
|
||||||
password: ${env.PGVECTOR_PASSWORD:=}
|
password: ${env.PGVECTOR_PASSWORD:=}
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db
|
||||||
files:
|
files:
|
||||||
- provider_id: meta-reference-files
|
- provider_id: meta-reference-files
|
||||||
provider_type: inline::localfs
|
provider_type: inline::localfs
|
||||||
config:
|
config:
|
||||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
|
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_id: llama-guard
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
|
|
|
@ -89,28 +89,28 @@ providers:
|
||||||
config:
|
config:
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db
|
||||||
- provider_id: sqlite-vec
|
- provider_id: sqlite-vec
|
||||||
provider_type: inline::sqlite-vec
|
provider_type: inline::sqlite-vec
|
||||||
config:
|
config:
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db
|
||||||
- provider_id: ${env.MILVUS_URL:+milvus}
|
- provider_id: ${env.MILVUS_URL:+milvus}
|
||||||
provider_type: inline::milvus
|
provider_type: inline::milvus
|
||||||
config:
|
config:
|
||||||
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
|
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db
|
||||||
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
||||||
provider_type: remote::chromadb
|
provider_type: remote::chromadb
|
||||||
config:
|
config:
|
||||||
url: ${env.CHROMADB_URL:=}
|
url: ${env.CHROMADB_URL:=}
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db
|
||||||
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
||||||
provider_type: remote::pgvector
|
provider_type: remote::pgvector
|
||||||
config:
|
config:
|
||||||
|
@ -121,15 +121,15 @@ providers:
|
||||||
password: ${env.PGVECTOR_PASSWORD:=}
|
password: ${env.PGVECTOR_PASSWORD:=}
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db
|
||||||
files:
|
files:
|
||||||
- provider_id: meta-reference-files
|
- provider_id: meta-reference-files
|
||||||
provider_type: inline::localfs
|
provider_type: inline::localfs
|
||||||
config:
|
config:
|
||||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
|
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_id: llama-guard
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
|
|
|
@ -11,9 +11,7 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
|
||||||
|
|
||||||
|
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
template = get_starter_distribution_template()
|
template = get_starter_distribution_template(name="starter-gpu")
|
||||||
name = "starter-gpu"
|
|
||||||
template.name = name
|
|
||||||
template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
|
template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
|
||||||
|
|
||||||
template.providers["post_training"] = [
|
template.providers["post_training"] = [
|
||||||
|
|
|
@ -99,9 +99,8 @@ def get_remote_inference_providers() -> list[Provider]:
|
||||||
return inference_providers
|
return inference_providers
|
||||||
|
|
||||||
|
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
||||||
remote_inference_providers = get_remote_inference_providers()
|
remote_inference_providers = get_remote_inference_providers()
|
||||||
name = "starter"
|
|
||||||
|
|
||||||
providers = {
|
providers = {
|
||||||
"inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers]
|
"inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers]
|
||||||
|
|
|
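The starter template now takes the distribution name as a parameter, so derived templates (ci-tests, starter-gpu) get correctly named storage paths at build time instead of patching `template.name` afterwards, which is also why the generated run.yaml paths above change from `starter` to the derived name. A standalone sketch of the pattern (toy function, not the real template code):

```python
# Sketch only: "pass the name down" instead of "build with a default and mutate afterwards".
def get_starter_template(name: str = "starter") -> dict:
    # storage paths and other defaults are derived from `name` when the template is built
    return {"name": name, "faiss_store": f"~/.llama/distributions/{name}/faiss_store.db"}

ci_tests = get_starter_template(name="ci-tests")
assert ci_tests["faiss_store"].endswith("/ci-tests/faiss_store.db")
```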
@@ -178,9 +178,9 @@ class ReferenceBatchesImpl(Batches):

         # TODO: set expiration time for garbage collection

-        if endpoint not in ["/v1/chat/completions"]:
+        if endpoint not in ["/v1/chat/completions", "/v1/completions"]:
             raise ValueError(
-                f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions. Code: invalid_value. Param: endpoint",
+                f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions, /v1/completions. Code: invalid_value. Param: endpoint",
             )

         if completion_window != "24h":
@@ -424,13 +424,21 @@ class ReferenceBatchesImpl(Batches):
                 )
                 valid = False

-            for param, expected_type, type_string in [
-                ("model", str, "a string"),
-                # messages is specific to /v1/chat/completions
-                # we could skip validating messages here and let inference fail. however,
-                # that would be a very expensive way to find out messages is wrong.
-                ("messages", list, "an array"),  # TODO: allow messages to be a string?
-            ]:
+            if batch.endpoint == "/v1/chat/completions":
+                required_params = [
+                    ("model", str, "a string"),
+                    # messages is specific to /v1/chat/completions
+                    # we could skip validating messages here and let inference fail. however,
+                    # that would be a very expensive way to find out messages is wrong.
+                    ("messages", list, "an array"),  # TODO: allow messages to be a string?
+                ]
+            else:  # /v1/completions
+                required_params = [
+                    ("model", str, "a string"),
+                    ("prompt", str, "a string"),  # TODO: allow prompt to be a list of strings??
+                ]
+
+            for param, expected_type, type_string in required_params:
                 if param not in body:
                     errors.append(
                         BatchError(
@@ -591,6 +599,7 @@ class ReferenceBatchesImpl(Batches):

         try:
             # TODO(SECURITY): review body for security issues
+            if request.url == "/v1/chat/completions":
                 request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]]
                 chat_response = await self.inference_api.openai_chat_completion(**request.body)

@@ -605,6 +614,22 @@ class ReferenceBatchesImpl(Batches):
                         "body": chat_response.model_dump_json(),
                     },
                 }
+            else:  # /v1/completions
+                completion_response = await self.inference_api.openai_completion(**request.body)
+
+                # this is for mypy, we don't allow streaming so we'll get the right type
+                assert hasattr(completion_response, "model_dump_json"), (
+                    "Completion response must have model_dump_json method"
+                )
+                return {
+                    "id": request_id,
+                    "custom_id": request.custom_id,
+                    "response": {
+                        "status_code": 200,
+                        "request_id": request_id,
+                        "body": completion_response.model_dump_json(),
+                    },
+                }
         except Exception as e:
             logger.info(f"Error processing request {request.custom_id} in batch {batch_id}: {e}")
             return {
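With these hunks the reference batches provider accepts `/v1/completions` alongside `/v1/chat/completions`. A sketch of what one batch-input JSONL line for the new endpoint might look like; the layout is assumed to mirror the OpenAI batch-input format (`custom_id`, `method`, `url`, `body`), and only `body.model` and `body.prompt` are taken from the validation above. The model id is a placeholder.

```python
# Sketch only: a plausible /v1/completions batch request line; "method" and the overall
# JSONL layout are assumptions, the model name is a placeholder.
import json

line = {
    "custom_id": "request-1",
    "method": "POST",
    "url": "/v1/completions",
    "body": {
        "model": "llama3.2:3b",               # placeholder model id
        "prompt": "Say hello in one word.",   # `prompt` must be a string for this endpoint
    },
}
print(json.dumps(line))
```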
@@ -14,6 +14,6 @@ from .config import RagToolRuntimeConfig
 async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]):
     from .memory import MemoryToolRuntimeImpl

-    impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference])
+    impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files])
     await impl.initialize()
     return impl

@@ -5,10 +5,15 @@
 # the root directory of this source tree.

 import asyncio
+import base64
+import io
+import mimetypes
 import secrets
 import string
 from typing import Any

+import httpx
+from fastapi import UploadFile
 from pydantic import TypeAdapter

 from llama_stack.apis.common.content_types import (
@@ -17,6 +22,7 @@ from llama_stack.apis.common.content_types import (
     InterleavedContentItem,
     TextContentItem,
 )
+from llama_stack.apis.files import Files, OpenAIFilePurpose
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.tools import (
     ListToolDefsResponse,
@@ -30,13 +36,18 @@ from llama_stack.apis.tools import (
     ToolParameter,
     ToolRuntime,
 )
-from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_io import (
+    QueryChunksResponse,
+    VectorIO,
+    VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
+)
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.memory.vector_store import (
     content_from_doc,
-    make_overlapped_chunks,
+    parse_data_url,
 )

 from .config import RagToolRuntimeConfig
@@ -55,10 +66,12 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         config: RagToolRuntimeConfig,
         vector_io_api: VectorIO,
         inference_api: Inference,
+        files_api: Files,
     ):
         self.config = config
         self.vector_io_api = vector_io_api
         self.inference_api = inference_api
+        self.files_api = files_api

     async def initialize(self):
         pass
@@ -78,26 +91,49 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         vector_db_id: str,
         chunk_size_in_tokens: int = 512,
     ) -> None:
-        chunks = []
-        for doc in documents:
-            content = await content_from_doc(doc)
-            # TODO: we should add enrichment here as URLs won't be added to the metadata by default
-            chunks.extend(
-                make_overlapped_chunks(
-                    doc.document_id,
-                    content,
-                    chunk_size_in_tokens,
-                    chunk_size_in_tokens // 4,
-                    doc.metadata,
-                )
-            )
-
-        if not chunks:
+        if not documents:
             return

-        await self.vector_io_api.insert_chunks(
-            chunks=chunks,
-            vector_db_id=vector_db_id,
+        for doc in documents:
+            if isinstance(doc.content, URL):
+                if doc.content.uri.startswith("data:"):
+                    parts = parse_data_url(doc.content.uri)
+                    file_data = base64.b64decode(parts["data"]) if parts["is_base64"] else parts["data"].encode()
+                    mime_type = parts["mimetype"]
+                else:
+                    async with httpx.AsyncClient() as client:
+                        response = await client.get(doc.content.uri)
+                    file_data = response.content
+                    mime_type = doc.mime_type or response.headers.get("content-type", "application/octet-stream")
+            else:
+                content_str = await content_from_doc(doc)
+                file_data = content_str.encode("utf-8")
+                mime_type = doc.mime_type or "text/plain"
+
+            file_extension = mimetypes.guess_extension(mime_type) or ".txt"
+            filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
+
+            file_obj = io.BytesIO(file_data)
+            file_obj.name = filename
+
+            upload_file = UploadFile(file=file_obj, filename=filename)
+
+            created_file = await self.files_api.openai_upload_file(
+                file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
+            )
+
+            chunking_strategy = VectorStoreChunkingStrategyStatic(
+                static=VectorStoreChunkingStrategyStaticConfig(
+                    max_chunk_size_tokens=chunk_size_in_tokens,
+                    chunk_overlap_tokens=chunk_size_in_tokens // 4,
+                )
+            )
+
+            await self.vector_io_api.openai_attach_file_to_vector_store(
+                vector_store_id=vector_db_id,
+                file_id=created_file.id,
+                attributes=doc.metadata,
+                chunking_strategy=chunking_strategy,
             )

     async def query(
@@ -131,8 +167,18 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
             for vector_db_id in vector_db_ids
         ]
         results: list[QueryChunksResponse] = await asyncio.gather(*tasks)
-        chunks = [c for r in results for c in r.chunks]
-        scores = [s for r in results for s in r.scores]
+
+        chunks = []
+        scores = []
+
+        for vector_db_id, result in zip(vector_db_ids, results, strict=False):
+            for chunk, score in zip(result.chunks, result.scores, strict=False):
+                if not hasattr(chunk, "metadata") or chunk.metadata is None:
+                    chunk.metadata = {}
+                chunk.metadata["vector_db_id"] = vector_db_id
+
+                chunks.append(chunk)
+                scores.append(score)
+
         if not chunks:
             return RAGQueryResult(content=None)
@@ -167,6 +213,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         metadata_keys_to_exclude_from_context = [
             "token_count",
             "metadata_token_count",
+            "vector_db_id",
         ]
         metadata_for_context = {}
         for k in chunk_metadata_keys_to_include_from_context:
@@ -191,6 +238,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
                 "document_ids": [c.metadata["document_id"] for c in chunks[: len(picked)]],
                 "chunks": [c.content for c in chunks[: len(picked)]],
                 "scores": scores[: len(picked)],
+                "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
             },
         )
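After this change the RAG runtime no longer chunks documents itself: each document is uploaded through the Files API and attached to the vector store with a static chunking strategy, and query results carry the originating `vector_db_id` in chunk metadata. A sketch of a document that would take the `data:` URL branch above; the `RAGDocument` import location and field names are assumptions inferred from this hunk, not verified against the full tree:

```python
# Sketch only: a document whose content is a base64 data: URL, which the new
# insert path decodes via parse_data_url() and uploads via the Files API.
import base64

from llama_stack.apis.common.content_types import URL
from llama_stack.apis.tools import RAGDocument  # assumed location of RAGDocument

text = "Llama Stack ships a files-backed RAG ingestion path."
doc = RAGDocument(
    document_id="doc-1",
    content=URL(uri="data:text/plain;base64," + base64.b64encode(text.encode()).decode()),
    mime_type="text/plain",
    metadata={"filename": "notes.txt"},
)
# documents=[doc] would then be passed to the insert method shown in the hunk above,
# together with vector_db_id and chunk_size_in_tokens.
```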
@@ -30,11 +30,11 @@ from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import (
     RERANKER_TYPE_RRF,
-    RERANKER_TYPE_WEIGHTED,
     ChunkForDeletion,
     EmbeddingIndex,
     VectorDBWithIndex,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator

 logger = get_logger(name=__name__, category="vector_io")

@@ -66,59 +66,6 @@ def _create_sqlite_connection(db_path):
     return connection


-def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
-    """Normalize scores to [0,1] range using min-max normalization."""
-    if not scores:
-        return {}
-    min_score = min(scores.values())
-    max_score = max(scores.values())
-    score_range = max_score - min_score
-    if score_range > 0:
-        return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
-    return dict.fromkeys(scores, 1.0)
-
-
-def _weighted_rerank(
-    vector_scores: dict[str, float],
-    keyword_scores: dict[str, float],
-    alpha: float = 0.5,
-) -> dict[str, float]:
-    """ReRanker that uses weighted average of scores."""
-    all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
-    normalized_vector_scores = _normalize_scores(vector_scores)
-    normalized_keyword_scores = _normalize_scores(keyword_scores)
-
-    return {
-        doc_id: (alpha * normalized_keyword_scores.get(doc_id, 0.0))
-        + ((1 - alpha) * normalized_vector_scores.get(doc_id, 0.0))
-        for doc_id in all_ids
-    }
-
-
-def _rrf_rerank(
-    vector_scores: dict[str, float],
-    keyword_scores: dict[str, float],
-    impact_factor: float = 60.0,
-) -> dict[str, float]:
-    """ReRanker that uses Reciprocal Rank Fusion."""
-    # Convert scores to ranks
-    vector_ranks = {
-        doc_id: i + 1 for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True))
-    }
-    keyword_ranks = {
-        doc_id: i + 1 for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True))
-    }
-
-    all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
-    rrf_scores = {}
-    for doc_id in all_ids:
-        vector_rank = vector_ranks.get(doc_id, float("inf"))
-        keyword_rank = keyword_ranks.get(doc_id, float("inf"))
-        # RRF formula: score = 1/(k + r) where k is impact_factor and r is the rank
-        rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank))
-    return rrf_scores
-
-
 def _make_sql_identifier(name: str) -> str:
     return re.sub(r"[^a-zA-Z0-9_]", "_", name)

@@ -398,14 +345,10 @@ class SQLiteVecIndex(EmbeddingIndex):
             for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
         }

-        # Combine scores using the specified reranker
-        if reranker_type == RERANKER_TYPE_WEIGHTED:
-            alpha = reranker_params.get("alpha", 0.5)
-            combined_scores = _weighted_rerank(vector_scores, keyword_scores, alpha)
-        else:
-            # Default to RRF for None, RRF, or any unknown types
-            impact_factor = reranker_params.get("impact_factor", 60.0)
-            combined_scores = _rrf_rerank(vector_scores, keyword_scores, impact_factor)
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )

         # Sort by combined score and get top k results
         sorted_items = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
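The provider-local rerankers are removed in favor of the shared `WeightedInMemoryAggregator`, so the math itself does not change. A small worked example of the Reciprocal Rank Fusion formula the deleted `_rrf_rerank()` implemented (standalone sketch, not calling the new utility):

```python
# Sketch only: RRF score for one document, as in the removed helper.
# With impact factor k = 60, a document ranked 1st by vector search and
# 3rd by keyword search combines to 1/(k + 1) + 1/(k + 3).
k = 60.0
vector_rank, keyword_rank = 1, 3
score = 1.0 / (k + vector_rank) + 1.0 / (k + keyword_rank)
print(round(score, 6))  # 1/61 + 1/63 ≈ 0.032266
```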
@@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.batches,
             provider_type="inline::reference",
-            pip_packages=["openai"],
+            pip_packages=[],
             module="llama_stack.providers.inline.batches.reference",
             config_class="llama_stack.providers.inline.batches.reference.config.ReferenceBatchesImplConfig",
             api_dependencies=[

@@ -30,7 +30,7 @@ def available_providers() -> list[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="huggingface",
                 pip_packages=[
-                    "datasets",
+                    "datasets>=4.0.0",
                 ],
                 module="llama_stack.providers.remote.datasetio.huggingface",
                 config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
@@ -42,7 +42,7 @@ def available_providers() -> list[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="nvidia",
                 pip_packages=[
-                    "datasets",
+                    "datasets>=4.0.0",
                 ],
                 module="llama_stack.providers.remote.datasetio.nvidia",
                 config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",

@@ -75,7 +75,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="vllm",
-            pip_packages=["openai"],
+            pip_packages=[],
             module="llama_stack.providers.remote.inference.vllm",
             config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
             description="Remote vLLM inference provider for connecting to vLLM servers.",
@@ -116,7 +116,7 @@ def available_providers() -> list[ProviderSpec]:
         adapter=AdapterSpec(
             adapter_type="fireworks",
             pip_packages=[
-                "fireworks-ai<=0.18.0",
+                "fireworks-ai<=0.17.16",
             ],
             module="llama_stack.providers.remote.inference.fireworks",
             config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
@@ -151,9 +151,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="databricks",
-            pip_packages=[
-                "openai",
-            ],
+            pip_packages=[],
             module="llama_stack.providers.remote.inference.databricks",
             config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
             description="Databricks inference provider for running models on Databricks' unified analytics platform.",
@@ -163,9 +161,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="nvidia",
-            pip_packages=[
-                "openai",
-            ],
+            pip_packages=[],
             module="llama_stack.providers.remote.inference.nvidia",
             config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
             description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
@@ -175,7 +171,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="runpod",
-            pip_packages=["openai"],
+            pip_packages=[],
             module="llama_stack.providers.remote.inference.runpod",
             config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
             description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
@@ -292,7 +288,7 @@ Available Models:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="watsonx",
-            pip_packages=["ibm_watson_machine_learning"],
+            pip_packages=["ibm_watsonx_ai"],
             module="llama_stack.providers.remote.inference.watsonx",
             config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
             provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",

@@ -48,7 +48,7 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.post_training,
             provider_type="inline::huggingface-gpu",
-            pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
+            pip_packages=["trl", "transformers", "peft", "datasets>=4.0.0", "torch"],
             module="llama_stack.providers.inline.post_training.huggingface",
             config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
             api_dependencies=[

@@ -38,7 +38,7 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.scoring,
             provider_type="inline::braintrust",
-            pip_packages=["autoevals", "openai"],
+            pip_packages=["autoevals"],
             module="llama_stack.providers.inline.scoring.braintrust",
             config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig",
             api_dependencies=[

@@ -32,7 +32,7 @@ def available_providers() -> list[ProviderSpec]:
             ],
             module="llama_stack.providers.inline.tool_runtime.rag",
             config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
-            api_dependencies=[Api.vector_io, Api.inference],
+            api_dependencies=[Api.vector_io, Api.inference, Api.files],
            description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.",
         ),
         remote_provider_spec(

@@ -5,12 +5,13 @@
 # the root directory of this source tree.

 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import AnthropicConfig
 from .models import MODEL_ENTRIES


-class AnthropicInferenceAdapter(LiteLLMOpenAIMixin):
+class AnthropicInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     def __init__(self, config: AnthropicConfig) -> None:
         LiteLLMOpenAIMixin.__init__(
             self,
@@ -26,3 +27,8 @@ class AnthropicInferenceAdapter(LiteLLMOpenAIMixin):

     async def shutdown(self) -> None:
         await super().shutdown()
+
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self):
+        return "https://api.anthropic.com/v1"

@@ -5,12 +5,13 @@
 # the root directory of this source tree.

 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import GeminiConfig
 from .models import MODEL_ENTRIES


-class GeminiInferenceAdapter(LiteLLMOpenAIMixin):
+class GeminiInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     def __init__(self, config: GeminiConfig) -> None:
         LiteLLMOpenAIMixin.__init__(
             self,
@@ -21,6 +22,11 @@ class GeminiInferenceAdapter(LiteLLMOpenAIMixin):
         )
         self.config = config

+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self):
+        return "https://generativelanguage.googleapis.com/v1beta/openai/"
+
     async def initialize(self) -> None:
         await super().initialize()

@@ -4,30 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from collections.abc import AsyncIterator
-from typing import Any
-
-from openai import AsyncOpenAI
-
-from llama_stack.apis.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChoiceDelta,
-    OpenAIChunkChoice,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-    OpenAISystemMessageParam,
-)
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
-from llama_stack.providers.utils.inference.openai_compat import (
-    prepare_openai_completion_params,
-)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .models import MODEL_ENTRIES


-class GroqInferenceAdapter(LiteLLMOpenAIMixin):
+class GroqInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     _config: GroqConfig

     def __init__(self, config: GroqConfig):
@@ -40,122 +25,14 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
         )
         self.config = config

+    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self) -> str:
+        return f"{self.config.url}/openai/v1"
+
     async def initialize(self):
         await super().initialize()

     async def shutdown(self):
         await super().shutdown()
-
-    def _get_openai_client(self) -> AsyncOpenAI:
-        return AsyncOpenAI(
-            base_url=f"{self.config.url}/openai/v1",
-            api_key=self.get_api_key(),
-        )
-
-    async def openai_chat_completion(
-        self,
-        model: str,
-        messages: list[OpenAIMessageParam],
-        frequency_penalty: float | None = None,
-        function_call: str | dict[str, Any] | None = None,
-        functions: list[dict[str, Any]] | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_completion_tokens: int | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        parallel_tool_calls: bool | None = None,
-        presence_penalty: float | None = None,
-        response_format: OpenAIResponseFormatParam | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        tool_choice: str | dict[str, Any] | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        top_logprobs: int | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
-    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        model_obj = await self.model_store.get_model(model)
-
-        # Groq does not support json_schema response format, so we need to convert it to json_object
-        if response_format and response_format.type == "json_schema":
-            response_format.type = "json_object"
-            schema = response_format.json_schema.get("schema", {})
-            response_format.json_schema = None
-            json_instructions = f"\nYour response should be a JSON object that matches the following schema: {schema}"
-            if messages and messages[0].role == "system":
-                messages[0].content = messages[0].content + json_instructions
-            else:
-                messages.insert(0, OpenAISystemMessageParam(content=json_instructions))
-
-        # Groq returns a 400 error if tools are provided but none are called
-        # So, set tool_choice to "required" to attempt to force a call
-        if tools and (not tool_choice or tool_choice == "auto"):
-            tool_choice = "required"
-
-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            messages=messages,
-            frequency_penalty=frequency_penalty,
-            function_call=function_call,
-            functions=functions,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_completion_tokens=max_completion_tokens,
-            max_tokens=max_tokens,
-            n=n,
-            parallel_tool_calls=parallel_tool_calls,
-            presence_penalty=presence_penalty,
-            response_format=response_format,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
-            tools=tools,
-            top_logprobs=top_logprobs,
-            top_p=top_p,
-            user=user,
-        )
-
-        # Groq does not support streaming requests that set response_format
-        fake_stream = False
-        if stream and response_format:
-            params["stream"] = False
-            fake_stream = True
-
-        response = await self._get_openai_client().chat.completions.create(**params)
-
-        if fake_stream:
-            chunk_choices = []
-            for choice in response.choices:
-                delta = OpenAIChoiceDelta(
-                    content=choice.message.content,
-                    role=choice.message.role,
-                    tool_calls=choice.message.tool_calls,
-                )
-                chunk_choice = OpenAIChunkChoice(
-                    delta=delta,
-                    finish_reason=choice.finish_reason,
-                    index=choice.index,
-                    logprobs=None,
-                )
-                chunk_choices.append(chunk_choice)
-            chunk = OpenAIChatCompletionChunk(
-                id=response.id,
-                choices=chunk_choices,
-                object="chat.completion.chunk",
-                created=response.created,
-                model=response.model,
-            )
-
-            async def _fake_stream_generator():
-                yield chunk
-
-            return _fake_stream_generator()
-        else:
-            return response

@@ -118,10 +118,10 @@ class OllamaInferenceAdapter(

     async def initialize(self) -> None:
         logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
-        health_response = await self.health()
-        if health_response["status"] == HealthStatus.ERROR:
+        r = await self.health()
+        if r["status"] == HealthStatus.ERROR:
             logger.warning(
-                "Ollama Server is not running, make sure to start it using `ollama serve` in a separate terminal"
+                f"Ollama Server is not running (message: {r['message']}). Make sure to start it using `ollama serve` in a separate terminal"
             )

     async def should_refresh_models(self) -> bool:
@@ -156,7 +156,7 @@ class OllamaInferenceAdapter(
             ),
             Model(
                 identifier="nomic-embed-text",
-                provider_resource_id="nomic-embed-text",
+                provider_resource_id="nomic-embed-text:latest",
                 provider_id=provider_id,
                 metadata={
                     "embedding_dimension": 768,

@@ -4,13 +4,26 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.


 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import SambaNovaImplConfig
 from .models import MODEL_ENTRIES


-class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
+class SambaNovaInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+    """
+    SambaNova Inference Adapter for Llama Stack.
+
+    Note: The inheritance order is important here. OpenAIMixin must come before
+    LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
+    is used instead of LiteLLMOpenAIMixin.check_model_availability().
+
+    - OpenAIMixin.check_model_availability() queries the /v1/models to check if a model exists
+    - LiteLLMOpenAIMixin.check_model_availability() checks the static registry within LiteLLM
+    """
+
     def __init__(self, config: SambaNovaImplConfig):
         self.config = config
         self.environment_available_models = []
@@ -24,3 +37,14 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
             download_images=True,  # SambaNova requires base64 image encoding
             json_schema_strict=False,  # SambaNova doesn't support strict=True yet
         )
+
+    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self) -> str:
+        """
+        Get the base URL for OpenAI mixin.
+
+        :return: The SambaNova base URL
+        """
+        return self.config.url
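Several adapters in this commit (Anthropic, Gemini, Groq, SambaNova, and Vertex AI below) now list `OpenAIMixin` ahead of `LiteLLMOpenAIMixin`; as the SambaNova docstring notes, Python's method resolution order makes the first base win when both define the same method. A minimal standalone sketch of that behavior with toy classes (not the real mixins):

```python
# Sketch only: toy classes demonstrating why the base-class order matters.
class OpenAIMixinToy:
    def check_model_availability(self) -> str:
        return "query /v1/models"

class LiteLLMMixinToy:
    def check_model_availability(self) -> str:
        return "check LiteLLM's static registry"

class Adapter(OpenAIMixinToy, LiteLLMMixinToy):  # OpenAIMixinToy listed first
    pass

print(Adapter().check_model_availability())      # -> "query /v1/models"
print([c.__name__ for c in Adapter.__mro__])     # Adapter, OpenAIMixinToy, LiteLLMMixinToy, object
```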
@ -6,16 +6,20 @@
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import google.auth.transport.requests
|
||||||
|
from google.auth import default
|
||||||
|
|
||||||
from llama_stack.apis.inference import ChatCompletionRequest
|
from llama_stack.apis.inference import ChatCompletionRequest
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
||||||
LiteLLMOpenAIMixin,
|
LiteLLMOpenAIMixin,
|
||||||
)
|
)
|
||||||
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
from .config import VertexAIConfig
|
from .config import VertexAIConfig
|
||||||
from .models import MODEL_ENTRIES
|
from .models import MODEL_ENTRIES
|
||||||
|
|
||||||
|
|
||||||
class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
|
class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
def __init__(self, config: VertexAIConfig) -> None:
|
def __init__(self, config: VertexAIConfig) -> None:
|
||||||
LiteLLMOpenAIMixin.__init__(
|
LiteLLMOpenAIMixin.__init__(
|
||||||
self,
|
self,
|
||||||
|
@ -27,10 +31,31 @@ class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
def get_api_key(self) -> str:
|
def get_api_key(self) -> str:
|
||||||
# Vertex AI doesn't use API keys, it uses Application Default Credentials
|
"""
|
||||||
# Return empty string to let litellm handle authentication via ADC
|
Get an access token for Vertex AI using Application Default Credentials.
|
||||||
|
|
||||||
|
Vertex AI uses ADC instead of API keys. This method obtains an access token
|
||||||
|
from the default credentials and returns it for use with the OpenAI-compatible client.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get default credentials - will read from GOOGLE_APPLICATION_CREDENTIALS
|
||||||
|
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
|
||||||
|
credentials.refresh(google.auth.transport.requests.Request())
|
||||||
|
return str(credentials.token)
|
||||||
|
except Exception:
|
||||||
|
# If we can't get credentials, return empty string to let LiteLLM handle it
|
||||||
|
# This allows the LiteLLM mixin to work with ADC directly
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
def get_base_url(self) -> str:
|
||||||
|
"""
|
||||||
|
Get the Vertex AI OpenAI-compatible API base URL.
|
||||||
|
|
||||||
|
Returns the Vertex AI OpenAI-compatible endpoint URL.
|
||||||
|
Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
|
||||||
|
"""
|
||||||
|
return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
||||||
# Get base parameters from parent
|
# Get base parameters from parent
|
||||||
params = await super()._get_params(request)
|
params = await super()._get_params(request)
|
||||||
|
|
|
@ -7,8 +7,8 @@
|
||||||
from collections.abc import AsyncGenerator, AsyncIterator
|
from collections.abc import AsyncGenerator, AsyncIterator
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from ibm_watson_machine_learning.foundation_models import Model
|
from ibm_watsonx_ai.foundation_models import Model
|
||||||
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
|
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
|
||||||
from openai import AsyncOpenAI
|
from openai import AsyncOpenAI
|
||||||
|
|
||||||
from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem
|
from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem
|
||||||
|
|
|
@ -4,53 +4,55 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os

from pydantic import BaseModel, Field


class BedrockBaseConfig(BaseModel):
    aws_access_key_id: str | None = Field(
        default=None,
        default_factory=lambda: os.getenv("AWS_ACCESS_KEY_ID"),
        description="The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID",
    )
    aws_secret_access_key: str | None = Field(
        default=None,
        default_factory=lambda: os.getenv("AWS_SECRET_ACCESS_KEY"),
        description="The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY",
    )
    aws_session_token: str | None = Field(
        default=None,
        default_factory=lambda: os.getenv("AWS_SESSION_TOKEN"),
        description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN",
    )
    region_name: str | None = Field(
        default=None,
        default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"),
        description="The default AWS Region to use, for example, us-west-1 or us-west-2."
        "Default use environment variable: AWS_DEFAULT_REGION",
    )
    profile_name: str | None = Field(
        default=None,
        default_factory=lambda: os.getenv("AWS_PROFILE"),
        description="The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE",
    )
    total_max_attempts: int | None = Field(
        default=None,
        default_factory=lambda: int(val) if (val := os.getenv("AWS_MAX_ATTEMPTS")) else None,
        description="An integer representing the maximum number of attempts that will be made for a single request, "
        "including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS",
    )
    retry_mode: str | None = Field(
        default=None,
        default_factory=lambda: os.getenv("AWS_RETRY_MODE"),
        description="A string representing the type of retries Boto3 will perform."
        "Default use environment variable: AWS_RETRY_MODE",
    )
    connect_timeout: float | None = Field(
        default=60,
        default_factory=lambda: float(os.getenv("AWS_CONNECT_TIMEOUT", "60")),
        description="The time in seconds till a timeout exception is thrown when attempting to make a connection. "
        "The default is 60 seconds.",
    )
    read_timeout: float | None = Field(
        default=60,
        default_factory=lambda: float(os.getenv("AWS_READ_TIMEOUT", "60")),
        description="The time in seconds till a timeout exception is thrown when attempting to read from a connection."
        "The default is 60 seconds.",
    )
    session_ttl: int | None = Field(
        default=3600,
        default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", "3600")),
        description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).",
    )
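
As a rough illustration of why these fields move from default=None to default_factory: a default_factory is evaluated each time the model is instantiated, so environment variables set after import are still picked up. A small sketch with a hypothetical model name:

# Sketch: default_factory re-reads the environment at instantiation time.
import os

from pydantic import BaseModel, Field


class ExampleAwsConfig(BaseModel):  # hypothetical, for illustration only
    region_name: str | None = Field(default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"))


os.environ["AWS_DEFAULT_REGION"] = "us-west-2"
print(ExampleAwsConfig().region_name)  # -> us-west-2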
@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
import base64
import struct
from typing import TYPE_CHECKING

@ -43,9 +44,11 @@ class SentenceTransformerEmbeddingMixin:
        task_type: EmbeddingTaskType | None = None,
    ) -> EmbeddingsResponse:
        model = await self.model_store.get_model(model_id)
        embedding_model = self._load_sentence_transformer_model(model.provider_resource_id)
        embedding_model = await self._load_sentence_transformer_model(model.provider_resource_id)
        embeddings = embedding_model.encode(
            [interleaved_content_as_str(content) for content in contents], show_progress_bar=False
        embeddings = await asyncio.to_thread(
            embedding_model.encode,
            [interleaved_content_as_str(content) for content in contents],
            show_progress_bar=False,
        )
        return EmbeddingsResponse(embeddings=embeddings)

@ -64,8 +67,8 @@ class SentenceTransformerEmbeddingMixin:

        # Get the model and generate embeddings
        model_obj = await self.model_store.get_model(model)
        embedding_model = self._load_sentence_transformer_model(model_obj.provider_resource_id)
        embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id)
        embeddings = embedding_model.encode(input_list, show_progress_bar=False)
        embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False)

        # Convert embeddings to the requested format
        data = []

@ -93,7 +96,7 @@ class SentenceTransformerEmbeddingMixin:
            usage=usage,
        )

    def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer":
    async def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer":
        global EMBEDDING_MODELS

        loaded_model = EMBEDDING_MODELS.get(model)

@ -101,8 +104,12 @@ class SentenceTransformerEmbeddingMixin:
            return loaded_model

        log.info(f"Loading sentence transformer for {model}...")

        def _load_model():
            from sentence_transformers import SentenceTransformer

            loaded_model = SentenceTransformer(model)
            return SentenceTransformer(model)

        loaded_model = await asyncio.to_thread(_load_model)
        EMBEDDING_MODELS[model] = loaded_model
        return loaded_model
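
The change above moves blocking model loading and encode calls off the event loop. A minimal standalone sketch of that pattern, with illustrative function names and no sentence-transformers dependency:

# Sketch: run a blocking function in a worker thread so the asyncio loop stays responsive.
import asyncio
import time


def blocking_encode(texts: list[str]) -> list[int]:
    time.sleep(0.1)  # stand-in for a slow model.encode() call
    return [len(t) for t in texts]


async def main() -> None:
    embeddings = await asyncio.to_thread(blocking_encode, ["hello", "world"])
    print(embeddings)


asyncio.run(main())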
@ -3,6 +3,11 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio
from typing import Any

from sqlalchemy.exc import IntegrityError

from llama_stack.apis.inference import (
    ListOpenAIChatCompletionResponse,
    OpenAIChatCompletion,

@ -10,24 +15,43 @@ from llama_stack.apis.inference import (
    OpenAIMessageParam,
    Order,
)
from llama_stack.core.datatypes import AccessRule
from llama_stack.core.datatypes import AccessRule, InferenceStoreConfig
from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR
from llama_stack.log import get_logger

from ..sqlstore.api import ColumnDefinition, ColumnType
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl
from ..sqlstore.sqlstore import SqlStoreConfig, SqlStoreType, sqlstore_impl

logger = get_logger(name=__name__, category="inference_store")


class InferenceStore:
    def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]):
        if not sql_store_config:
            sql_store_config = SqliteSqlStoreConfig(
                db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
            )
        self.sql_store_config = sql_store_config
    def __init__(
        self,
        config: InferenceStoreConfig | SqlStoreConfig,
        policy: list[AccessRule],
    ):
        # Handle backward compatibility
        if not isinstance(config, InferenceStoreConfig):
            # Legacy: SqlStoreConfig passed directly as config
            config = InferenceStoreConfig(
                sql_store_config=config,
            )

        self.config = config
        self.sql_store_config = config.sql_store_config
        self.sql_store = None
        self.policy = policy

        # Disable write queue for SQLite to avoid concurrency issues
        self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite

        # Async write queue and worker control
        self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None
        self._worker_tasks: list[asyncio.Task[Any]] = []
        self._max_write_queue_size: int = config.max_write_queue_size
        self._num_writers: int = max(1, config.num_writers)

    async def initialize(self):
        """Create the necessary tables if they don't exist."""
        self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config))

@ -42,23 +66,109 @@ class InferenceStore:
            },
        )

        if self.enable_write_queue:
            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
            for _ in range(self._num_writers):
                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
        else:
            logger.info("Write queue disabled for SQLite to avoid concurrency issues")

    async def shutdown(self) -> None:
        if not self._worker_tasks:
            return
        if self._queue is not None:
            await self._queue.join()
        for t in self._worker_tasks:
            if not t.done():
                t.cancel()
        for t in self._worker_tasks:
            try:
                await t
            except asyncio.CancelledError:
                pass
        self._worker_tasks.clear()

    async def flush(self) -> None:
        """Wait for all queued writes to complete. Useful for testing."""
        if self.enable_write_queue and self._queue is not None:
            await self._queue.join()

    async def store_chat_completion(
        self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
    ) -> None:
        if not self.sql_store:
        if self.enable_write_queue:
            if self._queue is None:
                raise ValueError("Inference store is not initialized")
            try:
                self._queue.put_nowait((chat_completion, input_messages))
            except asyncio.QueueFull:
                logger.warning(
                    f"Write queue full; adding chat completion id={getattr(chat_completion, 'id', '<unknown>')}"
                )
                await self._queue.put((chat_completion, input_messages))
        else:
            await self._write_chat_completion(chat_completion, input_messages)

    async def _worker_loop(self) -> None:
        assert self._queue is not None
        while True:
            try:
                item = await self._queue.get()
            except asyncio.CancelledError:
                break
            chat_completion, input_messages = item
            try:
                await self._write_chat_completion(chat_completion, input_messages)
            except Exception as e:  # noqa: BLE001
                logger.error(f"Error writing chat completion: {e}")
            finally:
                self._queue.task_done()

    async def _write_chat_completion(
        self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
    ) -> None:
        if self.sql_store is None:
            raise ValueError("Inference store is not initialized")

        data = chat_completion.model_dump()
        await self.sql_store.insert(
            table="chat_completions",
            data={
        record_data = {
            "id": data["id"],
            "created": data["created"],
            "model": data["model"],
            "choices": data["choices"],
            "input_messages": [message.model_dump() for message in input_messages],
            },
        }

        try:
            await self.sql_store.insert(
                table="chat_completions",
                data=record_data,
            )
        except IntegrityError as e:
            # Duplicate chat completion IDs can be generated during tests especially if they are replaying
            # recorded responses across different tests. No need to warn or error under those circumstances.
            # In the wild, this is not likely to happen at all (no evidence) so we aren't really hiding any problem.

            # Check if it's a unique constraint violation
            error_message = str(e.orig) if e.orig else str(e)
            if self._is_unique_constraint_error(error_message):
                # Update the existing record instead
                await self.sql_store.update(table="chat_completions", data=record_data, where={"id": data["id"]})
            else:
                # Re-raise if it's not a unique constraint error
                raise

    def _is_unique_constraint_error(self, error_message: str) -> bool:
        """Check if the error is specifically a unique constraint violation."""
        error_lower = error_message.lower()
        return any(
            indicator in error_lower
            for indicator in [
                "unique constraint failed",  # SQLite
                "duplicate key",  # PostgreSQL
                "unique violation",  # PostgreSQL alternative
                "duplicate entry",  # MySQL
            ]
        )

    async def list_chat_completions(
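
A condensed sketch of the bounded write-queue pattern introduced above, with generic names rather than the InferenceStore API: producers enqueue without blocking when possible, background worker tasks drain the queue, and shutdown joins the queue before cancelling the workers.

# Sketch: bounded asyncio queue with background writer tasks.
import asyncio


async def writer(queue: asyncio.Queue) -> None:
    while True:
        item = await queue.get()
        try:
            print(f"persisting {item}")  # stand-in for the actual DB insert
        finally:
            queue.task_done()


async def main() -> None:
    queue: asyncio.Queue[int] = asyncio.Queue(maxsize=100)
    workers = [asyncio.create_task(writer(queue)) for _ in range(2)]

    for i in range(5):
        try:
            queue.put_nowait(i)  # fast, non-blocking path
        except asyncio.QueueFull:
            await queue.put(i)   # back-pressure path when the queue is full

    await queue.join()           # wait for all pending writes
    for w in workers:
        w.cancel()


asyncio.run(main())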
@ -172,6 +172,20 @@ class AuthorizedSqlStore:

        return results.data[0] if results.data else None

    async def update(self, table: str, data: Mapping[str, Any], where: Mapping[str, Any]) -> None:
        """Update rows with automatic access control attribute capture."""
        enhanced_data = dict(data)

        current_user = get_authenticated_user()
        if current_user:
            enhanced_data["owner_principal"] = current_user.principal
            enhanced_data["access_attributes"] = current_user.attributes
        else:
            enhanced_data["owner_principal"] = None
            enhanced_data["access_attributes"] = None

        await self.sql_store.update(table, enhanced_data, where)

    async def delete(self, table: str, where: Mapping[str, Any]) -> None:
        """Delete rows with automatic access control filtering."""
        await self.sql_store.delete(table, where)
@ -18,6 +18,7 @@ from functools import wraps
from typing import Any

from llama_stack.apis.telemetry import (
    Event,
    LogSeverity,
    Span,
    SpanEndPayload,

@ -98,7 +99,7 @@ class BackgroundLogger:
    def __init__(self, api: Telemetry, capacity: int = 100000):
        self.api = api
        self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
        self.worker_thread = threading.Thread(target=self._process_logs, daemon=True)
        self.worker_thread = threading.Thread(target=self._worker, daemon=True)
        self.worker_thread.start()
        self._last_queue_full_log_time: float = 0.0
        self._dropped_since_last_notice: int = 0

@ -118,12 +119,16 @@ class BackgroundLogger:
            self._last_queue_full_log_time = current_time
            self._dropped_since_last_notice = 0

    def _process_logs(self):
    def _worker(self):
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self._process_logs())

    async def _process_logs(self):
        while True:
            try:
                event = self.log_queue.get()
                # figure out how to use a thread's native loop
                asyncio.run(self.api.log_event(event))
                await self.api.log_event(event)
            except Exception:
                import traceback

@ -136,6 +141,19 @@ class BackgroundLogger:
        self.log_queue.join()


def enqueue_event(event: Event) -> None:
    """Enqueue a telemetry event to the background logger if available.

    This provides a non-blocking path for routers and other hot paths to
    submit telemetry without awaiting the Telemetry API, reducing contention
    with the main event loop.
    """
    global BACKGROUND_LOGGER
    if BACKGROUND_LOGGER is None:
        raise RuntimeError("Telemetry API not initialized")
    BACKGROUND_LOGGER.log_event(event)


class TraceContext:
    spans: list[Span] = []

@ -256,11 +274,7 @@ class TelemetryHandler(logging.Handler):
        if record.module in ("asyncio", "selector_events"):
            return

        global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER
        global CURRENT_TRACE_CONTEXT

        if BACKGROUND_LOGGER is None:
            raise RuntimeError("Telemetry API not initialized")

        context = CURRENT_TRACE_CONTEXT.get()
        if context is None:
            return

@ -269,7 +283,7 @@ class TelemetryHandler(logging.Handler):
        if span is None:
            return

        BACKGROUND_LOGGER.log_event(
        enqueue_event(
            UnstructuredLogEvent(
                trace_id=span.trace_id,
                span_id=span.span_id,
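
A small sketch of the worker-thread pattern BackgroundLogger uses above: the daemon thread owns its own event loop and drains a thread-safe queue, so callers never await on the hot path. Names here are illustrative.

# Sketch: a daemon thread running its own asyncio loop to process queued events.
import asyncio
import queue
import threading


async def handle(event: str) -> None:
    print("logged:", event)


def worker(q: "queue.Queue[str]") -> None:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    async def process() -> None:
        while True:
            event = q.get()
            if event == "__stop__":
                q.task_done()
                return
            await handle(event)
            q.task_done()

    loop.run_until_complete(process())


q: "queue.Queue[str]" = queue.Queue(maxsize=1000)
threading.Thread(target=worker, args=(q,), daemon=True).start()
q.put("hello")
q.put("__stop__")
q.join()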
@ -67,6 +67,38 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
                    raise AuthenticationRequiredError(exc) from exc
                if i == len(connection_strategies) - 1:
                    raise
            except* httpx.ConnectError as eg:
                # Connection refused, server down, network unreachable
                if i == len(connection_strategies) - 1:
                    error_msg = f"Failed to connect to MCP server at {endpoint}: Connection refused"
                    logger.error(f"MCP connection error: {error_msg}")
                    raise ConnectionError(error_msg) from eg
                else:
                    logger.warning(
                        f"failed to connect to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
                    )
            except* httpx.TimeoutException as eg:
                # Request timeout, server too slow
                if i == len(connection_strategies) - 1:
                    error_msg = f"MCP server at {endpoint} timed out"
                    logger.error(f"MCP timeout error: {error_msg}")
                    raise TimeoutError(error_msg) from eg
                else:
                    logger.warning(
                        f"MCP server at {endpoint} timed out via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
                    )
            except* httpx.RequestError as eg:
                # DNS resolution failures, network errors, invalid URLs
                if i == len(connection_strategies) - 1:
                    # Get the first exception's message for the error string
                    exc_msg = str(eg.exceptions[0]) if eg.exceptions else "Unknown error"
                    error_msg = f"Network error connecting to MCP server at {endpoint}: {exc_msg}"
                    logger.error(f"MCP network error: {error_msg}")
                    raise ConnectionError(error_msg) from eg
                else:
                    logger.warning(
                        f"network error connecting to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
                    )
            except* McpError:
                if i < len(connection_strategies) - 1:
                    logger.warning(
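
The handlers above use except* because failures from the MCP client surface as ExceptionGroups. A toy example of that syntax (Python 3.11+, no httpx required; the function and messages are made up for illustration):

# Sketch: except* matches exceptions contained in an ExceptionGroup (Python 3.11+).
def connect() -> None:
    raise ExceptionGroup("connection attempt failed", [ConnectionError("refused"), TimeoutError("slow")])


try:
    connect()
except* ConnectionError as eg:
    print("connection errors:", [str(e) for e in eg.exceptions])
except* TimeoutError as eg:
    print("timeouts:", [str(e) for e in eg.exceptions])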
@ -30,6 +30,9 @@ from openai.types.completion_choice import CompletionChoice
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
CompletionChoice.model_rebuild()

REPO_ROOT = Path(__file__).parent.parent.parent
DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/recordings"


class InferenceMode(StrEnum):
    LIVE = "live"

@ -51,7 +54,7 @@ def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict


def get_inference_mode() -> InferenceMode:
    return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower())
    return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "replay").lower())


def setup_inference_recording():

@ -60,28 +63,18 @@ def setup_inference_recording():
    to increase their reliability and reduce reliance on expensive, external services.

    Currently, this is only supported for OpenAI and Ollama clients. These should cover the vast majority of use cases.
    Calls to the /models endpoint are not currently trapped. We probably need to add support for this.

    Two environment variables are required:
    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'.
    - LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in.
    Two environment variables are supported:
    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'. Default is 'replay'.
    - LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in. Default is 'tests/integration/recordings'.

    The recordings are stored in a SQLite database and a JSON file for each request. The SQLite database is used to
    quickly find the correct recording for a given request. The JSON files are used to store the request and response
    bodies.
    The recordings are stored as JSON files.
    """
    mode = get_inference_mode()

    if mode not in InferenceMode:
        raise ValueError(f"Invalid LLAMA_STACK_TEST_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")

    if mode == InferenceMode.LIVE:
        return None

    if "LLAMA_STACK_TEST_RECORDING_DIR" not in os.environ:
        raise ValueError("LLAMA_STACK_TEST_RECORDING_DIR must be set for recording or replaying")
    storage_dir = os.environ["LLAMA_STACK_TEST_RECORDING_DIR"]
    storage_dir = os.environ.get("LLAMA_STACK_TEST_RECORDING_DIR", DEFAULT_STORAGE_DIR)

    return inference_recording(mode=mode, storage_dir=storage_dir)


@ -134,8 +127,8 @@ class ResponseStorage:
    def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
        """Store a request/response pair."""
        # Generate unique response filename
        response_file = f"{request_hash[:12]}.json"
        response_path = self.responses_dir / response_file
        short_hash = request_hash[:12]
        response_file = f"{short_hash}.json"

        # Serialize response body if needed
        serialized_response = dict(response)

@ -147,6 +140,14 @@ class ResponseStorage:
            # Handle single response
            serialized_response["body"] = _serialize_response(serialized_response["body"])

        # If this is an Ollama /api/tags recording, include models digest in filename to distinguish variants
        endpoint = request.get("endpoint")
        if endpoint in ("/api/tags", "/v1/models"):
            digest = _model_identifiers_digest(endpoint, response)
            response_file = f"models-{short_hash}-{digest}.json"

        response_path = self.responses_dir / response_file

        # Save response to JSON file
        with open(response_path, "w") as f:
            json.dump({"request": request, "response": serialized_response}, f, indent=2)

@ -161,6 +162,17 @@ class ResponseStorage:
        if not response_path.exists():
            return None

        return _recording_from_file(response_path)

    def _model_list_responses(self, short_hash: str) -> list[dict[str, Any]]:
        results: list[dict[str, Any]] = []
        for path in self.responses_dir.glob(f"models-{short_hash}-*.json"):
            data = _recording_from_file(path)
            results.append(data)
        return results


def _recording_from_file(response_path) -> dict[str, Any]:
    with open(response_path) as f:
        data = json.load(f)

@ -176,6 +188,61 @@ class ResponseStorage:
    return cast(dict[str, Any], data)


def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
    def _extract_model_identifiers():
        """Extract a stable set of identifiers for model-list endpoints.

        Supported endpoints:
        - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
        - '/v1/models' (OpenAI): response body has 'data': [ { id: ... }, ... ]
        Returns a list of unique identifiers or None if structure doesn't match.
        """
        body = response["body"]
        if endpoint == "/api/tags":
            items = body.get("models")
            idents = [m.model for m in items]
        else:
            items = body.get("data")
            idents = [m.id for m in items]
        return sorted(set(idents))

    identifiers = _extract_model_identifiers()
    return hashlib.sha1(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]


def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return a single, unioned recording for supported model-list endpoints."""
    seen: dict[str, dict[str, Any]] = {}
    for rec in records:
        body = rec["response"]["body"]
        if endpoint == "/api/tags":
            items = body.models
        elif endpoint == "/v1/models":
            items = body.data
        else:
            items = []

        for m in items:
            if endpoint == "/v1/models":
                key = m.id
            else:
                key = m.model
            seen[key] = m

    ordered = [seen[k] for k in sorted(seen.keys())]
    canonical = records[0]
    canonical_req = canonical.get("request", {})
    if isinstance(canonical_req, dict):
        canonical_req["endpoint"] = endpoint
    if endpoint == "/v1/models":
        body = {"data": ordered, "object": "list"}
    else:
        from ollama import ListResponse

        body = ListResponse(models=ordered)
    return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}


async def _patched_inference_method(original_method, self, client_type, endpoint, *args, **kwargs):
    global _current_mode, _current_storage

@ -195,8 +262,6 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
        raise ValueError(f"Unknown client type: {client_type}")

    url = base_url.rstrip("/") + endpoint

    # Normalize request for matching
    method = "POST"
    headers = {}
    body = kwargs

@ -204,6 +269,11 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
    request_hash = normalize_request(method, url, headers, body)

    if _current_mode == InferenceMode.REPLAY:
        # Special handling for model-list endpoints: return union of all responses
        if endpoint in ("/api/tags", "/v1/models"):
            records = _current_storage._model_list_responses(request_hash[:12])
            recording = _combine_model_list_responses(endpoint, records)
        else:
            recording = _current_storage.find_recording(request_hash)
        if recording:
            response_body = recording["response"]["body"]

@ -222,7 +292,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
                f"No recorded response found for request hash: {request_hash}\n"
                f"Request: {method} {url} {body}\n"
                f"Model: {body.get('model', 'unknown')}\n"
                f"To record this response, run with LLAMA_STACK_INFERENCE_MODE=record"
                f"To record this response, run with LLAMA_STACK_TEST_INFERENCE_MODE=record"
            )

    elif _current_mode == InferenceMode.RECORD:

@ -274,12 +344,14 @@ def patch_inference_clients():
    from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
    from openai.resources.completions import AsyncCompletions
    from openai.resources.embeddings import AsyncEmbeddings
    from openai.resources.models import AsyncModels

    # Store original methods for both OpenAI and Ollama clients
    _original_methods = {
        "chat_completions_create": AsyncChatCompletions.create,
        "completions_create": AsyncCompletions.create,
        "embeddings_create": AsyncEmbeddings.create,
        "models_list": AsyncModels.list,
        "ollama_generate": OllamaAsyncClient.generate,
        "ollama_chat": OllamaAsyncClient.chat,
        "ollama_embed": OllamaAsyncClient.embed,

@ -304,10 +376,16 @@ def patch_inference_clients():
            _original_methods["embeddings_create"], self, "openai", "/v1/embeddings", *args, **kwargs
        )

    async def patched_models_list(self, *args, **kwargs):
        return await _patched_inference_method(
            _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
        )

    # Apply OpenAI patches
    AsyncChatCompletions.create = patched_chat_completions_create
    AsyncCompletions.create = patched_completions_create
    AsyncEmbeddings.create = patched_embeddings_create
    AsyncModels.list = patched_models_list

    # Create patched methods for Ollama client
    async def patched_ollama_generate(self, *args, **kwargs):

@ -361,11 +439,13 @@ def unpatch_inference_clients():
    from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
    from openai.resources.completions import AsyncCompletions
    from openai.resources.embeddings import AsyncEmbeddings
    from openai.resources.models import AsyncModels

    # Restore OpenAI client methods
    AsyncChatCompletions.create = _original_methods["chat_completions_create"]
    AsyncCompletions.create = _original_methods["completions_create"]
    AsyncEmbeddings.create = _original_methods["embeddings_create"]
    AsyncModels.list = _original_methods["models_list"]

    # Restore Ollama client methods if they were patched
    OllamaAsyncClient.generate = _original_methods["ollama_generate"]

@ -379,16 +459,10 @@ def unpatch_inference_clients():


@contextmanager
def inference_recording(mode: str = "live", storage_dir: str | Path | None = None) -> Generator[None, None, None]:
def inference_recording(mode: str, storage_dir: str | Path | None = None) -> Generator[None, None, None]:
    """Context manager for inference recording/replaying."""
    global _current_mode, _current_storage

    # Set defaults
    if storage_dir is None:
        storage_dir_path = Path.home() / ".llama" / "recordings"
    else:
        storage_dir_path = Path(storage_dir)

    # Store previous state
    prev_mode = _current_mode
    prev_storage = _current_storage

@ -397,7 +471,9 @@ def inference_recording(mode: str = "live", storage_dir: str | Path | None = Non
    _current_mode = mode

    if mode in ["record", "replay"]:
        _current_storage = ResponseStorage(storage_dir_path)
        if storage_dir is None:
            raise ValueError("storage_dir is required for record and replay modes")
        _current_storage = ResponseStorage(Path(storage_dir))
        patch_inference_clients()

    yield
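
For reference, the digest embedded in the model-list recording filenames above is just a short hash of the sorted, de-duplicated model identifiers; a standalone sketch with example identifiers:

# Sketch: derive a short, order-independent digest from a list of model IDs.
import hashlib


def model_digest(identifiers: list[str]) -> str:
    return hashlib.sha1("|".join(sorted(set(identifiers))).encode("utf-8")).hexdigest()[:8]


print(model_digest(["llama3.2:3b", "all-minilm:l6-v2"]))  # same digest regardless of input order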
509
llama_stack/ui/package-lock.json
generated
@ -10,7 +10,7 @@
    "dependencies": {
      "@radix-ui/react-collapsible": "^1.1.12",
      "@radix-ui/react-dialog": "^1.1.13",
      "@radix-ui/react-dropdown-menu": "^2.1.14",
      "@radix-ui/react-dropdown-menu": "^2.1.16",
      "@radix-ui/react-select": "^2.2.5",
      "@radix-ui/react-separator": "^1.1.7",
      "@radix-ui/react-slot": "^1.2.3",
@ -18,18 +18,18 @@
      "class-variance-authority": "^0.7.1",
      "clsx": "^2.1.1",
      "framer-motion": "^12.23.12",
      "llama-stack-client": "^0.2.20",
      "llama-stack-client": "^0.2.21",
      "lucide-react": "^0.510.0",
      "lucide-react": "^0.542.0",
      "next": "15.3.3",
      "next-auth": "^4.24.11",
      "next-themes": "^0.4.6",
      "react": "^19.0.0",
      "react-dom": "^19.0.0",
      "react-dom": "^19.1.1",
      "react-markdown": "^10.1.0",
      "remark-gfm": "^4.0.1",
      "remeda": "^2.30.0",
      "shiki": "^1.29.2",
      "sonner": "^2.0.6",
      "sonner": "^2.0.7",
      "tailwind-merge": "^3.3.1"
    },
    "devDependencies": {
@ -2066,12 +2066,35 @@
      "license": "MIT"
    },
    "node_modules/@radix-ui/react-arrow": {
      "version": "1.1.6",
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.6.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
      "integrity": "sha512-2JMfHJf/eVnwq+2dewT3C0acmCWD3XiVA1Da+jTDqo342UlU13WvXtqHhG+yJw5JeQmu4ue2eMy6gcEArLBlcw==",
      "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.2"
        "@radix-ui/react-primitive": "2.1.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-arrow/node_modules/@radix-ui/react-primitive": {
      "version": "2.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
@ -2172,15 +2195,15 @@
      }
    },
    "node_modules/@radix-ui/react-collection": {
      "version": "1.1.6",
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.6.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
      "integrity": "sha512-PbhRFK4lIEw9ADonj48tiYWzkllz81TM7KVYyyMMw2cwHO7D5h4XKEblL8NlaRisTK3QTe6tBEhDccFUryxHBQ==",
      "integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-primitive": "2.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-slot": "1.2.2"
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
@ -2197,21 +2220,26 @@
      }
    },
    "node_modules/@radix-ui/react-collection/node_modules/@radix-ui/react-slot": {
    "node_modules/@radix-ui/react-collection/node_modules/@radix-ui/react-primitive": {
      "version": "1.2.2",
      "version": "2.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.2.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
      "integrity": "sha512-y7TBO4xN4Y94FvcWIOIh18fM4R1A8S4q1jhoz4PNzOoHsFcN8pogcFmZrTYAm4F9VRUrWP/Mw7xSKybIeRI+CQ==",
      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2"
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
@ -2342,17 +2370,17 @@
      }
    },
    "node_modules/@radix-ui/react-dropdown-menu": {
      "version": "2.1.14",
      "version": "2.1.16",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.14.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz",
      "integrity": "sha512-lzuyNjoWOoaMFE/VC5FnAAYM16JmQA8ZmucOXtlhm2kKR5TSU95YLAueQ4JYuRmUJmBvSqXaVFGIfuukybwZJQ==",
      "integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/primitive": "1.1.2",
        "@radix-ui/primitive": "1.1.3",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-id": "1.1.1",
        "@radix-ui/react-menu": "2.1.14",
        "@radix-ui/react-menu": "2.1.16",
        "@radix-ui/react-primitive": "2.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-controllable-state": "1.2.2"
      },
      "peerDependencies": {
@ -2370,6 +2398,35 @@
      }
    },
    "node_modules/@radix-ui/react-dropdown-menu/node_modules/@radix-ui/primitive": {
      "version": "1.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
      "license": "MIT"
    },
    "node_modules/@radix-ui/react-dropdown-menu/node_modules/@radix-ui/react-primitive": {
      "version": "2.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-focus-guards": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.2.tgz",
@ -2429,26 +2486,26 @@
      }
    },
    "node_modules/@radix-ui/react-menu": {
      "version": "2.1.14",
      "version": "2.1.16",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.14.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz",
      "integrity": "sha512-0zSiBAIFq9GSKoSH5PdEaQeRB3RnEGxC+H2P0egtnKoKKLNBH8VBHyVO6/jskhjAezhOIplyRUj7U2lds9A+Yg==",
      "integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/primitive": "1.1.2",
        "@radix-ui/primitive": "1.1.3",
        "@radix-ui/react-collection": "1.1.6",
        "@radix-ui/react-collection": "1.1.7",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-direction": "1.1.1",
        "@radix-ui/react-dismissable-layer": "1.1.9",
        "@radix-ui/react-dismissable-layer": "1.1.11",
        "@radix-ui/react-focus-guards": "1.1.2",
        "@radix-ui/react-focus-guards": "1.1.3",
        "@radix-ui/react-focus-scope": "1.1.6",
        "@radix-ui/react-focus-scope": "1.1.7",
        "@radix-ui/react-id": "1.1.1",
        "@radix-ui/react-popper": "1.2.6",
        "@radix-ui/react-popper": "1.2.8",
        "@radix-ui/react-portal": "1.1.8",
        "@radix-ui/react-portal": "1.1.9",
        "@radix-ui/react-presence": "1.1.4",
        "@radix-ui/react-presence": "1.1.5",
        "@radix-ui/react-primitive": "2.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-roving-focus": "1.1.9",
        "@radix-ui/react-roving-focus": "1.1.11",
        "@radix-ui/react-slot": "1.2.2",
        "@radix-ui/react-slot": "1.2.3",
        "@radix-ui/react-use-callback-ref": "1.1.1",
        "aria-hidden": "^1.2.4",
        "react-remove-scroll": "^2.6.3"
@ -2468,14 +2525,44 @@
      }
    },
    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-slot": {
    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/primitive": {
      "version": "1.2.2",
      "version": "1.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.2.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
      "integrity": "sha512-y7TBO4xN4Y94FvcWIOIh18fM4R1A8S4q1jhoz4PNzOoHsFcN8pogcFmZrTYAm4F9VRUrWP/Mw7xSKybIeRI+CQ==",
      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
      "license": "MIT"
    },
    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-dismissable-layer": {
      "version": "1.1.11",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
      "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2"
        "@radix-ui/primitive": "1.1.3",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-callback-ref": "1.1.1",
        "@radix-ui/react-use-escape-keydown": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-focus-guards": {
      "version": "1.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
      "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==",
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
@ -2486,17 +2573,113 @@
      }
    },
    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-focus-scope": {
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
      "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-callback-ref": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-portal": {
      "version": "1.1.9",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-presence": {
      "version": "1.1.5",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
      "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-primitive": {
      "version": "2.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-popper": {
      "version": "1.2.6",
      "version": "1.2.8",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.6.tgz",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
      "integrity": "sha512-7iqXaOWIjDBfIG7aq8CUEeCSsQMLFdn7VEE8TaFz704DtEzpPHR7w/uuzRflvKgltqSAImgcmxQ7fFX3X7wasg==",
      "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
      "license": "MIT",
      "dependencies": {
        "@floating-ui/react-dom": "^2.0.0",
        "@radix-ui/react-arrow": "1.1.6",
        "@radix-ui/react-arrow": "1.1.7",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-primitive": "2.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-callback-ref": "1.1.1",
|
||||||
"@radix-ui/react-use-layout-effect": "1.1.1",
|
"@radix-ui/react-use-layout-effect": "1.1.1",
|
||||||
"@radix-ui/react-use-rect": "1.1.1",
|
"@radix-ui/react-use-rect": "1.1.1",
|
||||||
|
@ -2518,6 +2701,29 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-popper/node_modules/@radix-ui/react-primitive": {
|
||||||
|
"version": "2.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
|
||||||
|
"integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/react-slot": "1.2.3"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-portal": {
|
"node_modules/@radix-ui/react-portal": {
|
||||||
"version": "1.1.8",
|
"version": "1.1.8",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.8.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.8.tgz",
|
||||||
|
@ -2608,18 +2814,18 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@radix-ui/react-roving-focus": {
|
"node_modules/@radix-ui/react-roving-focus": {
|
||||||
"version": "1.1.9",
|
"version": "1.1.11",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.9.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz",
|
||||||
"integrity": "sha512-ZzrIFnMYHHCNqSNCsuN6l7wlewBEq0O0BCSBkabJMFXVO51LRUTq71gLP1UxFvmrXElqmPjA5VX7IqC9VpazAQ==",
|
"integrity": "sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@radix-ui/primitive": "1.1.2",
|
"@radix-ui/primitive": "1.1.3",
|
||||||
"@radix-ui/react-collection": "1.1.6",
|
"@radix-ui/react-collection": "1.1.7",
|
||||||
"@radix-ui/react-compose-refs": "1.1.2",
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
"@radix-ui/react-context": "1.1.2",
|
"@radix-ui/react-context": "1.1.2",
|
||||||
"@radix-ui/react-direction": "1.1.1",
|
"@radix-ui/react-direction": "1.1.1",
|
||||||
"@radix-ui/react-id": "1.1.1",
|
"@radix-ui/react-id": "1.1.1",
|
||||||
"@radix-ui/react-primitive": "2.1.2",
|
"@radix-ui/react-primitive": "2.1.3",
|
||||||
"@radix-ui/react-use-callback-ref": "1.1.1",
|
"@radix-ui/react-use-callback-ref": "1.1.1",
|
||||||
"@radix-ui/react-use-controllable-state": "1.2.2"
|
"@radix-ui/react-use-controllable-state": "1.2.2"
|
||||||
},
|
},
|
||||||
|
@ -2638,6 +2844,35 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-roving-focus/node_modules/@radix-ui/primitive": {
|
||||||
|
"version": "1.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
|
||||||
|
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-roving-focus/node_modules/@radix-ui/react-primitive": {
|
||||||
|
"version": "2.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
|
||||||
|
"integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/react-slot": "1.2.3"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-select": {
|
"node_modules/@radix-ui/react-select": {
|
||||||
"version": "2.2.5",
|
"version": "2.2.5",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.5.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.5.tgz",
|
||||||
|
@ -2681,55 +2916,6 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-arrow": {
|
|
||||||
"version": "1.1.7",
|
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
|
|
||||||
"integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"@radix-ui/react-primitive": "2.1.3"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"@types/react": "*",
|
|
||||||
"@types/react-dom": "*",
|
|
||||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
|
||||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"@types/react": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"@types/react-dom": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-collection": {
|
|
||||||
"version": "1.1.7",
|
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
|
|
||||||
"integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"@radix-ui/react-compose-refs": "1.1.2",
|
|
||||||
"@radix-ui/react-context": "1.1.2",
|
|
||||||
"@radix-ui/react-primitive": "2.1.3",
|
|
||||||
"@radix-ui/react-slot": "1.2.3"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"@types/react": "*",
|
|
||||||
"@types/react-dom": "*",
|
|
||||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
|
||||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"@types/react": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"@types/react-dom": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-dismissable-layer": {
|
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-dismissable-layer": {
|
||||||
"version": "1.1.10",
|
"version": "1.1.10",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.10.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.10.tgz",
|
||||||
|
@ -2965,29 +3151,6 @@
|
||||||
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
|
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-arrow": {
|
|
||||||
"version": "1.1.7",
|
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
|
|
||||||
"integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"@radix-ui/react-primitive": "2.1.3"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"@types/react": "*",
|
|
||||||
"@types/react-dom": "*",
|
|
||||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
|
||||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"@types/react": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"@types/react-dom": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-dismissable-layer": {
|
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-dismissable-layer": {
|
||||||
"version": "1.1.11",
|
"version": "1.1.11",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
|
||||||
|
@ -3015,38 +3178,6 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-popper": {
|
|
||||||
"version": "1.2.8",
|
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
|
|
||||||
"integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"@floating-ui/react-dom": "^2.0.0",
|
|
||||||
"@radix-ui/react-arrow": "1.1.7",
|
|
||||||
"@radix-ui/react-compose-refs": "1.1.2",
|
|
||||||
"@radix-ui/react-context": "1.1.2",
|
|
||||||
"@radix-ui/react-primitive": "2.1.3",
|
|
||||||
"@radix-ui/react-use-callback-ref": "1.1.1",
|
|
||||||
"@radix-ui/react-use-layout-effect": "1.1.1",
|
|
||||||
"@radix-ui/react-use-rect": "1.1.1",
|
|
||||||
"@radix-ui/react-use-size": "1.1.1",
|
|
||||||
"@radix-ui/rect": "1.1.1"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"@types/react": "*",
|
|
||||||
"@types/react-dom": "*",
|
|
||||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
|
||||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"@types/react": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"@types/react-dom": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-portal": {
|
"node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-portal": {
|
||||||
"version": "1.1.9",
|
"version": "1.1.9",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
|
||||||
|
@ -3447,6 +3578,13 @@
|
||||||
"tailwindcss": "4.1.6"
|
"tailwindcss": "4.1.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@tailwindcss/node/node_modules/tailwindcss": {
|
||||||
|
"version": "4.1.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
|
||||||
|
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@tailwindcss/oxide": {
|
"node_modules/@tailwindcss/oxide": {
|
||||||
"version": "4.1.6",
|
"version": "4.1.6",
|
||||||
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz",
|
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz",
|
||||||
|
@ -3707,6 +3845,13 @@
|
||||||
"tailwindcss": "4.1.6"
|
"tailwindcss": "4.1.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@tailwindcss/postcss/node_modules/tailwindcss": {
|
||||||
|
"version": "4.1.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
|
||||||
|
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@testing-library/dom": {
|
"node_modules/@testing-library/dom": {
|
||||||
"version": "10.4.1",
|
"version": "10.4.1",
|
||||||
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
|
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
|
||||||
|
@ -4079,9 +4224,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/react-dom": {
|
"node_modules/@types/react-dom": {
|
||||||
"version": "19.1.5",
|
"version": "19.1.9",
|
||||||
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.1.5.tgz",
|
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.1.9.tgz",
|
||||||
"integrity": "sha512-CMCjrWucUBZvohgZxkjd6S9h0nZxXjzus6yDfUb+xLxYM7VvjKNH1tQrE9GWLql1XoOP4/Ds3bwFqShHUYraGg==",
|
"integrity": "sha512-qXRuZaOsAdXKFyOhRBg6Lqqc0yay13vN7KrIg4L7N4aaHN68ma9OK3NE1BoDFgFOTfM7zg+3/8+2n8rLUH3OKQ==",
|
||||||
"devOptional": true,
|
"devOptional": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
|
@ -10147,9 +10292,9 @@
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/llama-stack-client": {
|
"node_modules/llama-stack-client": {
|
||||||
"version": "0.2.20",
|
"version": "0.2.21",
|
||||||
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.20.tgz",
|
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.21.tgz",
|
||||||
"integrity": "sha512-1vD5nizTX5JEW8TADxKgy/P1W8YZoPSpdnmfxbdYbWgpQ3BWtbvLS6jmDk7VwVA5fRC4895VfHsRDfS1liHarw==",
|
"integrity": "sha512-rjU2Vx5xStxDYavU8K1An/SYXiQQjroLcK98B+p0Paz/a7OgRao2S0YwvThJjPUyChY4fO03UIXP9LpmHqlXWQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@types/node": "^18.11.18",
|
"@types/node": "^18.11.18",
|
||||||
|
@ -10240,9 +10385,9 @@
|
||||||
"license": "ISC"
|
"license": "ISC"
|
||||||
},
|
},
|
||||||
"node_modules/lucide-react": {
|
"node_modules/lucide-react": {
|
||||||
"version": "0.510.0",
|
"version": "0.542.0",
|
||||||
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.510.0.tgz",
|
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.542.0.tgz",
|
||||||
"integrity": "sha512-p8SQRAMVh7NhsAIETokSqDrc5CHnDLbV29mMnzaXx+Vc/hnqQzwI2r0FMWCcoTXnbw2KEjy48xwpGdEL+ck06Q==",
|
"integrity": "sha512-w3hD8/SQB7+lzU2r4VdFyzzOzKnUjTZIF/MQJGSSvni7Llewni4vuViRppfRAa2guOsY5k4jZyxw/i9DQHv+dw==",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||||
|
@ -12448,24 +12593,24 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react": {
|
"node_modules/react": {
|
||||||
"version": "19.1.0",
|
"version": "19.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz",
|
||||||
"integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
|
"integrity": "sha512-w8nqGImo45dmMIfljjMwOGtbmC/mk4CMYhWIicdSflH91J9TyCyczcPFXJzrZ/ZXcgGRFeP6BU0BEJTw6tZdfQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-dom": {
|
"node_modules/react-dom": {
|
||||||
"version": "19.1.0",
|
"version": "19.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.1.tgz",
|
||||||
"integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
|
"integrity": "sha512-Dlq/5LAZgF0Gaz6yiqZCf6VCcZs1ghAJyrsu84Q/GT0gV+mCxbfmKNoGRKBYMJ8IEdGPqu49YWXD02GCknEDkw==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"scheduler": "^0.26.0"
|
"scheduler": "^0.26.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"react": "^19.1.0"
|
"react": "^19.1.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-is": {
|
"node_modules/react-is": {
|
||||||
|
@ -13285,9 +13430,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/sonner": {
|
"node_modules/sonner": {
|
||||||
"version": "2.0.6",
|
"version": "2.0.7",
|
||||||
"resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.6.tgz",
|
"resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.7.tgz",
|
||||||
"integrity": "sha512-yHFhk8T/DK3YxjFQXIrcHT1rGEeTLliVzWbO0xN8GberVun2RiBnxAjXAYpZrqwEVHBG9asI/Li8TAAhN9m59Q==",
|
"integrity": "sha512-W6ZN4p58k8aDKA4XPcx2hpIQXBRAgyiWVkYhT7CvK6D3iAu7xjvVyhQHg2/iaKJZ1XVJ4r7XuwGL+WGEK37i9w==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc",
|
"react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc",
|
||||||
|
@ -13712,9 +13857,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/tailwindcss": {
|
"node_modules/tailwindcss": {
|
||||||
"version": "4.1.6",
|
"version": "4.1.13",
|
||||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
|
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz",
|
||||||
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
|
"integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
|
|
@@ -15,7 +15,7 @@
   "dependencies": {
     "@radix-ui/react-collapsible": "^1.1.12",
     "@radix-ui/react-dialog": "^1.1.13",
-    "@radix-ui/react-dropdown-menu": "^2.1.14",
+    "@radix-ui/react-dropdown-menu": "^2.1.16",
     "@radix-ui/react-select": "^2.2.5",
     "@radix-ui/react-separator": "^1.1.7",
     "@radix-ui/react-slot": "^1.2.3",
@@ -23,18 +23,18 @@
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "framer-motion": "^12.23.12",
-    "llama-stack-client": "^0.2.20",
+    "llama-stack-client": "^0.2.21",
-    "lucide-react": "^0.510.0",
+    "lucide-react": "^0.542.0",
     "next": "15.3.3",
     "next-auth": "^4.24.11",
     "next-themes": "^0.4.6",
     "react": "^19.0.0",
-    "react-dom": "^19.0.0",
+    "react-dom": "^19.1.1",
     "react-markdown": "^10.1.0",
     "remark-gfm": "^4.0.1",
     "remeda": "^2.30.0",
     "shiki": "^1.29.2",
-    "sonner": "^2.0.6",
+    "sonner": "^2.0.7",
     "tailwind-merge": "^3.3.1"
   },
   "devDependencies": {
@@ -7,7 +7,7 @@ required-version = ">=0.7.0"

 [project]
 name = "llama_stack"
-version = "0.2.20"
+version = "0.2.21"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"
@@ -31,9 +31,8 @@ dependencies = [
     "huggingface-hub>=0.34.0,<1.0",
     "jinja2>=3.1.6",
     "jsonschema",
-    "llama-stack-client>=0.2.20",
+    "llama-stack-client>=0.2.21",
-    "llama-api-client>=0.1.2",
-    "openai>=1.99.6",
+    "openai>=1.100.0", # for expires_after support
     "prompt-toolkit",
     "python-dotenv",
     "python-jose[cryptography]",
@@ -56,7 +55,7 @@
 ui = [
     "streamlit",
     "pandas",
-    "llama-stack-client>=0.2.20",
+    "llama-stack-client>=0.2.21",
     "streamlit-option-menu",
 ]

@@ -81,7 +80,6 @@ dev = [
 unit = [
     "sqlite-vec",
     "ollama",
-    "openai",
     "aiosqlite",
     "aiohttp",
     "psycopg2-binary>=2.9.0",
@@ -93,7 +91,7 @@ unit = [
     "sqlalchemy[asyncio]>=2.0.41",
     "blobfile",
     "faiss-cpu",
-    "pymilvus>=2.5.12",
+    "pymilvus>=2.6.1",
     "milvus-lite>=2.5.0",
     "litellm",
     "together",
@@ -106,7 +104,6 @@ unit = [
 # separately. If you are using "uv" to execute your tests, you can use the "--group" flag to specify extra
 # dependencies.
 test = [
-    "openai>=1.100.0", # for expires_after support
     "aiosqlite",
     "aiohttp",
     "torch>=2.6.0",
@@ -115,13 +112,13 @@ test = [
     "psycopg2-binary>=2.9.0",
     "pypdf",
     "mcp",
-    "datasets",
+    "datasets>=4.0.0",
     "autoevals",
     "transformers",
     "sqlalchemy",
     "sqlalchemy[asyncio]>=2.0.41",
     "requests",
-    "pymilvus>=2.5.12",
+    "pymilvus>=2.6.1",
     "milvus-lite>=2.5.0",
     "weaviate-client>=4.16.4",
 ]
@@ -146,7 +143,7 @@ docs = [
 ]
 codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
 benchmark = [
-    "locust>=2.37.14",
+    "locust>=2.39.1",
 ]

 [project.urls]
71  scripts/get_setup_env.py  Executable file
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Small helper script to extract environment variables from a test setup.
+Used by integration-tests.sh to set environment variables before starting the server.
+"""
+
+import argparse
+import sys
+
+from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
+
+
+def get_setup_env_vars(setup_name, suite_name=None):
+    """
+    Get environment variables for a setup, with optional suite default fallback.
+
+    Args:
+        setup_name: Name of the setup (e.g., 'ollama', 'gpt')
+        suite_name: Optional suite name to get default setup if setup_name is None
+
+    Returns:
+        Dictionary of environment variables
+    """
+    # If no setup specified, try to get default from suite
+    if not setup_name and suite_name:
+        suite = SUITE_DEFINITIONS.get(suite_name)
+        if suite and suite.default_setup:
+            setup_name = suite.default_setup
+
+    if not setup_name:
+        return {}
+
+    setup = SETUP_DEFINITIONS.get(setup_name)
+    if not setup:
+        print(
+            f"Error: Unknown setup '{setup_name}'. Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    return setup.env
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Extract environment variables from a test setup")
+    parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)")
+    parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided")
+    parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)")
+
+    args = parser.parse_args()
+
+    env_vars = get_setup_env_vars(args.setup, args.suite)
+
+    if args.format == "bash":
+        # Output as bash export statements
+        for key, value in env_vars.items():
+            print(f"export {key}='{value}'")
+    elif args.format == "json":
+        import json
+
+        print(json.dumps(env_vars))
+
+
+if __name__ == "__main__":
+    main()
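For reference, integration-tests.sh calls this helper with `--suite`, `--setup`, and `--format bash`; a minimal sketch of running it by hand, assuming you invoke it from the repository root so that `tests.integration.suites` is importable (the example output line is illustrative, not the exact variables any particular setup defines):

```bash
# Print the environment a setup would export, without running any tests.
PYTHONPATH=. python scripts/get_setup_env.py --suite vision --format bash
# illustrative output: export OLLAMA_URL='http://0.0.0.0:11434'

# JSON output, e.g. for consumption by other tooling
PYTHONPATH=. python scripts/get_setup_env.py --setup gpt --format json
```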
@@ -14,8 +14,8 @@ set -euo pipefail
 # Default values
 BRANCH=""
 TEST_SUBDIRS=""
-TEST_PROVIDER="ollama"
+TEST_SETUP="ollama"
-RUN_VISION_TESTS=false
+TEST_SUITE="base"
 TEST_PATTERN=""

 # Help function
@@ -27,24 +27,24 @@ Trigger the integration test recording workflow remotely. This way you do not ne

 OPTIONS:
     -b, --branch BRANCH            Branch to run the workflow on (defaults to current branch)
-    -s, --test-subdirs DIRS        Comma-separated list of test subdirectories to run (REQUIRED)
+    -t, --suite SUITE              Test suite to use: base, responses, vision, etc. (default: base)
-    -p, --test-provider PROVIDER   Test provider to use: vllm or ollama (default: ollama)
+    -p, --setup SETUP              Test setup to use: vllm, ollama, gpt, etc. (default: ollama)
-    -v, --run-vision-tests         Include vision tests in the recording
+    -s, --subdirs DIRS             Comma-separated list of test subdirectories to run (overrides suite)
-    -k, --test-pattern PATTERN     Regex pattern to pass to pytest -k
+    -k, --pattern PATTERN          Regex pattern to pass to pytest -k
     -h, --help                     Show this help message

 EXAMPLES:
     # Record tests for current branch with agents subdirectory
-    $0 --test-subdirs "agents"
+    $0 --subdirs "agents"

     # Record tests for specific branch with vision tests
-    $0 -b my-feature-branch --test-subdirs "inference" --run-vision-tests
+    $0 -b my-feature-branch --suite vision

-    # Record multiple test subdirectories with specific provider
+    # Record multiple test subdirectories with specific setup
-    $0 --test-subdirs "agents,inference" --test-provider vllm
+    $0 --subdirs "agents,inference" --setup vllm

     # Record tests matching a specific pattern
-    $0 --test-subdirs "inference" --test-pattern "test_streaming"
+    $0 --subdirs "inference" --pattern "test_streaming"

 EOF
 }
@@ -63,19 +63,19 @@ while [[ $# -gt 0 ]]; do
             BRANCH="$2"
             shift 2
             ;;
-        -s|--test-subdirs)
+        -s|--subdirs)
             TEST_SUBDIRS="$2"
             shift 2
             ;;
-        -p|--test-provider)
+        -p|--setup)
-            TEST_PROVIDER="$2"
+            TEST_SETUP="$2"
             shift 2
             ;;
-        -v|--run-vision-tests)
+        -t|--suite)
-            RUN_VISION_TESTS=true
+            TEST_SUITE="$2"
-            shift
+            shift 2
             ;;
-        -k|--test-pattern)
+        -k|--pattern)
             TEST_PATTERN="$2"
             shift 2
             ;;
@@ -92,22 +92,17 @@ while [[ $# -gt 0 ]]; do
 done

 # Validate required parameters
-if [[ -z "$TEST_SUBDIRS" ]]; then
+if [[ -z "$TEST_SUBDIRS" && -z "$TEST_SUITE" ]]; then
-    echo "Error: --test-subdirs is required"
+    echo "Error: --subdirs or --suite is required"
-    echo "Please specify which test subdirectories to run, e.g.:"
+    echo "Please specify which test subdirectories to run or test suite to use, e.g.:"
-    echo "  $0 --test-subdirs \"agents,inference\""
+    echo "  $0 --subdirs \"agents,inference\""
-    echo "  $0 --test-subdirs \"inference\" --run-vision-tests"
+    echo "  $0 --suite vision"
     echo ""
     exit 1
 fi

-# Validate test provider
+# Validate test setup (optional - setups are validated by the workflow itself)
-if [[ "$TEST_PROVIDER" != "vllm" && "$TEST_PROVIDER" != "ollama" ]]; then
+# Common setups: ollama, vllm, gpt, etc.
-    echo "❌ Error: Invalid test provider '$TEST_PROVIDER'"
-    echo "   Supported providers: vllm, ollama"
-    echo "   Example: $0 --test-subdirs \"agents\" --test-provider vllm"
-    exit 1
-fi

 # Check if required tools are installed
 if ! command -v gh &> /dev/null; then
@@ -237,22 +232,25 @@ fi
 # Build the workflow dispatch command
 echo "Triggering integration test recording workflow..."
 echo "Branch: $BRANCH"
-echo "Test provider: $TEST_PROVIDER"
+echo "Test setup: $TEST_SETUP"
 echo "Test subdirs: $TEST_SUBDIRS"
-echo "Run vision tests: $RUN_VISION_TESTS"
+echo "Test suite: $TEST_SUITE"
 echo "Test pattern: ${TEST_PATTERN:-"(none)"}"
 echo ""

 # Prepare inputs for gh workflow run
-INPUTS="-f test-subdirs='$TEST_SUBDIRS'"
+INPUTS=
-if [[ -n "$TEST_PROVIDER" ]]; then
+if [[ -n "$TEST_SUBDIRS" ]]; then
-    INPUTS="$INPUTS -f test-provider='$TEST_PROVIDER'"
+    INPUTS="$INPUTS -f subdirs='$TEST_SUBDIRS'"
 fi
-if [[ "$RUN_VISION_TESTS" == "true" ]]; then
+if [[ -n "$TEST_SETUP" ]]; then
-    INPUTS="$INPUTS -f run-vision-tests=true"
+    INPUTS="$INPUTS -f test-setup='$TEST_SETUP'"
+fi
+if [[ -n "$TEST_SUITE" ]]; then
+    INPUTS="$INPUTS -f suite='$TEST_SUITE'"
 fi
 if [[ -n "$TEST_PATTERN" ]]; then
-    INPUTS="$INPUTS -f test-pattern='$TEST_PATTERN'"
+    INPUTS="$INPUTS -f pattern='$TEST_PATTERN'"
 fi

 # Run the workflow
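For illustration, this is roughly what the updated input-building logic assembles for one of the help-text examples; the final `gh workflow run` invocation itself is outside this hunk, so only the inputs string is sketched here:

```bash
# ./scripts/github/schedule-record-workflow.sh --subdirs "agents,inference" --setup vllm
# would build (illustrative; TEST_SUITE keeps its "base" default):
INPUTS=" -f subdirs='agents,inference' -f test-setup='vllm' -f suite='base'"
```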
@@ -13,10 +13,10 @@ set -euo pipefail

 # Default values
 STACK_CONFIG=""
-PROVIDER=""
+TEST_SUITE="base"
+TEST_SETUP=""
 TEST_SUBDIRS=""
 TEST_PATTERN=""
-RUN_VISION_TESTS="false"
 INFERENCE_MODE="replay"
 EXTRA_PARAMS=""

@@ -27,25 +27,30 @@ Usage: $0 [OPTIONS]

 Options:
     --stack-config STRING    Stack configuration to use (required)
-    --provider STRING        Provider to use (ollama, vllm, etc.) (required)
+    --suite STRING           Test suite to run (default: 'base')
-    --test-subdirs STRING    Comma-separated list of test subdirectories to run (default: 'inference')
+    --setup STRING           Test setup (models, env) to use (e.g., 'ollama', 'ollama-vision', 'gpt', 'vllm')
-    --run-vision-tests       Run vision tests instead of regular tests
     --inference-mode STRING  Inference mode: record or replay (default: replay)
-    --test-pattern STRING    Regex pattern to pass to pytest -k
+    --subdirs STRING         Comma-separated list of test subdirectories to run (overrides suite)
+    --pattern STRING         Regex pattern to pass to pytest -k
     --help                   Show this help message

+Suites are defined in tests/integration/suites.py and define which tests to run.
+Setups are defined in tests/integration/setups.py and provide global configuration (models, env).
+
+You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
+
 Examples:
     # Basic inference tests with ollama
-    $0 --stack-config server:ci-tests --provider ollama
+    $0 --stack-config server:ci-tests --suite base --setup ollama

     # Multiple test directories with vllm
-    $0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents'
+    $0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm

     # Vision tests with ollama
-    $0 --stack-config server:ci-tests --provider ollama --run-vision-tests
+    $0 --stack-config server:ci-tests --suite vision # default setup for this suite is ollama-vision

     # Record mode for updating test recordings
-    $0 --stack-config server:ci-tests --provider ollama --inference-mode record
+    $0 --stack-config server:ci-tests --suite base --inference-mode record
 EOF
 }

@@ -56,23 +61,23 @@ while [[ $# -gt 0 ]]; do
             STACK_CONFIG="$2"
             shift 2
             ;;
-        --provider)
+        --setup)
-            PROVIDER="$2"
+            TEST_SETUP="$2"
             shift 2
             ;;
-        --test-subdirs)
+        --subdirs)
             TEST_SUBDIRS="$2"
             shift 2
             ;;
-        --run-vision-tests)
+        --suite)
-            RUN_VISION_TESTS="true"
+            TEST_SUITE="$2"
-            shift
+            shift 2
             ;;
         --inference-mode)
             INFERENCE_MODE="$2"
             shift 2
             ;;
-        --test-pattern)
+        --pattern)
             TEST_PATTERN="$2"
             shift 2
             ;;
@@ -96,18 +101,23 @@ if [[ -z "$STACK_CONFIG" ]]; then
     exit 1
 fi

-if [[ -z "$PROVIDER" ]]; then
+if [[ -z "$TEST_SETUP" && -n "$TEST_SUBDIRS" ]]; then
-    echo "Error: --provider is required"
+    echo "Error: --test-setup is required when --test-subdirs is provided"
     usage
     exit 1
 fi

+if [[ -z "$TEST_SUITE" && -z "$TEST_SUBDIRS" ]]; then
+    echo "Error: --test-suite or --test-subdirs is required"
+    exit 1
+fi
+
 echo "=== Llama Stack Integration Test Runner ==="
 echo "Stack Config: $STACK_CONFIG"
-echo "Provider: $PROVIDER"
+echo "Setup: $TEST_SETUP"
-echo "Test Subdirs: $TEST_SUBDIRS"
-echo "Vision Tests: $RUN_VISION_TESTS"
 echo "Inference Mode: $INFERENCE_MODE"
+echo "Test Suite: $TEST_SUITE"
+echo "Test Subdirs: $TEST_SUBDIRS"
 echo "Test Pattern: $TEST_PATTERN"
 echo ""

@@ -122,31 +132,28 @@ echo ""

 # Set environment variables
 export LLAMA_STACK_CLIENT_TIMEOUT=300
-export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE"
-
-# Configure provider-specific settings
-if [[ "$PROVIDER" == "ollama" ]]; then
-    export OLLAMA_URL="http://0.0.0.0:11434"
-    export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
-    export SAFETY_MODEL="ollama/llama-guard3:1b"
-    EXTRA_PARAMS="--safety-shield=llama-guard"
-else
-    export VLLM_URL="http://localhost:8000/v1"
-    export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
-    EXTRA_PARAMS=""
-fi

 THIS_DIR=$(dirname "$0")

+if [[ -n "$TEST_SETUP" ]]; then
+    EXTRA_PARAMS="--setup=$TEST_SETUP"
+fi
+
+# Apply setup-specific environment variables (needed for server startup and tests)
+echo "=== Applying Setup Environment Variables ==="
+
+# the server needs this
+export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE"
+
+SETUP_ENV=$(PYTHONPATH=$THIS_DIR/.. python "$THIS_DIR/get_setup_env.py" --suite "$TEST_SUITE" --setup "$TEST_SETUP" --format bash)
+echo "Setting up environment variables:"
+echo "$SETUP_ENV"
+eval "$SETUP_ENV"
+echo ""
+
 ROOT_DIR="$THIS_DIR/.."
 cd $ROOT_DIR

-# Set recording directory
-if [[ "$RUN_VISION_TESTS" == "true" ]]; then
-    export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings/vision"
-else
-    export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings"
-fi
-
 # check if "llama" and "pytest" are available. this script does not use `uv run` given
 # it can be used in a pre-release environment where we have not been able to tell
 # uv about pre-release dependencies properly (yet).
@@ -162,6 +169,18 @@ fi

 # Start Llama Stack Server if needed
 if [[ "$STACK_CONFIG" == *"server:"* ]]; then
+    stop_server() {
+        echo "Stopping Llama Stack Server..."
+        pids=$(lsof -i :8321 | awk 'NR>1 {print $2}')
+        if [[ -n "$pids" ]]; then
+            echo "Killing Llama Stack Server processes: $pids"
+            kill -9 $pids
+        else
+            echo "No Llama Stack Server processes found ?!"
+        fi
+        echo "Llama Stack Server stopped"
+    }
+
     # check if server is already running
     if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then
         echo "Llama Stack Server is already running, skipping start"
@@ -185,14 +204,16 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then
         done
         echo ""
     fi

+    trap stop_server EXIT ERR INT TERM
 fi

 # Run tests
 echo "=== Running Integration Tests ==="
 EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

-# Additional exclusions for vllm provider
+# Additional exclusions for vllm setup
-if [[ "$PROVIDER" == "vllm" ]]; then
+if [[ "$TEST_SETUP" == "vllm" ]]; then
     EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
 fi

@@ -201,56 +222,12 @@ if [[ -n "$TEST_PATTERN" ]]; then
     PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
 fi

-# Run vision tests if specified
-if [[ "$RUN_VISION_TESTS" == "true" ]]; then
-    echo "Running vision tests..."
-    set +e
-    pytest -s -v tests/integration/inference/test_vision_inference.py \
-        --stack-config="$STACK_CONFIG" \
-        -k "$PYTEST_PATTERN" \
-        --vision-model=ollama/llama3.2-vision:11b \
-        --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
-        --color=yes $EXTRA_PARAMS \
-        --capture=tee-sys
-    exit_code=$?
-    set -e
-
-    if [ $exit_code -eq 0 ]; then
-        echo "✅ Vision tests completed successfully"
-    elif [ $exit_code -eq 5 ]; then
-        echo "⚠️ No vision tests collected (pattern matched no tests)"
-    else
-        echo "❌ Vision tests failed"
-        exit 1
-    fi
-    exit 0
-fi
-
-# Run regular tests
-if [[ -z "$TEST_SUBDIRS" ]]; then
-    TEST_SUBDIRS=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
-        sed 's|tests/integration/||' |
-        grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
-        sort)
-fi
 echo "Test subdirs to run: $TEST_SUBDIRS"

+if [[ -n "$TEST_SUBDIRS" ]]; then
     # Collect all test files for the specified test types
     TEST_FILES=""
     for test_subdir in $(echo "$TEST_SUBDIRS" | tr ',' '\n'); do
-        # Skip certain test types for vllm provider
-        if [[ "$PROVIDER" == "vllm" ]]; then
-            if [[ "$test_subdir" == "safety" ]] || [[ "$test_subdir" == "post_training" ]] || [[ "$test_subdir" == "tool_runtime" ]]; then
-                echo "Skipping $test_subdir for vllm provider"
-                continue
-            fi
-        fi
-
-        if [[ "$STACK_CONFIG" != *"server:"* ]] && [[ "$test_subdir" == "batches" ]]; then
-            echo "Skipping $test_subdir for library client until types are supported"
-            continue
-        fi
-
         if [[ -d "tests/integration/$test_subdir" ]]; then
             # Find all Python test files in this directory
             test_files=$(find tests/integration/$test_subdir -name "test_*.py" -o -name "*_test.py")
@@ -272,15 +249,23 @@ echo ""
 echo "=== Running all collected tests in a single pytest command ==="
 echo "Total test files: $(echo $TEST_FILES | wc -w)"

+    PYTEST_TARGET="$TEST_FILES"
+else
+    PYTEST_TARGET="tests/integration/"
+    EXTRA_PARAMS="$EXTRA_PARAMS --suite=$TEST_SUITE"
+fi
+
 set +e
-pytest -s -v $TEST_FILES \
+set -x
+pytest -s -v $PYTEST_TARGET \
     --stack-config="$STACK_CONFIG" \
+    --inference-mode="$INFERENCE_MODE" \
     -k "$PYTEST_PATTERN" \
-    --text-model="$TEXT_MODEL" \
+    $EXTRA_PARAMS \
-    --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
+    --color=yes \
-    --color=yes $EXTRA_PARAMS \
     --capture=tee-sys
 exit_code=$?
+set +x
 set -e

 if [ $exit_code -eq 0 ]; then
@@ -38,26 +38,15 @@ For running integration tests, you must provide a few things:
 - a distribution name (e.g., `starter`) or a path to a `run.yaml` file
 - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.

-- Whether you are using replay or live mode for inference. This is specified with the LLAMA_STACK_TEST_INFERENCE_MODE environment variable. The default mode currently is "live" -- that is certainly surprising, but we will fix this soon.
-
 - Any API keys you need to use should be set in the environment, or can be passed in with the --env option.

 You can run the integration tests in replay mode with:
 ```bash
 # Run all tests with existing recordings
-LLAMA_STACK_TEST_INFERENCE_MODE=replay \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
 uv run --group test \
   pytest -sv tests/integration/ --stack-config=starter
 ```

-If you don't specify LLAMA_STACK_TEST_INFERENCE_MODE, by default it will be in "live" mode -- that is, it will make real API calls.
-
-```bash
-# Test against live APIs
-FIREWORKS_API_KEY=your_key pytest -sv tests/integration/inference --stack-config=starter
-```
-
 ### Re-recording tests

 #### Local Re-recording (Manual Setup Required)
@@ -66,7 +55,6 @@ If you want to re-record tests locally, you can do so with:

 ```bash
 LLAMA_STACK_TEST_INFERENCE_MODE=record \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
 uv run --group test \
   pytest -sv tests/integration/ --stack-config=starter -k "<appropriate test name>"
 ```
@@ -89,7 +77,7 @@ You must be careful when re-recording. CI workflows assume a specific setup for
 ./scripts/github/schedule-record-workflow.sh --test-subdirs "agents,inference"

 # Record with vision tests enabled
-./scripts/github/schedule-record-workflow.sh --test-subdirs "inference" --run-vision-tests
+./scripts/github/schedule-record-workflow.sh --test-suite vision

 # Record with specific provider
 ./scripts/github/schedule-record-workflow.sh --test-subdirs "agents" --test-provider vllm
@ -6,8 +6,6 @@ Integration tests verify complete workflows across different providers using Lla
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Run all integration tests with existing recordings
|
# Run all integration tests with existing recordings
|
||||||
LLAMA_STACK_TEST_INFERENCE_MODE=replay \
|
|
||||||
LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
|
|
||||||
uv run --group test \
|
uv run --group test \
|
||||||
pytest -sv tests/integration/ --stack-config=starter
|
pytest -sv tests/integration/ --stack-config=starter
|
||||||
```
|
```
|
||||||
|
@ -42,6 +40,37 @@ Model parameters can be influenced by the following options:
|
||||||
Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped
|
Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped
|
||||||
if no model is specified.
|
if no model is specified.
|
||||||
|
|
||||||
|
### Suites and Setups
|
||||||
|
|
||||||
|
- `--suite`: single named suite that narrows which tests are collected.
|
||||||
|
- Available suites:
|
||||||
|
- `base`: collects most tests (excludes responses and post_training)
|
||||||
|
- `responses`: collects tests under `tests/integration/responses` (needs strong tool-calling models)
|
||||||
|
- `vision`: collects only `tests/integration/inference/test_vision_inference.py`
|
||||||
|
- `--setup`: global configuration that can be used with any suite. Setups prefill model/env defaults; explicit CLI flags always win.
|
||||||
|
- Available setups:
|
||||||
|
- `ollama`: Local Ollama provider with lightweight models (sets OLLAMA_URL, uses llama3.2:3b-instruct-fp16)
|
||||||
|
- `vllm`: VLLM provider for efficient local inference (sets VLLM_URL, uses Llama-3.2-1B-Instruct)
|
||||||
|
- `gpt`: OpenAI GPT models for high-quality responses (uses gpt-4o)
|
||||||
|
- `claude`: Anthropic Claude models for high-quality responses (uses claude-3-5-sonnet)
|
||||||
|
|
||||||
|
Examples
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Fast responses run with a strong tool-calling model
|
||||||
|
pytest -s -v tests/integration --stack-config=server:starter --suite=responses --setup=gpt
|
||||||
|
|
||||||
|
# Fast single-file vision run with Ollama defaults
|
||||||
|
pytest -s -v tests/integration --stack-config=server:starter --suite=vision --setup=ollama
|
||||||
|
|
||||||
|
# Base suite with VLLM for performance
|
||||||
|
pytest -s -v tests/integration --stack-config=server:starter --suite=base --setup=vllm
|
||||||
|
|
||||||
|
# Override a default from setup
|
||||||
|
pytest -s -v tests/integration --stack-config=server:starter \
|
||||||
|
--suite=responses --setup=gpt --embedding-model=text-embedding-3-small
|
||||||
|
```
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
### Testing against a Server
|
### Testing against a Server
|
||||||
|
@ -98,29 +127,24 @@ pytest -s -v tests/integration/vector_io/ \
|
||||||
|
|
||||||
The testing system supports three modes controlled by environment variables:
|
The testing system supports three modes controlled by environment variables:
|
||||||
|
|
||||||
### LIVE Mode (Default)
|
### REPLAY Mode (Default)
|
||||||
Tests make real API calls:
|
Uses cached responses instead of making API calls:
|
||||||
```bash
|
```bash
|
||||||
LLAMA_STACK_TEST_INFERENCE_MODE=live pytest tests/integration/
|
pytest tests/integration/
|
||||||
```
|
```
|
||||||
|
|
||||||
### RECORD Mode
|
### RECORD Mode
|
||||||
Captures API interactions for later replay:
|
Captures API interactions for later replay:
|
||||||
```bash
|
```bash
|
||||||
LLAMA_STACK_TEST_INFERENCE_MODE=record \
|
pytest tests/integration/inference/test_new_feature.py --inference-mode=record
|
||||||
LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
|
|
||||||
pytest tests/integration/inference/test_new_feature.py
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### REPLAY Mode
|
### LIVE Mode
|
||||||
Uses cached responses instead of making API calls:
|
Tests make real API calls (but not recorded):
|
||||||
```bash
|
```bash
|
||||||
LLAMA_STACK_TEST_INFERENCE_MODE=replay \
|
pytest tests/integration/ --inference-mode=live
|
||||||
LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
|
|
||||||
pytest tests/integration/
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Note that right now you must specify the recording directory. This is because different tests use different recording directories and we don't (yet) have a fool-proof way to map a test to a recording directory. We are working on this.
|
By default, the recording directory is `tests/integration/recordings`. You can override this by setting the `LLAMA_STACK_TEST_RECORDING_DIR` environment variable.
|
||||||
|
|
||||||
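For example, to replay against recordings kept somewhere other than the default location (the directory below is illustrative):

```bash
# Point the replay machinery at a custom recordings directory
LLAMA_STACK_TEST_RECORDING_DIR=/tmp/my-recordings \
  pytest tests/integration/
```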
## Managing Recordings
|
## Managing Recordings
|
||||||
|
|
||||||
|
@ -138,16 +162,14 @@ cat recordings/responses/abc123.json | jq '.'
|
||||||
#### Remote Re-recording (Recommended)
|
#### Remote Re-recording (Recommended)
|
||||||
Use the automated workflow script for easier re-recording:
|
Use the automated workflow script for easier re-recording:
|
||||||
```bash
|
```bash
|
||||||
./scripts/github/schedule-record-workflow.sh --test-subdirs "inference,agents"
|
./scripts/github/schedule-record-workflow.sh --subdirs "inference,agents"
|
||||||
```
|
```
|
||||||
See the [main testing guide](../README.md#remote-re-recording-recommended) for full details.
|
See the [main testing guide](../README.md#remote-re-recording-recommended) for full details.
|
||||||
|
|
||||||
#### Local Re-recording
|
#### Local Re-recording
|
||||||
```bash
|
```bash
|
||||||
# Re-record specific tests
|
# Re-record specific tests
|
||||||
LLAMA_STACK_TEST_INFERENCE_MODE=record \
|
pytest -s -v --stack-config=server:starter tests/integration/inference/test_modified.py --inference-mode=record
|
||||||
LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
|
|
||||||
pytest -s -v --stack-config=server:starter tests/integration/inference/test_modified.py
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Note that when re-recording tests, you must use a Stack pointing to a server (i.e., `server:starter`). This subtlety exists because the set of tests run against a server is a superset of the tests run with the library client.
|
Note that when re-recording tests, you must use a Stack pointing to a server (i.e., `server:starter`). This subtlety exists because the set of tests run against a server is a superset of the tests run with the library client.
|
||||||
|
|
|
@ -268,3 +268,58 @@ class TestBatchesIntegration:
|
||||||
|
|
||||||
deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
|
deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
|
||||||
assert deleted_error_file.deleted, f"Error file {final_batch.error_file_id} was not deleted successfully"
|
assert deleted_error_file.deleted, f"Error file {final_batch.error_file_id} was not deleted successfully"
|
||||||
|
|
||||||
|
def test_batch_e2e_completions(self, openai_client, batch_helper, text_model_id):
|
||||||
|
"""Run an end-to-end batch with a single successful text completion request."""
|
||||||
|
request_body = {"model": text_model_id, "prompt": "Say completions", "max_tokens": 20}
|
||||||
|
|
||||||
|
batch_requests = [
|
||||||
|
{
|
||||||
|
"custom_id": "success-1",
|
||||||
|
"method": "POST",
|
||||||
|
"url": "/v1/completions",
|
||||||
|
"body": request_body,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
with batch_helper.create_file(batch_requests) as uploaded_file:
|
||||||
|
batch = openai_client.batches.create(
|
||||||
|
input_file_id=uploaded_file.id,
|
||||||
|
endpoint="/v1/completions",
|
||||||
|
completion_window="24h",
|
||||||
|
metadata={"test": "e2e_completions_success"},
|
||||||
|
)
|
||||||
|
|
||||||
|
final_batch = batch_helper.wait_for(
|
||||||
|
batch.id,
|
||||||
|
max_wait_time=3 * 60,
|
||||||
|
expected_statuses={"completed"},
|
||||||
|
timeout_action="skip",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert final_batch.status == "completed"
|
||||||
|
assert final_batch.request_counts is not None
|
||||||
|
assert final_batch.request_counts.total == 1
|
||||||
|
assert final_batch.request_counts.completed == 1
|
||||||
|
assert final_batch.output_file_id is not None
|
||||||
|
|
||||||
|
output_content = openai_client.files.content(final_batch.output_file_id)
|
||||||
|
if isinstance(output_content, str):
|
||||||
|
output_text = output_content
|
||||||
|
else:
|
||||||
|
output_text = output_content.content.decode("utf-8")
|
||||||
|
|
||||||
|
output_lines = output_text.strip().split("\n")
|
||||||
|
assert len(output_lines) == 1
|
||||||
|
|
||||||
|
result = json.loads(output_lines[0])
|
||||||
|
assert result["custom_id"] == "success-1"
|
||||||
|
assert "response" in result
|
||||||
|
assert result["response"]["status_code"] == 200
|
||||||
|
|
||||||
|
deleted_output_file = openai_client.files.delete(final_batch.output_file_id)
|
||||||
|
assert deleted_output_file.deleted
|
||||||
|
|
||||||
|
if final_batch.error_file_id is not None:
|
||||||
|
deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
|
||||||
|
assert deleted_error_file.deleted
|
||||||
|
|
|
@ -6,15 +6,17 @@
|
||||||
import inspect
|
import inspect
|
||||||
import itertools
|
import itertools
|
||||||
import os
|
import os
|
||||||
import platform
|
|
||||||
import textwrap
|
import textwrap
|
||||||
import time
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
|
|
||||||
|
from .suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
|
||||||
|
|
||||||
logger = get_logger(__name__, category="tests")
|
logger = get_logger(__name__, category="tests")
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,6 +32,8 @@ def pytest_runtest_makereport(item, call):
|
||||||
def pytest_sessionstart(session):
|
def pytest_sessionstart(session):
|
||||||
# stop macOS from complaining about duplicate OpenMP libraries
|
# stop macOS from complaining about duplicate OpenMP libraries
|
||||||
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
||||||
|
if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ:
|
||||||
|
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"
|
||||||
|
|
||||||
|
|
||||||
def pytest_runtest_teardown(item):
|
def pytest_runtest_teardown(item):
|
||||||
|
@ -59,9 +63,36 @@ def pytest_configure(config):
|
||||||
key, value = env_var.split("=", 1)
|
key, value = env_var.split("=", 1)
|
||||||
os.environ[key] = value
|
os.environ[key] = value
|
||||||
|
|
||||||
if platform.system() == "Darwin": # Darwin is the system name for macOS
|
inference_mode = config.getoption("--inference-mode")
|
||||||
os.environ["DISABLE_CODE_SANDBOX"] = "1"
|
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = inference_mode
|
||||||
logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
|
|
||||||
|
suite = config.getoption("--suite")
|
||||||
|
if suite:
|
||||||
|
if suite not in SUITE_DEFINITIONS:
|
||||||
|
raise pytest.UsageError(f"Unknown suite: {suite}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}")
|
||||||
|
|
||||||
|
# Apply setups (global parameterizations): env + defaults
|
||||||
|
setup = config.getoption("--setup")
|
||||||
|
if suite and not setup:
|
||||||
|
setup = SUITE_DEFINITIONS[suite].default_setup
|
||||||
|
|
||||||
|
if setup:
|
||||||
|
if setup not in SETUP_DEFINITIONS:
|
||||||
|
raise pytest.UsageError(
|
||||||
|
f"Unknown setup '{setup}'. Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}"
|
||||||
|
)
|
||||||
|
|
||||||
|
setup_obj = SETUP_DEFINITIONS[setup]
|
||||||
|
logger.info(f"Applying setup '{setup}'{' for suite ' + suite if suite else ''}")
|
||||||
|
# Apply env first
|
||||||
|
for k, v in setup_obj.env.items():
|
||||||
|
if k not in os.environ:
|
||||||
|
os.environ[k] = str(v)
|
||||||
|
# Apply defaults if not provided explicitly
|
||||||
|
for dest, value in setup_obj.defaults.items():
|
||||||
|
current = getattr(config.option, dest, None)
|
||||||
|
if not current:
|
||||||
|
setattr(config.option, dest, value)
|
||||||
|
|
||||||
|
|
||||||
def pytest_addoption(parser):
|
def pytest_addoption(parser):
|
||||||
|
@ -103,16 +134,32 @@ def pytest_addoption(parser):
|
||||||
default=384,
|
default=384,
|
||||||
help="Output dimensionality of the embedding model to use for testing. Default: 384",
|
help="Output dimensionality of the embedding model to use for testing. Default: 384",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.addoption(
|
parser.addoption(
|
||||||
"--record-responses",
|
"--inference-mode",
|
||||||
action="store_true",
|
help="Inference mode: { record, replay, live } (default: replay)",
|
||||||
help="Record new API responses instead of using cached ones.",
|
choices=["record", "replay", "live"],
|
||||||
|
default="replay",
|
||||||
)
|
)
|
||||||
parser.addoption(
|
parser.addoption(
|
||||||
"--report",
|
"--report",
|
||||||
help="Path where the test report should be written, e.g. --report=/path/to/report.md",
|
help="Path where the test report should be written, e.g. --report=/path/to/report.md",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys()))
|
||||||
|
suite_help = (
|
||||||
|
f"Single test suite to run (narrows collection). Available: {available_suites}. Example: --suite=responses"
|
||||||
|
)
|
||||||
|
parser.addoption("--suite", help=suite_help)
|
||||||
|
|
||||||
|
# Global setups for any suite
|
||||||
|
available_setups = ", ".join(sorted(SETUP_DEFINITIONS.keys()))
|
||||||
|
setup_help = (
|
||||||
|
f"Global test setup configuration. Available: {available_setups}. "
|
||||||
|
"Can be used with any suite. Example: --setup=ollama"
|
||||||
|
)
|
||||||
|
parser.addoption("--setup", help=setup_help)
|
||||||
|
|
||||||
|
|
||||||
MODEL_SHORT_IDS = {
|
MODEL_SHORT_IDS = {
|
||||||
"meta-llama/Llama-3.2-3B-Instruct": "3B",
|
"meta-llama/Llama-3.2-3B-Instruct": "3B",
|
||||||
|
@ -195,3 +242,36 @@ def pytest_generate_tests(metafunc):
|
||||||
|
|
||||||
|
|
||||||
pytest_plugins = ["tests.integration.fixtures.common"]
|
pytest_plugins = ["tests.integration.fixtures.common"]
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
|
||||||
|
"""Skip collecting paths outside the selected suite roots for speed."""
|
||||||
|
suite = config.getoption("--suite")
|
||||||
|
if not suite:
|
||||||
|
return False
|
||||||
|
|
||||||
|
sobj = SUITE_DEFINITIONS.get(suite)
|
||||||
|
roots: list[str] = sobj.get("roots", []) if isinstance(sobj, dict) else getattr(sobj, "roots", [])
|
||||||
|
if not roots:
|
||||||
|
return False
|
||||||
|
|
||||||
|
p = Path(str(path)).resolve()
|
||||||
|
|
||||||
|
# Only constrain within tests/integration to avoid ignoring unrelated tests
|
||||||
|
integration_root = (Path(str(config.rootpath)) / "tests" / "integration").resolve()
|
||||||
|
if not p.is_relative_to(integration_root):
|
||||||
|
return False
|
||||||
|
|
||||||
|
for r in roots:
|
||||||
|
rp = (Path(str(config.rootpath)) / r).resolve()
|
||||||
|
if rp.is_file():
|
||||||
|
# Allow the exact file and any ancestor directories so pytest can walk into it.
|
||||||
|
if p == rp:
|
||||||
|
return False
|
||||||
|
if p.is_dir() and rp.is_relative_to(p):
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
# Allow anything inside an allowed directory
|
||||||
|
if p.is_relative_to(rp):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ..test_cases.test_case import TestCase
|
from ..test_cases.test_case import TestCase
|
||||||
|
@ -35,6 +37,11 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
|
||||||
"remote::sambanova",
|
"remote::sambanova",
|
||||||
"remote::tgi",
|
"remote::tgi",
|
||||||
"remote::vertexai",
|
"remote::vertexai",
|
||||||
|
# {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
|
||||||
|
# or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
|
||||||
|
"remote::groq",
|
||||||
|
"remote::gemini", # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
|
||||||
|
"remote::anthropic", # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
|
||||||
):
|
):
|
||||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
|
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
|
||||||
|
|
||||||
|
@ -56,6 +63,26 @@ def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
|
||||||
pytest.skip(f"Provider {provider.provider_type} doesn't support suffix.")
|
pytest.skip(f"Provider {provider.provider_type} doesn't support suffix.")
|
||||||
|
|
||||||
|
|
||||||
|
def skip_if_doesnt_support_n(client_with_models, model_id):
|
||||||
|
provider = provider_from_model(client_with_models, model_id)
|
||||||
|
if provider.provider_type in (
|
||||||
|
"remote::sambanova",
|
||||||
|
"remote::ollama",
|
||||||
|
# https://console.groq.com/docs/openai#currently-unsupported-openai-features
|
||||||
|
# -> Error code: 400 - {'error': {'message': "'n' : number must be at most 1", 'type': 'invalid_request_error'}}
|
||||||
|
"remote::groq",
|
||||||
|
# Error code: 400 - [{'error': {'code': 400, 'message': 'Only one candidate can be specified in the
|
||||||
|
# current model', 'status': 'INVALID_ARGUMENT'}}]
|
||||||
|
"remote::gemini",
|
||||||
|
# https://docs.anthropic.com/en/api/openai-sdk#simple-fields
|
||||||
|
"remote::anthropic",
|
||||||
|
"remote::vertexai",
|
||||||
|
# Error code: 400 - [{'error': {'code': 400, 'message': 'Unable to submit request because candidateCount must be 1 but
|
||||||
|
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
|
||||||
|
):
|
||||||
|
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
|
||||||
|
|
||||||
|
|
||||||
def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, model_id):
|
def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, model_id):
|
||||||
provider = provider_from_model(client_with_models, model_id)
|
provider = provider_from_model(client_with_models, model_id)
|
||||||
if provider.provider_type in (
|
if provider.provider_type in (
|
||||||
|
@ -260,10 +287,7 @@ def test_openai_chat_completion_streaming(compat_client, client_with_models, tex
|
||||||
)
|
)
|
||||||
def test_openai_chat_completion_streaming_with_n(compat_client, client_with_models, text_model_id, test_case):
|
def test_openai_chat_completion_streaming_with_n(compat_client, client_with_models, text_model_id, test_case):
|
||||||
skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
|
skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
|
||||||
|
skip_if_doesnt_support_n(client_with_models, text_model_id)
|
||||||
provider = provider_from_model(client_with_models, text_model_id)
|
|
||||||
if provider.provider_type == "remote::ollama":
|
|
||||||
pytest.skip(f"Model {text_model_id} hosted by {provider.provider_type} doesn't support n > 1.")
|
|
||||||
|
|
||||||
tc = TestCase(test_case)
|
tc = TestCase(test_case)
|
||||||
question = tc["question"]
|
question = tc["question"]
|
||||||
|
@ -323,8 +347,15 @@ def test_inference_store(compat_client, client_with_models, text_model_id, strea
|
||||||
response_id = response.id
|
response_id = response.id
|
||||||
content = response.choices[0].message.content
|
content = response.choices[0].message.content
|
||||||
|
|
||||||
|
tries = 0
|
||||||
|
while tries < 10:
|
||||||
responses = client.chat.completions.list(limit=1000)
|
responses = client.chat.completions.list(limit=1000)
|
||||||
assert response_id in [r.id for r in responses.data]
|
if response_id in [r.id for r in responses.data]:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
tries += 1
|
||||||
|
time.sleep(0.1)
|
||||||
|
assert tries < 10, f"Response {response_id} not found after 1 second"
|
||||||
|
|
||||||
retrieved_response = client.chat.completions.retrieve(response_id)
|
retrieved_response = client.chat.completions.retrieve(response_id)
|
||||||
assert retrieved_response.id == response_id
|
assert retrieved_response.id == response_id
|
||||||
|
@ -388,6 +419,18 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
|
||||||
response_id = response.id
|
response_id = response.id
|
||||||
content = response.choices[0].message.content
|
content = response.choices[0].message.content
|
||||||
|
|
||||||
|
# wait for the response to be stored
|
||||||
|
tries = 0
|
||||||
|
while tries < 10:
|
||||||
|
responses = client.chat.completions.list(limit=1000)
|
||||||
|
if response_id in [r.id for r in responses.data]:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
tries += 1
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
assert tries < 10, f"Response {response_id} not found after 1 second"
|
||||||
|
|
||||||
responses = client.chat.completions.list(limit=1000)
|
responses = client.chat.completions.list(limit=1000)
|
||||||
assert response_id in [r.id for r in responses.data]
|
assert response_id in [r.id for r in responses.data]
|
||||||
|
|
||||||
|
|
|
@ -20,15 +20,15 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama-guard3:1b",
|
"model": "llama-guard3:1b",
|
||||||
"created_at": "2025-08-01T23:12:53.860911Z",
|
"created_at": "2025-09-03T17:37:35.23084Z",
|
||||||
"done": true,
|
"done": true,
|
||||||
"done_reason": "stop",
|
"done_reason": "stop",
|
||||||
"total_duration": 249137667,
|
"total_duration": 195981375,
|
||||||
"load_duration": 152509542,
|
"load_duration": 110522917,
|
||||||
"prompt_eval_count": 216,
|
"prompt_eval_count": 216,
|
||||||
"prompt_eval_duration": 71000000,
|
"prompt_eval_duration": 72393958,
|
||||||
"eval_count": 2,
|
"eval_count": 2,
|
||||||
"eval_duration": 24000000,
|
"eval_duration": 11843000,
|
||||||
"response": "safe",
|
"response": "safe",
|
||||||
"thinking": null,
|
"thinking": null,
|
||||||
"context": null
|
"context": null
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:18.033900164Z",
|
"created_at": "2025-09-03T17:41:43.950283Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -39,7 +39,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:18.213371151Z",
|
"created_at": "2025-09-03T17:41:43.991122Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -57,7 +57,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:18.387513976Z",
|
"created_at": "2025-09-03T17:41:44.031378Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -75,7 +75,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:18.564344287Z",
|
"created_at": "2025-09-03T17:41:44.073098Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -93,7 +93,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:18.746579415Z",
|
"created_at": "2025-09-03T17:41:44.115961Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -111,7 +111,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:18.923276047Z",
|
"created_at": "2025-09-03T17:41:44.156517Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -129,7 +129,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:19.099961963Z",
|
"created_at": "2025-09-03T17:41:44.197079Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -147,7 +147,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:19.275621884Z",
|
"created_at": "2025-09-03T17:41:44.237565Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -165,7 +165,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:19.452204196Z",
|
"created_at": "2025-09-03T17:41:44.277755Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -183,7 +183,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:19.626937514Z",
|
"created_at": "2025-09-03T17:41:44.318476Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -201,7 +201,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:19.805566767Z",
|
"created_at": "2025-09-03T17:41:44.358628Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -219,7 +219,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:19.985987477Z",
|
"created_at": "2025-09-03T17:41:44.398984Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -237,7 +237,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:20.166458601Z",
|
"created_at": "2025-09-03T17:41:44.439232Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -255,7 +255,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:20.343346795Z",
|
"created_at": "2025-09-03T17:41:44.479478Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -273,7 +273,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:20.525008091Z",
|
"created_at": "2025-09-03T17:41:44.520202Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -291,7 +291,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:20.709087695Z",
|
"created_at": "2025-09-03T17:41:44.560517Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -309,7 +309,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:20.887074305Z",
|
"created_at": "2025-09-03T17:41:44.601592Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -327,15 +327,15 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-07-31T17:59:21.065244925Z",
|
"created_at": "2025-09-03T17:41:44.642064Z",
|
||||||
"done": true,
|
"done": true,
|
||||||
"done_reason": "stop",
|
"done_reason": "stop",
|
||||||
"total_duration": 4373531496,
|
"total_duration": 887142667,
|
||||||
"load_duration": 44438132,
|
"load_duration": 119331417,
|
||||||
"prompt_eval_count": 56,
|
"prompt_eval_count": 56,
|
||||||
"prompt_eval_duration": 1296273199,
|
"prompt_eval_duration": 74294709,
|
||||||
"eval_count": 18,
|
"eval_count": 18,
|
||||||
"eval_duration": 3032321735,
|
"eval_duration": 692842791,
|
||||||
"response": "",
|
"response": "",
|
||||||
"thinking": null,
|
"thinking": null,
|
||||||
"context": null
|
"context": null
|
||||||
|
|
|
@ -20,15 +20,15 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama-guard3:1b",
|
"model": "llama-guard3:1b",
|
||||||
"created_at": "2025-08-01T23:13:57.556416Z",
|
"created_at": "2025-09-03T17:37:47.461886Z",
|
||||||
"done": true,
|
"done": true,
|
||||||
"done_reason": "stop",
|
"done_reason": "stop",
|
||||||
"total_duration": 432363250,
|
"total_duration": 338927833,
|
||||||
"load_duration": 159296417,
|
"load_duration": 100895125,
|
||||||
"prompt_eval_count": 223,
|
"prompt_eval_count": 223,
|
||||||
"prompt_eval_duration": 257000000,
|
"prompt_eval_duration": 221583042,
|
||||||
"eval_count": 2,
|
"eval_count": 2,
|
||||||
"eval_duration": 14000000,
|
"eval_duration": 12341416,
|
||||||
"response": "safe",
|
"response": "safe",
|
||||||
"thinking": null,
|
"thinking": null,
|
||||||
"context": null
|
"context": null
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-29",
|
"id": "chatcmpl-414",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
@ -39,7 +39,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754090031,
|
"created": 1756921333,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
@ -50,7 +50,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-29",
|
"id": "chatcmpl-414",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
@ -65,7 +65,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754090031,
|
"created": 1756921333,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
@ -76,7 +76,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-29",
|
"id": "chatcmpl-414",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
@ -91,7 +91,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754090031,
|
"created": 1756921333,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
@ -102,7 +102,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-29",
|
"id": "chatcmpl-414",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
@ -117,7 +117,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754090031,
|
"created": 1756921333,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
@ -128,7 +128,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-29",
|
"id": "chatcmpl-414",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
@ -143,7 +143,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754090031,
|
"created": 1756921334,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
@ -154,7 +154,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-29",
|
"id": "chatcmpl-414",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
@ -169,7 +169,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754090031,
|
"created": 1756921334,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
@ -180,7 +180,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-29",
|
"id": "chatcmpl-414",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
@ -195,7 +195,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754090031,
|
"created": 1756921334,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
@ -206,7 +206,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-29",
|
"id": "chatcmpl-414",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
@ -221,7 +221,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754090031,
|
"created": 1756921334,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"request": {
|
"request": {
|
||||||
"method": "POST",
|
"method": "POST",
|
||||||
"url": "http://localhost:11434/v1/v1/chat/completions",
|
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||||
"headers": {},
|
"headers": {},
|
||||||
"body": {
|
"body": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
@ -20,14 +20,14 @@
|
||||||
"body": {
|
"body": {
|
||||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-368",
|
"id": "chatcmpl-161",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"finish_reason": "stop",
|
"finish_reason": "stop",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"logprobs": null,
|
"logprobs": null,
|
||||||
"message": {
|
"message": {
|
||||||
"content": "Saturn is known for its extensive ring system.",
|
"content": "The answer is Saturn.",
|
||||||
"refusal": null,
|
"refusal": null,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"annotations": null,
|
"annotations": null,
|
||||||
|
@ -37,15 +37,15 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1754081853,
|
"created": 1756921364,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
"system_fingerprint": "fp_ollama",
|
"system_fingerprint": "fp_ollama",
|
||||||
"usage": {
|
"usage": {
|
||||||
"completion_tokens": 11,
|
"completion_tokens": 6,
|
||||||
"prompt_tokens": 39,
|
"prompt_tokens": 39,
|
||||||
"total_tokens": 50,
|
"total_tokens": 45,
|
||||||
"completion_tokens_details": null,
|
"completion_tokens_details": null,
|
||||||
"prompt_tokens_details": null
|
"prompt_tokens_details": null
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,15 +20,15 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama-guard3:1b",
|
"model": "llama-guard3:1b",
|
||||||
"created_at": "2025-08-01T23:12:51.682357Z",
|
"created_at": "2025-09-03T17:37:33.473237Z",
|
||||||
"done": true,
|
"done": true,
|
||||||
"done_reason": "stop",
|
"done_reason": "stop",
|
||||||
"total_duration": 238161000,
|
"total_duration": 279025042,
|
||||||
"load_duration": 72494750,
|
"load_duration": 162673250,
|
||||||
"prompt_eval_count": 212,
|
"prompt_eval_count": 212,
|
||||||
"prompt_eval_duration": 87000000,
|
"prompt_eval_duration": 73595834,
|
||||||
"eval_count": 5,
|
"eval_count": 5,
|
||||||
"eval_duration": 74000000,
|
"eval_duration": 41950291,
|
||||||
"response": "unsafe\nS8",
|
"response": "unsafe\nS8",
|
||||||
"thinking": null,
|
"thinking": null,
|
||||||
"context": null
|
"context": null
|
||||||
|
|