Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 04:04:14 +00:00)

Commit 4375764074: Merge branch 'main' into crewai
272 changed files with 28701 additions and 17713 deletions
.github/actions/run-and-record-tests/action.yml (60 changes)

@@ -2,26 +2,28 @@ name: 'Run and Record Tests'
 description: 'Run integration tests and handle recording/artifact upload'

 inputs:
-  test-subdirs:
-    description: 'Comma-separated list of test subdirectories to run'
-    required: true
-  test-pattern:
-    description: 'Regex pattern to pass to pytest -k'
-    required: false
-    default: ''
   stack-config:
     description: 'Stack configuration to use'
     required: true
-  provider:
-    description: 'Provider to use for tests'
-    required: true
+  setup:
+    description: 'Setup to use for tests (e.g., ollama, gpt, vllm)'
+    required: false
+    default: ''
   inference-mode:
     description: 'Inference mode (record or replay)'
     required: true
-  run-vision-tests:
-    description: 'Whether to run vision tests'
+  suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
     required: false
-    default: 'false'
+    default: ''
+  subdirs:
+    description: 'Comma-separated list of test subdirectories to run; overrides suite'
+    required: false
+    default: ''
+  pattern:
+    description: 'Regex pattern to pass to pytest -k'
+    required: false
+    default: ''

 runs:
   using: 'composite'
@@ -36,14 +38,23 @@ runs:
     - name: Run Integration Tests
       shell: bash
       run: |
-        uv run --no-sync ./scripts/integration-tests.sh \
-          --stack-config '${{ inputs.stack-config }}' \
-          --provider '${{ inputs.provider }}' \
-          --test-subdirs '${{ inputs.test-subdirs }}' \
-          --test-pattern '${{ inputs.test-pattern }}' \
-          --inference-mode '${{ inputs.inference-mode }}' \
-          ${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }} \
-          | tee pytest-${{ inputs.inference-mode }}.log
+        SCRIPT_ARGS="--stack-config ${{ inputs.stack-config }} --inference-mode ${{ inputs.inference-mode }}"
+
+        # Add optional arguments only if they are provided
+        if [ -n '${{ inputs.setup }}' ]; then
+          SCRIPT_ARGS="$SCRIPT_ARGS --setup ${{ inputs.setup }}"
+        fi
+        if [ -n '${{ inputs.suite }}' ]; then
+          SCRIPT_ARGS="$SCRIPT_ARGS --suite ${{ inputs.suite }}"
+        fi
+        if [ -n '${{ inputs.subdirs }}' ]; then
+          SCRIPT_ARGS="$SCRIPT_ARGS --subdirs ${{ inputs.subdirs }}"
+        fi
+        if [ -n '${{ inputs.pattern }}' ]; then
+          SCRIPT_ARGS="$SCRIPT_ARGS --pattern ${{ inputs.pattern }}"
+        fi
+
+        uv run --no-sync ./scripts/integration-tests.sh $SCRIPT_ARGS | tee pytest-${{ inputs.inference-mode }}.log

     - name: Commit and push recordings
@@ -57,12 +68,7 @@ runs:
           echo "New recordings detected, committing and pushing"
           git add tests/integration/recordings/

-          if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
-            git commit -m "Recordings update from CI (vision)"
-          else
-            git commit -m "Recordings update from CI"
-          fi
+          git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"

           git fetch origin ${{ github.ref_name }}
           git rebase origin/${{ github.ref_name }}
           echo "Rebased successfully"
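For illustration, a minimal sketch of the command the rewritten step assembles, assuming only `stack-config`, `inference-mode`, and `suite` are provided (the concrete values below are hypothetical):

```bash
# Hypothetical inputs: stack-config=server:ci-tests, inference-mode=replay, suite=vision
SCRIPT_ARGS="--stack-config server:ci-tests --inference-mode replay"
# setup, subdirs and pattern are empty, so only --suite is appended
SCRIPT_ARGS="$SCRIPT_ARGS --suite vision"
uv run --no-sync ./scripts/integration-tests.sh $SCRIPT_ARGS | tee pytest-replay.log
```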
.github/actions/setup-ollama/action.yml (8 changes)

@@ -1,17 +1,17 @@
 name: Setup Ollama
 description: Start Ollama
 inputs:
-  run-vision-tests:
-    description: 'Run vision tests: "true" or "false"'
+  suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
     required: false
-    default: 'false'
+    default: ''
 runs:
   using: "composite"
   steps:
     - name: Start Ollama
       shell: bash
       run: |
-        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+        if [ "${{ inputs.suite }}" == "vision" ]; then
           image="ollama-with-vision-model"
         else
           image="ollama-with-models"
Composite action that prepares the test environment (file path not captured in this mirror)

@@ -8,14 +8,14 @@ inputs:
   client-version:
     description: 'Client version (latest or published)'
     required: true
-  provider:
-    description: 'Provider to setup (ollama or vllm)'
-    required: true
-    default: 'ollama'
-  run-vision-tests:
-    description: 'Whether to setup provider for vision tests'
-    required: false
-    default: 'false'
+  setup:
+    description: 'Setup to configure (ollama, vllm, gpt, etc.)'
+    required: false
+    default: 'ollama'
+  suite:
+    description: 'Test suite to use: base, responses, vision, etc.'
+    required: false
+    default: ''
   inference-mode:
     description: 'Inference mode (record or replay)'
     required: true
@@ -30,13 +30,13 @@ runs:
       client-version: ${{ inputs.client-version }}

   - name: Setup ollama
-    if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
+    if: ${{ (inputs.setup == 'ollama' || inputs.setup == 'ollama-vision') && inputs.inference-mode == 'record' }}
     uses: ./.github/actions/setup-ollama
     with:
-      run-vision-tests: ${{ inputs.run-vision-tests }}
+      suite: ${{ inputs.suite }}

   - name: Setup vllm
-    if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
+    if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
     uses: ./.github/actions/setup-vllm

   - name: Build Llama Stack
.github/workflows/README.md (3 changes)

@@ -5,10 +5,11 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table
 | Name | File | Purpose |
 | ---- | ---- | ------- |
 | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
+| API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
 | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
 | SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
-| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
+| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
.github/workflows/conformance.yml (57 changes, new file)

@@ -0,0 +1,57 @@
+# API Conformance Tests
+# This workflow ensures that API changes maintain backward compatibility and don't break existing integrations
+# It runs schema validation and OpenAPI diff checks to catch breaking changes early
+
+name: API Conformance Tests
+
+run-name: Run the API Conformance test suite on the changes.
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    types: [opened, synchronize, reopened]
+    paths:
+      - 'llama_stack/**'
+      - '!llama_stack/ui/**'
+      - 'tests/**'
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - '.github/workflows/conformance.yml' # This workflow itself
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
+  # Cancel in-progress runs when new commits are pushed to avoid wasting CI resources
+  cancel-in-progress: true
+
+jobs:
+  # Job to check if API schema changes maintain backward compatibility
+  check-schema-compatibility:
+    runs-on: ubuntu-latest
+    steps:
+      # Using specific version 4.1.7 because 5.0.0 fails when trying to run this locally using `act`
+      # This ensures consistent behavior between local testing and CI
+      - name: Checkout PR Code
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+
+      # Checkout the base branch to compare against (usually main)
+      # This allows us to diff the current changes against the previous state
+      - name: Checkout Base Branch
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          ref: ${{ github.event.pull_request.base.ref }}
+          path: 'base'
+
+      # Install oasdiff: https://github.com/oasdiff/oasdiff, a tool for detecting breaking changes in OpenAPI specs.
+      - name: Install oasdiff
+        run: |
+          curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh
+
+      # Run oasdiff to detect breaking changes in the API specification
+      # This step will fail if incompatible changes are detected, preventing breaking changes from being merged
+      - name: Run OpenAPI Breaking Change Diff
+        run: |
+          oasdiff breaking --fail-on ERR base/docs/_static/llama-stack-spec.yaml docs/_static/llama-stack-spec.yaml --match-path '^/v1/openai/v1' \
+            --match-path '^/v1/vector-io' \
+            --match-path '^/v1/vector-dbs'
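To reproduce the conformance check locally, a sketch that mirrors the workflow above (it assumes the base branch spec has been checked out under `./base`, as the workflow does):

```bash
# Install oasdiff with the same installer the workflow uses
curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh

# Fail only on breaking changes to the matched path prefixes
oasdiff breaking --fail-on ERR \
  base/docs/_static/llama-stack-spec.yaml docs/_static/llama-stack-spec.yaml \
  --match-path '^/v1/openai/v1' \
  --match-path '^/v1/vector-io' \
  --match-path '^/v1/vector-dbs'
```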
.github/workflows/integration-tests.yml (32 changes)

@@ -1,6 +1,6 @@
 name: Integration Tests (Replay)

-run-name: Run the integration test suite from tests/integration in replay mode
+run-name: Run the integration test suites from tests/integration in replay mode

 on:
   push:
@@ -28,18 +28,10 @@ on:
         description: 'Test against both the latest and published versions'
         type: boolean
         default: false
-      test-provider:
-        description: 'Test against a specific provider'
+      test-setup:
+        description: 'Test against a specific setup'
         type: string
         default: 'ollama'
-      test-subdirs:
-        description: 'Comma-separated list of test subdirectories to run'
-        type: string
-        default: ''
-      test-pattern:
-        description: 'Regex pattern to pass to pytest -k'
-        type: string
-        default: ''

 concurrency:
   # Skip concurrency for pushes to main - each commit should be tested independently
@@ -50,18 +42,18 @@ jobs:

   run-replay-mode-tests:
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}

     strategy:
       fail-fast: false
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
-        provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
+        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
+        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        run-vision-tests: [true, false]
+        suite: [base, vision]

     steps:
       - name: Checkout repository
@@ -72,16 +64,14 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
-          provider: ${{ matrix.provider }}
-          run-vision-tests: ${{ matrix.run-vision-tests }}
+          setup: ${{ matrix.setup }}
+          suite: ${{ matrix.suite }}
           inference-mode: 'replay'

       - name: Run tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-subdirs: ${{ inputs.test-subdirs }}
-          test-pattern: ${{ inputs.test-pattern }}
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
-          provider: ${{ matrix.provider }}
+          setup: ${{ matrix.setup }}
           inference-mode: 'replay'
-          run-vision-tests: ${{ matrix.run-vision-tests }}
+          suite: ${{ matrix.suite }}

.github/workflows/pre-commit.yml (5 changes)

@@ -28,7 +28,7 @@ jobs:
           fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }}

       - name: Set up Python
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
         with:
           python-version: '3.12'
           cache: pip

@@ -37,7 +37,7 @@ jobs:
             .pre-commit-config.yaml

       - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+        uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
         with:
           node-version: '20'
           cache: 'npm'

@@ -48,7 +48,6 @@ jobs:
         working-directory: llama_stack/ui

       - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
-        continue-on-error: true
         env:
           SKIP: no-commit-to-branch
           RUFF_OUTPUT_FORMAT: github

.github/workflows/python-build-test.yml (2 changes)

@@ -24,7 +24,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install uv
-        uses: astral-sh/setup-uv@4959332f0f014c5280e7eac8b70c90cb574c9f9b # v6.6.0
+        uses: astral-sh/setup-uv@557e51de59eb14aaaba2ed9621916900a91d50c6 # v6.6.1
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true

.github/workflows/record-integration-tests.yml (42 changes)

@@ -10,19 +10,19 @@ run-name: Run the integration test suite from tests/integration
 on:
   workflow_dispatch:
     inputs:
-      test-subdirs:
-        description: 'Comma-separated list of test subdirectories to run'
-        type: string
-        default: ''
-      test-provider:
-        description: 'Test against a specific provider'
+      test-setup:
+        description: 'Test against a specific setup'
         type: string
         default: 'ollama'
-      run-vision-tests:
-        description: 'Whether to run vision tests'
-        type: boolean
-        default: false
-      test-pattern:
+      suite:
+        description: 'Test suite to use: base, responses, vision, etc.'
+        type: string
+        default: ''
+      subdirs:
+        description: 'Comma-separated list of test subdirectories to run; overrides suite'
+        type: string
+        default: ''
+      pattern:
         description: 'Regex pattern to pass to pytest -k'
         type: string
         default: ''

@@ -38,11 +38,11 @@ jobs:
       - name: Echo workflow inputs
         run: |
           echo "::group::Workflow Inputs"
-          echo "test-subdirs: ${{ inputs.test-subdirs }}"
-          echo "test-provider: ${{ inputs.test-provider }}"
-          echo "run-vision-tests: ${{ inputs.run-vision-tests }}"
-          echo "test-pattern: ${{ inputs.test-pattern }}"
           echo "branch: ${{ github.ref_name }}"
+          echo "test-setup: ${{ inputs.test-setup }}"
+          echo "suite: ${{ inputs.suite }}"
+          echo "subdirs: ${{ inputs.subdirs }}"
+          echo "pattern: ${{ inputs.pattern }}"
           echo "::endgroup::"

       - name: Checkout repository

@@ -55,16 +55,16 @@ jobs:
         with:
           python-version: "3.12"  # Use single Python version for recording
           client-version: "latest"
-          provider: ${{ inputs.test-provider || 'ollama' }}
-          run-vision-tests: ${{ inputs.run-vision-tests }}
+          setup: ${{ inputs.test-setup || 'ollama' }}
+          suite: ${{ inputs.suite }}
           inference-mode: 'record'

       - name: Run and record tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-pattern: ${{ inputs.test-pattern }}
-          test-subdirs: ${{ inputs.test-subdirs }}
           stack-config: 'server:ci-tests'  # recording must be done with server since more tests are run
-          provider: ${{ inputs.test-provider || 'ollama' }}
+          setup: ${{ inputs.test-setup || 'ollama' }}
           inference-mode: 'record'
-          run-vision-tests: ${{ inputs.run-vision-tests }}
+          suite: ${{ inputs.suite }}
+          subdirs: ${{ inputs.subdirs }}
+          pattern: ${{ inputs.pattern }}

.github/workflows/stale_bot.yml (2 changes)

@@ -24,7 +24,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Stale Action
-        uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
+        uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
         with:
           stale-issue-label: 'stale'
           stale-issue-message: >

.github/workflows/ui-unit-tests.yml (2 changes)

@@ -29,7 +29,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Setup Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+        uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'

.gitignore (2 changes)

@@ -26,5 +26,7 @@ venv/
 pytest-report.xml
 .coverage
 .python-version
+AGENTS.md
+server.log
 CLAUDE.md
 .claude/

Pre-commit hook configuration (file path not captured in this mirror)

@@ -86,7 +86,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+        files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
       - id: provider-codegen
         name: Provider Codegen
         additional_dependencies:

CHANGELOG.md (98 changes)

@@ -1,5 +1,103 @@
 # Changelog

+# v0.2.20
+Published on: 2025-08-29T22:25:32Z
+
+Here are some key changes that are coming as part of this release.
+
+### Build and Environment
+
+- Environment improvements: fixed env var replacement to preserve types.
+- Docker stability: fixed container startup failures for Fireworks AI provider.
+- Removed absolute paths in build for better portability.
+
+### Features
+
+- UI Enhancements: Implemented file upload and VectorDB creation/configuration directly in UI.
+- Vector Store Improvements: Added keyword, vector, and hybrid search inside vector store.
+- Added S3 authorization support for file providers.
+- SQL Store: Added inequality support to where clause.
+
+### Documentation
+
+- Fixed post-training docs.
+- Added Contributor Guidelines for creating Internal vs. External providers.
+
+### Fixes
+
+- Removed unsupported bfcl scoring function.
+- Multiple reliability and configuration fixes for providers and environment handling.
+
+### Engineering / Chores
+
+- Cleaner internal development setup with consistent paths.
+- Incremental improvements to provider integration and vector store behavior.
+
+### New Contributors
+- @omertuc made their first contribution in #3270
+- @r3v5 made their first contribution in vector store hybrid search
+
+---
+
+# v0.2.19
+Published on: 2025-08-26T22:06:55Z
+
+## Highlights
+* feat: Add CORS configuration support for server by @skamenan7 in https://github.com/llamastack/llama-stack/pull/3201
+* feat(api): introduce /rerank by @ehhuang in https://github.com/llamastack/llama-stack/pull/2940
+* feat: Add S3 Files Provider by @mattf in https://github.com/llamastack/llama-stack/pull/3202
+
+---
+
+# v0.2.18
+Published on: 2025-08-20T01:09:27Z
+
+## Highlights
+* Add moderations create API
+* Hybrid search in Milvus
+* Numerous Responses API improvements
+* Documentation updates
+
+---
+
+# v0.2.17
+Published on: 2025-08-05T01:51:14Z
+
+## Highlights
+
+* feat(tests): introduce inference record/replay to increase test reliability by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2941
+* fix(library_client): improve initialization error handling and prevent AttributeError by @mattf in https://github.com/meta-llama/llama-stack/pull/2944
+* fix: use OLLAMA_URL to activate Ollama provider in starter by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2963
+* feat(UI): adding MVP playground UI by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2828
+* Standardization of errors (@nathan-weinberg)
+* feat: Enable DPO training with HuggingFace inline provider by @Nehanth in https://github.com/meta-llama/llama-stack/pull/2825
+* chore: rename templates to distributions by @ashwinb in https://github.com/meta-llama/llama-stack/pull/3035
+
+---
+
+# v0.2.16
+Published on: 2025-07-28T23:35:23Z
+
+## Highlights
+
+* Automatic model registration for self-hosted providers (ollama and vllm currently). No need for `INFERENCE_MODEL` environment variables which need to be updated, etc.
+* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [run.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/run.yaml) for more details.
+* All tests migrated to pytest now (thanks @Elbehery)
+* DPO implementation in the post-training provider (thanks @Nehanth)
+* (Huge!) Support for external APIs and providers thereof (thanks @leseb, @cdoern and others). This is a really big deal -- you can now add more APIs completely out of tree and experiment with them before (optionally) wanting to contribute back.
+* `inline::vllm` provider is gone thank you very much
+* several improvements to OpenAI inference implementations and LiteLLM backend (thanks @mattf)
+* Chroma now supports Vector Store API (thanks @franciscojavierarceo).
+* Authorization improvements: Vector Store/File APIs now supports access control (thanks @franciscojavierarceo); Telemetry read APIs are gated according to logged-in user's roles.
+
+---
+
 # v0.2.15
 Published on: 2025-07-16T03:30:01Z
Benchmark deployment README (file path not captured in this mirror)

@@ -34,13 +34,12 @@ This data enables data-driven architectural decisions and performance optimizati

 **1. Deploy base k8s infrastructure:**
 ```bash
-cd ../k8s
+cd ../../docs/source/distributions/k8s
 ./apply.sh
 ```

 **2. Deploy benchmark components:**
 ```bash
-cd ../k8s-benchmark
 ./apply.sh
 ```

@@ -56,7 +55,6 @@ kubectl get pods

 **Benchmark Llama Stack (default):**
 ```bash
-cd docs/source/distributions/k8s-benchmark/
 ./run-benchmark.sh
 ```
Python benchmark script (file path not captured in this mirror)

@@ -14,7 +14,7 @@ import os
 import random
 import statistics
 import time
-from typing import Tuple

 import aiohttp

@@ -57,17 +57,9 @@ class BenchmarkStats:
         success_rate = (self.success_count / self.total_requests) * 100

         print(f"\n{'=' * 60}")
-        print(f"BENCHMARK RESULTS")
-        print(f"{'='*60}")
-        print(f"Total time: {total_time:.2f}s")
-        print(f"Concurrent users: {self.concurrent_users}")
-        print(f"Total requests: {self.total_requests}")
-        print(f"Successful requests: {self.success_count}")
-        print(f"Failed requests: {len(self.errors)}")
-        print(f"Success rate: {success_rate:.1f}%")
-        print(f"Requests per second: {self.success_count / total_time:.2f}")
+        print("BENCHMARK RESULTS")

-        print(f"\nResponse Time Statistics:")
+        print("\nResponse Time Statistics:")
         print(f"  Mean: {statistics.mean(self.response_times):.3f}s")
         print(f"  Median: {statistics.median(self.response_times):.3f}s")
         print(f"  Min: {min(self.response_times):.3f}s")

@@ -78,14 +70,14 @@ class BenchmarkStats:
         percentiles = [50, 90, 95, 99]
         sorted_times = sorted(self.response_times)
-        print(f"\nPercentiles:")
+        print("\nPercentiles:")
         for p in percentiles:
             idx = int(len(sorted_times) * p / 100) - 1
             idx = max(0, min(idx, len(sorted_times) - 1))
             print(f"  P{p}: {sorted_times[idx]:.3f}s")

         if self.ttft_times:
-            print(f"\nTime to First Token (TTFT) Statistics:")
+            print("\nTime to First Token (TTFT) Statistics:")
             print(f"  Mean: {statistics.mean(self.ttft_times):.3f}s")
             print(f"  Median: {statistics.median(self.ttft_times):.3f}s")
             print(f"  Min: {min(self.ttft_times):.3f}s")

@@ -95,26 +87,35 @@ class BenchmarkStats:
             print(f"  Std Dev: {statistics.stdev(self.ttft_times):.3f}s")

             sorted_ttft = sorted(self.ttft_times)
-            print(f"\nTTFT Percentiles:")
+            print("\nTTFT Percentiles:")
             for p in percentiles:
                 idx = int(len(sorted_ttft) * p / 100) - 1
                 idx = max(0, min(idx, len(sorted_ttft) - 1))
                 print(f"  P{p}: {sorted_ttft[idx]:.3f}s")

         if self.chunks_received:
-            print(f"\nStreaming Statistics:")
+            print("\nStreaming Statistics:")
             print(f"  Mean chunks per response: {statistics.mean(self.chunks_received):.1f}")
             print(f"  Total chunks received: {sum(self.chunks_received)}")

+        print(f"{'=' * 60}")
+        print(f"Total time: {total_time:.2f}s")
+        print(f"Concurrent users: {self.concurrent_users}")
+        print(f"Total requests: {self.total_requests}")
+        print(f"Successful requests: {self.success_count}")
+        print(f"Failed requests: {len(self.errors)}")
+        print(f"Success rate: {success_rate:.1f}%")
+        print(f"Requests per second: {self.success_count / total_time:.2f}")
+
         if self.errors:
-            print(f"\nErrors (showing first 5):")
+            print("\nErrors (showing first 5):")
             for error in self.errors[:5]:
                 print(f"  {error}")


 class LlamaStackBenchmark:
     def __init__(self, base_url: str, model_id: str):
-        self.base_url = base_url.rstrip('/')
+        self.base_url = base_url.rstrip("/")
         self.model_id = model_id
         self.headers = {"Content-Type": "application/json"}
         self.test_messages = [

@@ -125,20 +126,14 @@ class LlamaStackBenchmark:
             [
                 {"role": "user", "content": "What is machine learning?"},
                 {"role": "assistant", "content": "Machine learning is a subset of AI..."},
-                {"role": "user", "content": "Can you give me a practical example?"}
-            ]
+                {"role": "user", "content": "Can you give me a practical example?"},
+            ],
         ]

-    async def make_async_streaming_request(self) -> Tuple[float, int, float | None, str | None]:
+    async def make_async_streaming_request(self) -> tuple[float, int, float | None, str | None]:
         """Make a single async streaming chat completion request."""
         messages = random.choice(self.test_messages)
-        payload = {
-            "model": self.model_id,
-            "messages": messages,
-            "stream": True,
-            "max_tokens": 100
-        }
+        payload = {"model": self.model_id, "messages": messages, "stream": True, "max_tokens": 100}

         start_time = time.time()
         chunks_received = 0

@@ -152,17 +147,17 @@ class LlamaStackBenchmark:
                 f"{self.base_url}/chat/completions",
                 headers=self.headers,
                 json=payload,
-                timeout=aiohttp.ClientTimeout(total=30)
+                timeout=aiohttp.ClientTimeout(total=30),
             ) as response:
                 if response.status == 200:
                     async for line in response.content:
                         if line:
-                            line_str = line.decode('utf-8').strip()
-                            if line_str.startswith('data: '):
+                            line_str = line.decode("utf-8").strip()
+                            if line_str.startswith("data: "):
                                 chunks_received += 1
                                 if ttft is None:
                                     ttft = time.time() - start_time
-                                if line_str == 'data: [DONE]':
+                                if line_str == "data: [DONE]":
                                     break

                     if chunks_received == 0:

@@ -179,7 +174,6 @@ class LlamaStackBenchmark:
         response_time = time.time() - start_time
         return response_time, chunks_received, ttft, error

-
     async def run_benchmark(self, duration: int, concurrent_users: int) -> BenchmarkStats:
         """Run benchmark using async requests for specified duration."""
         stats = BenchmarkStats()

@@ -191,7 +185,7 @@ class LlamaStackBenchmark:
         print(f"Model: {self.model_id}")

         connector = aiohttp.TCPConnector(limit=concurrent_users)
-        async with aiohttp.ClientSession(connector=connector) as session:
+        async with aiohttp.ClientSession(connector=connector):

             async def worker(worker_id: int):
                 """Worker that sends requests sequentially until canceled."""

@@ -215,7 +209,9 @@ class LlamaStackBenchmark:
                     await asyncio.sleep(1)  # Report every second
                     if time.time() >= last_report_time + 10:  # Report every 10 seconds
                         elapsed = time.time() - stats.start_time
-                        print(f"Completed: {stats.total_requests} requests in {elapsed:.1f}s")
+                        print(
+                            f"Completed: {stats.total_requests} requests in {elapsed:.1f}s, RPS: {stats.total_requests / elapsed:.1f}"
+                        )
                         last_report_time = time.time()
                 except asyncio.CancelledError:
                     break

@@ -240,14 +236,16 @@
 def main():
     parser = argparse.ArgumentParser(description="Llama Stack Benchmark Tool")
-    parser.add_argument("--base-url", default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
-                        help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)")
-    parser.add_argument("--model", default=os.getenv("INFERENCE_MODEL", "test-model"),
-                        help="Model ID to use for requests")
-    parser.add_argument("--duration", type=int, default=60,
-                        help="Duration in seconds to run benchmark (default: 60)")
-    parser.add_argument("--concurrent", type=int, default=10,
-                        help="Number of concurrent users (default: 10)")
+    parser.add_argument(
+        "--base-url",
+        default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
+        help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)",
+    )
+    parser.add_argument(
+        "--model", default=os.getenv("INFERENCE_MODEL", "test-model"), help="Model ID to use for requests"
+    )
+    parser.add_argument("--duration", type=int, default=60, help="Duration in seconds to run benchmark (default: 60)")
+    parser.add_argument("--concurrent", type=int, default=10, help="Number of concurrent users (default: 10)")

     args = parser.parse_args()
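A hedged example of driving this script; the file name is not captured above, so `benchmark.py` is assumed, while the flags and environment variables are the ones defined in `main()`:

```bash
# BENCHMARK_BASE_URL and INFERENCE_MODEL feed the argparse defaults shown above
export BENCHMARK_BASE_URL=http://localhost:8000/v1/openai/v1
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct

# 60-second run with 10 concurrent workers (the script's defaults)
python benchmark.py --duration 60 --concurrent 10
```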
OpenAI-compatible mock server (file path not captured in this mirror)

@@ -11,16 +11,18 @@ OpenAI-compatible mock server that returns:
 - Valid OpenAI-formatted chat completion responses with dynamic content
 """

-from flask import Flask, request, jsonify, Response
-import time
-import random
-import uuid
-import json
 import argparse
+import json
 import os
+import random
+import time
+import uuid
+
+from flask import Flask, Response, jsonify, request

 app = Flask(__name__)


 # Models from environment variables
 def get_models():
     models_str = os.getenv("MOCK_MODELS", "meta-llama/Llama-3.2-3B-Instruct")

@@ -29,40 +31,72 @@ def get_models():
     return {
         "object": "list",
         "data": [
-            {
-                "id": model_id,
-                "object": "model",
-                "created": 1234567890,
-                "owned_by": "vllm"
-            }
-            for model_id in model_ids
-        ]
+            {"id": model_id, "object": "model", "created": 1234567890, "owned_by": "vllm"} for model_id in model_ids
+        ],
     }


 def generate_random_text(length=50):
     """Generate random but coherent text for responses."""
     words = [
-        "Hello", "there", "I'm", "an", "AI", "assistant", "ready", "to", "help", "you",
-        "with", "your", "questions", "and", "tasks", "today", "Let", "me","know", "what",
-        "you'd", "like", "to", "discuss", "or", "explore", "together", "I", "can", "assist",
-        "with", "various", "topics", "including", "coding", "writing", "analysis", "and", "more"
+        "Hello",
+        "there",
+        "I'm",
+        "an",
+        "AI",
+        "assistant",
+        "ready",
+        "to",
+        "help",
+        "you",
+        "with",
+        "your",
+        "questions",
+        "and",
+        "tasks",
+        "today",
+        "Let",
+        "me",
+        "know",
+        "what",
+        "you'd",
+        "like",
+        "to",
+        "discuss",
+        "or",
+        "explore",
+        "together",
+        "I",
+        "can",
+        "assist",
+        "with",
+        "various",
+        "topics",
+        "including",
+        "coding",
+        "writing",
+        "analysis",
+        "and",
+        "more",
     ]
     return " ".join(random.choices(words, k=length))

-@app.route('/v1/models', methods=['GET'])
+
+@app.route("/v1/models", methods=["GET"])
 def list_models():
     models = get_models()
     print(f"[MOCK] Returning models: {[m['id'] for m in models['data']]}")
     return jsonify(models)

-@app.route('/v1/chat/completions', methods=['POST'])
+
+@app.route("/v1/chat/completions", methods=["POST"])
 def chat_completions():
     """Return OpenAI-formatted chat completion responses."""
     data = request.get_json()
-    default_model = get_models()['data'][0]['id']
-    model = data.get('model', default_model)
-    messages = data.get('messages', [])
-    stream = data.get('stream', False)
+    default_model = get_models()["data"][0]["id"]
+    model = data.get("model", default_model)
+    messages = data.get("messages", [])
+    stream = data.get("stream", False)

     print(f"[MOCK] Chat completion request - model: {model}, stream: {stream}")

@@ -71,11 +105,12 @@ def chat_completions():
     else:
         return handle_non_streaming_completion(model, messages)

+
 def handle_non_streaming_completion(model, messages):
     response_text = generate_random_text(random.randint(20, 80))

     # Calculate realistic token counts
-    prompt_tokens = sum(len(str(msg.get('content', '')).split()) for msg in messages)
+    prompt_tokens = sum(len(str(msg.get("content", "")).split()) for msg in messages)
     completion_tokens = len(response_text.split())

     response = {

@@ -83,25 +118,17 @@ def handle_non_streaming_completion(model, messages):
         "object": "chat.completion",
         "created": int(time.time()),
         "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": response_text
-                },
-                "finish_reason": "stop"
-            }
-        ],
+        "choices": [{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
         "usage": {
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
-            "total_tokens": prompt_tokens + completion_tokens
-        }
+            "total_tokens": prompt_tokens + completion_tokens,
+        },
     }

     return jsonify(response)


 def handle_streaming_completion(model, messages):
     def generate_stream():
         # Generate response text

@@ -114,12 +141,7 @@ def handle_streaming_completion(model, messages):
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"role": "assistant", "content": ""}
-                }
-            ]
+            "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}}],
         }
         yield f"data: {json.dumps(initial_chunk)}\n\n"

@@ -130,12 +152,7 @@ def handle_streaming_completion(model, messages):
                 "object": "chat.completion.chunk",
                 "created": int(time.time()),
                 "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": f"{word} " if i < len(words) - 1 else word}
-                    }
-                ]
+                "choices": [{"index": 0, "delta": {"content": f"{word} " if i < len(words) - 1 else word}}],
             }
             yield f"data: {json.dumps(chunk)}\n\n"
             # Configurable delay to simulate realistic streaming

@@ -148,35 +165,30 @@ def handle_streaming_completion(model, messages):
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"content": ""},
-                    "finish_reason": "stop"
-                }
-            ]
+            "choices": [{"index": 0, "delta": {"content": ""}, "finish_reason": "stop"}],
         }
         yield f"data: {json.dumps(final_chunk)}\n\n"
         yield "data: [DONE]\n\n"

     return Response(
         generate_stream(),
-        mimetype='text/event-stream',
+        mimetype="text/event-stream",
         headers={
-            'Cache-Control': 'no-cache',
-            'Connection': 'keep-alive',
-            'Access-Control-Allow-Origin': '*',
-        }
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Access-Control-Allow-Origin": "*",
+        },
     )

-@app.route('/health', methods=['GET'])
+
+@app.route("/health", methods=["GET"])
 def health():
     return jsonify({"status": "healthy", "type": "openai-mock"})

-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='OpenAI-compatible mock server')
-    parser.add_argument('--port', type=int, default=8081,
-                        help='Port to run the server on (default: 8081)')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="OpenAI-compatible mock server")
+    parser.add_argument("--port", type=int, default=8081, help="Port to run the server on (default: 8081)")
     args = parser.parse_args()

     port = args.port

@@ -187,4 +199,4 @@ if __name__ == '__main__':
     print("- OpenAI-formatted chat/completion responses with dynamic content")
     print("- Streaming support with valid SSE format")
     print(f"- Listening on: http://0.0.0.0:{port}")
-    app.run(host='0.0.0.0', port=port, debug=False)
+    app.run(host="0.0.0.0", port=port, debug=False)
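A quick smoke test of the mock server; the file name is not captured above, so `mock_server.py` is assumed, while the routes, default port, and `MOCK_MODELS` variable come from the code itself:

```bash
MOCK_MODELS="meta-llama/Llama-3.2-3B-Instruct" python mock_server.py --port 8081 &

curl -s http://localhost:8081/health
curl -s http://localhost:8081/v1/models
curl -s http://localhost:8081/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "meta-llama/Llama-3.2-3B-Instruct", "messages": [{"role": "user", "content": "hi"}], "stream": false}'
```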
Stack run configuration embedded under a `data:` key (file path not captured in this mirror)

@@ -6,6 +6,7 @@ data:
     apis:
     - agents
     - inference
+    - files
     - safety
     - telemetry
     - tool_runtime

@@ -19,13 +20,6 @@ data:
         max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
         api_token: ${env.VLLM_API_TOKEN:=fake}
         tls_verify: ${env.VLLM_TLS_VERIFY:=true}
-      - provider_id: vllm-safety
-        provider_type: remote::vllm
-        config:
-          url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
-          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
-          api_token: ${env.VLLM_API_TOKEN:=fake}
-          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
       - provider_id: sentence-transformers
         provider_type: inline::sentence-transformers
         config: {}

@@ -41,6 +35,14 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
+      files:
+      - provider_id: meta-reference-files
+        provider_type: inline::localfs
+        config:
+          storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+          metadata_store:
+            type: sqlite
+            db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
       safety:
       - provider_id: llama-guard
         provider_type: inline::llama-guard

@@ -111,9 +113,6 @@ data:
     - model_id: ${env.INFERENCE_MODEL}
       provider_id: vllm-inference
       model_type: llm
-    - model_id: ${env.SAFETY_MODEL}
-      provider_id: vllm-safety
-      model_type: llm
     shields:
     - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
     vector_dbs: []
@@ -2,7 +2,10 @@ version: '2'
 image_name: kubernetes-benchmark-demo
 apis:
 - agents
+- files
 - inference
+- files
+- safety
 - telemetry
 - tool_runtime
 - vector_io
@@ -18,6 +21,14 @@ providers:
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
   vector_io:
   - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
@@ -30,6 +41,19 @@ providers:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -95,6 +119,8 @@ models:
 - model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm-inference
   model_type: llm
+shields:
+- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
 vector_dbs: []
 datasets: []
 scoring_fns: []
docs/_static/css/my_theme.css (vendored, 101 lines changed)
@@ -1,5 +1,106 @@
 @import url("theme.css");
+
+/* Horizontal Navigation Bar */
+.horizontal-nav {
+  background-color: #ffffff;
+  border-bottom: 1px solid #e5e5e5;
+  padding: 0;
+  position: fixed;
+  top: 0;
+  left: 0;
+  right: 0;
+  z-index: 1050;
+  height: 50px;
+  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+}
+
+[data-theme="dark"] .horizontal-nav {
+  background-color: #1a1a1a;
+  border-bottom: 1px solid #333;
+}
+
+.horizontal-nav .nav-container {
+  max-width: 1200px;
+  margin: 0 auto;
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 0 20px;
+  height: 100%;
+}
+
+.horizontal-nav .nav-brand {
+  font-size: 18px;
+  font-weight: 600;
+  color: #333;
+  text-decoration: none;
+}
+
+[data-theme="dark"] .horizontal-nav .nav-brand {
+  color: #fff;
+}
+
+.horizontal-nav .nav-links {
+  display: flex;
+  align-items: center;
+  gap: 30px;
+  list-style: none;
+  margin: 0;
+  padding: 0;
+}
+
+.horizontal-nav .nav-links a {
+  color: #666;
+  text-decoration: none;
+  font-size: 14px;
+  font-weight: 500;
+  padding: 8px 12px;
+  border-radius: 6px;
+  transition: all 0.2s ease;
+}
+
+.horizontal-nav .nav-links a:hover,
+.horizontal-nav .nav-links a.active {
+  color: #333;
+  background-color: #f5f5f5;
+}
+
+.horizontal-nav .nav-links a.active {
+  font-weight: 600;
+}
+
+[data-theme="dark"] .horizontal-nav .nav-links a {
+  color: #ccc;
+}
+
+[data-theme="dark"] .horizontal-nav .nav-links a:hover,
+[data-theme="dark"] .horizontal-nav .nav-links a.active {
+  color: #fff;
+  background-color: #333;
+}
+
+.horizontal-nav .nav-links .github-link {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+}
+
+.horizontal-nav .nav-links .github-icon {
+  width: 16px;
+  height: 16px;
+  fill: currentColor;
+}
+
+/* Adjust main content to account for fixed nav */
+.wy-nav-side {
+  top: 50px;
+  height: calc(100vh - 50px);
+}
+
+.wy-nav-content-wrap {
+  margin-top: 50px;
+}
+
 .wy-nav-content {
   max-width: 90%;
 }

docs/_static/js/horizontal_nav.js (new file, vendored, 44 lines)
@@ -0,0 +1,44 @@
+// Horizontal Navigation Bar for Llama Stack Documentation
+document.addEventListener('DOMContentLoaded', function() {
+  // Create the horizontal navigation HTML
+  const navHTML = `
+    <nav class="horizontal-nav">
+      <div class="nav-container">
+        <a href="/" class="nav-brand">Llama Stack</a>
+        <ul class="nav-links">
+          <li><a href="/">Docs</a></li>
+          <li><a href="/references/api_reference/">API Reference</a></li>
+          <li><a href="https://github.com/meta-llama/llama-stack" target="_blank" class="github-link">
+            <svg class="github-icon" viewBox="0 0 16 16" aria-hidden="true">
+              <path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/>
+            </svg>
+            GitHub
+          </a></li>
+        </ul>
+      </div>
+    </nav>
+  `;
+
+  // Insert the navigation at the beginning of the body
+  document.body.insertAdjacentHTML('afterbegin', navHTML);
+
+  // Update navigation links based on current page
+  updateActiveNav();
+});
+
+function updateActiveNav() {
+  const currentPath = window.location.pathname;
+  const navLinks = document.querySelectorAll('.horizontal-nav .nav-links a');
+
+  navLinks.forEach(link => {
+    // Remove any existing active classes
+    link.classList.remove('active');
+
+    // Add active class based on current path
+    if (currentPath === '/' && link.getAttribute('href') === '/') {
+      link.classList.add('active');
+    } else if (currentPath.includes('/references/api_reference/') && link.getAttribute('href').includes('api_reference')) {
+      link.classList.add('active');
+    }
+  });
+}
457
docs/_static/llama-stack-spec.html
vendored
457
docs/_static/llama-stack-spec.html
vendored
|
@ -633,6 +633,80 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/prompts": {
|
||||||
|
"get": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "A ListPromptsResponse containing all prompts.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ListPromptsResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "List all prompts.",
|
||||||
|
"parameters": []
|
||||||
|
},
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "The created Prompt resource.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Create a new prompt.",
|
||||||
|
"parameters": [],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/CreatePromptRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/agents/{agent_id}": {
|
"/v1/agents/{agent_id}": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -901,6 +975,143 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/prompts/{prompt_id}": {
|
||||||
|
"get": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "A Prompt resource.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Get a prompt by its identifier and optional version.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt to get.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "version",
|
||||||
|
"in": "query",
|
||||||
|
"description": "The version of the prompt to get (defaults to latest).",
|
||||||
|
"required": false,
|
||||||
|
"schema": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "The updated Prompt resource with incremented version.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Update an existing prompt (increments version).",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt to update.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/UpdatePromptRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"delete": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK"
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Delete a prompt.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt to delete.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/inference/embeddings": {
|
"/v1/inference/embeddings": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -2836,6 +3047,49 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/prompts/{prompt_id}/versions": {
|
||||||
|
"get": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "A ListPromptsResponse containing all versions of the prompt.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ListPromptsResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "List all versions of a specific prompt.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt to list versions for.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/providers": {
|
"/v1/providers": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -5007,6 +5261,59 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/prompts/{prompt_id}/set-default-version": {
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "The prompt with the specified version now set as default.",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Prompts"
|
||||||
|
],
|
||||||
|
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "prompt_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The identifier of the prompt.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/SetDefaultVersionRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/post-training/supervised-fine-tune": {
|
"/v1/post-training/supervised-fine-tune": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -9670,6 +9977,65 @@
|
||||||
],
|
],
|
||||||
"title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
|
"title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
|
||||||
},
|
},
|
||||||
|
"CreatePromptRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The prompt text content with variable placeholders."
|
||||||
|
},
|
||||||
|
"variables": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "List of variable names that can be used in the prompt template."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"prompt"
|
||||||
|
],
|
||||||
|
"title": "CreatePromptRequest"
|
||||||
|
},
|
||||||
|
"Prompt": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The system prompt text with variable placeholders. Variables are only supported when using the Responses API."
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Version (integer starting at 1, incremented on save)"
|
||||||
|
},
|
||||||
|
"prompt_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Unique identifier formatted as 'pmpt_<48-digit-hash>'"
|
||||||
|
},
|
||||||
|
"variables": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "List of prompt variable names that can be used in the prompt template"
|
||||||
|
},
|
||||||
|
"is_default": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false,
|
||||||
|
"description": "Boolean indicating whether this version is the default version for this prompt"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"version",
|
||||||
|
"prompt_id",
|
||||||
|
"variables",
|
||||||
|
"is_default"
|
||||||
|
],
|
||||||
|
"title": "Prompt",
|
||||||
|
"description": "A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack."
|
||||||
|
},
|
||||||
"OpenAIDeleteResponseObject": {
|
"OpenAIDeleteResponseObject": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -10296,7 +10662,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "benchmark",
|
"const": "benchmark",
|
||||||
"default": "benchmark",
|
"default": "benchmark",
|
||||||
|
@ -10923,7 +11290,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "dataset",
|
"const": "dataset",
|
||||||
"default": "dataset",
|
"default": "dataset",
|
||||||
|
@ -11073,7 +11441,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "model",
|
"const": "model",
|
||||||
"default": "model",
|
"default": "model",
|
||||||
|
@ -11338,7 +11707,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "scoring_function",
|
"const": "scoring_function",
|
||||||
"default": "scoring_function",
|
"default": "scoring_function",
|
||||||
|
@ -11446,7 +11816,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "shield",
|
"const": "shield",
|
||||||
"default": "shield",
|
"default": "shield",
|
||||||
|
@ -11691,7 +12062,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "tool",
|
"const": "tool",
|
||||||
"default": "tool",
|
"default": "tool",
|
||||||
|
@ -11773,7 +12145,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "tool_group",
|
"const": "tool_group",
|
||||||
"default": "tool_group",
|
"default": "tool_group",
|
||||||
|
@ -12067,7 +12440,8 @@
|
||||||
"scoring_function",
|
"scoring_function",
|
||||||
"benchmark",
|
"benchmark",
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group",
|
||||||
|
"prompt"
|
||||||
],
|
],
|
||||||
"const": "vector_db",
|
"const": "vector_db",
|
||||||
"default": "vector_db",
|
"default": "vector_db",
|
||||||
|
@ -12882,6 +13256,23 @@
|
||||||
"title": "OpenAIResponseObjectWithInput",
|
"title": "OpenAIResponseObjectWithInput",
|
||||||
"description": "OpenAI response object extended with input context information."
|
"description": "OpenAI response object extended with input context information."
|
||||||
},
|
},
|
||||||
|
"ListPromptsResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"data"
|
||||||
|
],
|
||||||
|
"title": "ListPromptsResponse",
|
||||||
|
"description": "Response model to list prompts."
|
||||||
|
},
|
||||||
"ListProvidersResponse": {
|
"ListProvidersResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -17128,6 +17519,20 @@
|
||||||
"title": "ScoreBatchResponse",
|
"title": "ScoreBatchResponse",
|
||||||
"description": "Response from batch scoring operations on datasets."
|
"description": "Response from batch scoring operations on datasets."
|
||||||
},
|
},
|
||||||
|
"SetDefaultVersionRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"version": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "The version to set as default."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"version"
|
||||||
|
],
|
||||||
|
"title": "SetDefaultVersionRequest"
|
||||||
|
},
|
||||||
"AlgorithmConfig": {
|
"AlgorithmConfig": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
{
|
{
|
||||||
|
@ -17412,6 +17817,37 @@
|
||||||
"title": "SyntheticDataGenerationResponse",
|
"title": "SyntheticDataGenerationResponse",
|
||||||
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
||||||
},
|
},
|
||||||
|
"UpdatePromptRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The updated prompt text content."
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "The current version of the prompt being updated."
|
||||||
|
},
|
||||||
|
"variables": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "Updated list of variable names that can be used in the prompt template."
|
||||||
|
},
|
||||||
|
"set_as_default": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Set the new version as the default (default=True)."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"prompt",
|
||||||
|
"version",
|
||||||
|
"set_as_default"
|
||||||
|
],
|
||||||
|
"title": "UpdatePromptRequest"
|
||||||
|
},
|
||||||
"VersionInfo": {
|
"VersionInfo": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -17537,6 +17973,10 @@
|
||||||
{
|
{
|
||||||
"name": "PostTraining (Coming Soon)"
|
"name": "PostTraining (Coming Soon)"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Prompts",
|
||||||
|
"x-displayName": "Protocol for prompt management operations."
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Providers",
|
"name": "Providers",
|
||||||
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
||||||
|
@ -17587,6 +18027,7 @@
|
||||||
"Inspect",
|
"Inspect",
|
||||||
"Models",
|
"Models",
|
||||||
"PostTraining (Coming Soon)",
|
"PostTraining (Coming Soon)",
|
||||||
|
"Prompts",
|
||||||
"Providers",
|
"Providers",
|
||||||
"Safety",
|
"Safety",
|
||||||
"Scoring",
|
"Scoring",
|
||||||
|
|
332
docs/_static/llama-stack-spec.yaml
vendored
332
docs/_static/llama-stack-spec.yaml
vendored
|
@ -427,6 +427,58 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1/prompts:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
A ListPromptsResponse containing all prompts.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ListPromptsResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: List all prompts.
|
||||||
|
parameters: []
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: The created Prompt resource.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: Create a new prompt.
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/CreatePromptRequest'
|
||||||
|
required: true
|
||||||
/v1/agents/{agent_id}:
|
/v1/agents/{agent_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
@ -616,6 +668,103 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
/v1/prompts/{prompt_id}:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: A Prompt resource.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: >-
|
||||||
|
Get a prompt by its identifier and optional version.
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: The identifier of the prompt to get.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: version
|
||||||
|
in: query
|
||||||
|
description: >-
|
||||||
|
The version of the prompt to get (defaults to latest).
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: integer
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
The updated Prompt resource with incremented version.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: >-
|
||||||
|
Update an existing prompt (increments version).
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: The identifier of the prompt to update.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/UpdatePromptRequest'
|
||||||
|
required: true
|
||||||
|
delete:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: Delete a prompt.
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: The identifier of the prompt to delete.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
/v1/inference/embeddings:
|
/v1/inference/embeddings:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -1983,6 +2132,37 @@ paths:
|
||||||
required: false
|
required: false
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/Order'
|
$ref: '#/components/schemas/Order'
|
||||||
|
/v1/prompts/{prompt_id}/versions:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
A ListPromptsResponse containing all versions of the prompt.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ListPromptsResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: List all versions of a specific prompt.
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: >-
|
||||||
|
The identifier of the prompt to list versions for.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
/v1/providers:
|
/v1/providers:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
@ -3546,6 +3726,43 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/ScoreBatchRequest'
|
$ref: '#/components/schemas/ScoreBatchRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1/prompts/{prompt_id}/set-default-version:
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
The prompt with the specified version now set as default.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Prompts
|
||||||
|
description: >-
|
||||||
|
Set which version of a prompt should be the default in get_prompt (latest).
|
||||||
|
parameters:
|
||||||
|
- name: prompt_id
|
||||||
|
in: path
|
||||||
|
description: The identifier of the prompt.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/SetDefaultVersionRequest'
|
||||||
|
required: true
|
||||||
/v1/post-training/supervised-fine-tune:
|
/v1/post-training/supervised-fine-tune:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -7148,6 +7365,61 @@ components:
|
||||||
- type
|
- type
|
||||||
title: >-
|
title: >-
|
||||||
OpenAIResponseObjectStreamResponseWebSearchCallSearching
|
OpenAIResponseObjectStreamResponseWebSearchCallSearching
|
||||||
|
CreatePromptRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
prompt:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The prompt text content with variable placeholders.
|
||||||
|
variables:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
List of variable names that can be used in the prompt template.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- prompt
|
||||||
|
title: CreatePromptRequest
|
||||||
|
Prompt:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
prompt:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The system prompt text with variable placeholders. Variables are only
|
||||||
|
supported when using the Responses API.
|
||||||
|
version:
|
||||||
|
type: integer
|
||||||
|
description: >-
|
||||||
|
Version (integer starting at 1, incremented on save)
|
||||||
|
prompt_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
Unique identifier formatted as 'pmpt_<48-digit-hash>'
|
||||||
|
variables:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
List of prompt variable names that can be used in the prompt template
|
||||||
|
is_default:
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
description: >-
|
||||||
|
Boolean indicating whether this version is the default version for this
|
||||||
|
prompt
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- version
|
||||||
|
- prompt_id
|
||||||
|
- variables
|
||||||
|
- is_default
|
||||||
|
title: Prompt
|
||||||
|
description: >-
|
||||||
|
A prompt resource representing a stored OpenAI Compatible prompt template
|
||||||
|
in Llama Stack.
|
||||||
OpenAIDeleteResponseObject:
|
OpenAIDeleteResponseObject:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7621,6 +7893,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: benchmark
|
const: benchmark
|
||||||
default: benchmark
|
default: benchmark
|
||||||
description: The resource type, always benchmark
|
description: The resource type, always benchmark
|
||||||
|
@ -8107,6 +8380,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: dataset
|
const: dataset
|
||||||
default: dataset
|
default: dataset
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -8219,6 +8493,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: model
|
const: model
|
||||||
default: model
|
default: model
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -8410,6 +8685,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: scoring_function
|
const: scoring_function
|
||||||
default: scoring_function
|
default: scoring_function
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -8486,6 +8762,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: shield
|
const: shield
|
||||||
default: shield
|
default: shield
|
||||||
description: The resource type, always shield
|
description: The resource type, always shield
|
||||||
|
@ -8665,6 +8942,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: tool
|
const: tool
|
||||||
default: tool
|
default: tool
|
||||||
description: Type of resource, always 'tool'
|
description: Type of resource, always 'tool'
|
||||||
|
@ -8723,6 +9001,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: tool_group
|
const: tool_group
|
||||||
default: tool_group
|
default: tool_group
|
||||||
description: Type of resource, always 'tool_group'
|
description: Type of resource, always 'tool_group'
|
||||||
|
@ -8951,6 +9230,7 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
|
- prompt
|
||||||
const: vector_db
|
const: vector_db
|
||||||
default: vector_db
|
default: vector_db
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -9577,6 +9857,18 @@ components:
|
||||||
title: OpenAIResponseObjectWithInput
|
title: OpenAIResponseObjectWithInput
|
||||||
description: >-
|
description: >-
|
||||||
OpenAI response object extended with input context information.
|
OpenAI response object extended with input context information.
|
||||||
|
ListPromptsResponse:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
data:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/Prompt'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- data
|
||||||
|
title: ListPromptsResponse
|
||||||
|
description: Response model to list prompts.
|
||||||
ListProvidersResponse:
|
ListProvidersResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -12722,6 +13014,16 @@ components:
|
||||||
title: ScoreBatchResponse
|
title: ScoreBatchResponse
|
||||||
description: >-
|
description: >-
|
||||||
Response from batch scoring operations on datasets.
|
Response from batch scoring operations on datasets.
|
||||||
|
SetDefaultVersionRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
version:
|
||||||
|
type: integer
|
||||||
|
description: The version to set as default.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- version
|
||||||
|
title: SetDefaultVersionRequest
|
||||||
AlgorithmConfig:
|
AlgorithmConfig:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
||||||
|
@ -12918,6 +13220,32 @@ components:
|
||||||
description: >-
|
description: >-
|
||||||
Response from the synthetic data generation. Batch of (prompt, response, score)
|
Response from the synthetic data generation. Batch of (prompt, response, score)
|
||||||
tuples that pass the threshold.
|
tuples that pass the threshold.
|
||||||
|
UpdatePromptRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
prompt:
|
||||||
|
type: string
|
||||||
|
description: The updated prompt text content.
|
||||||
|
version:
|
||||||
|
type: integer
|
||||||
|
description: >-
|
||||||
|
The current version of the prompt being updated.
|
||||||
|
variables:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
Updated list of variable names that can be used in the prompt template.
|
||||||
|
set_as_default:
|
||||||
|
type: boolean
|
||||||
|
description: >-
|
||||||
|
Set the new version as the default (default=True).
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- prompt
|
||||||
|
- version
|
||||||
|
- set_as_default
|
||||||
|
title: UpdatePromptRequest
|
||||||
VersionInfo:
|
VersionInfo:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -13029,6 +13357,9 @@ tags:
|
||||||
- name: Inspect
|
- name: Inspect
|
||||||
- name: Models
|
- name: Models
|
||||||
- name: PostTraining (Coming Soon)
|
- name: PostTraining (Coming Soon)
|
||||||
|
- name: Prompts
|
||||||
|
x-displayName: >-
|
||||||
|
Protocol for prompt management operations.
|
||||||
- name: Providers
|
- name: Providers
|
||||||
x-displayName: >-
|
x-displayName: >-
|
||||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||||
|
@ -13056,6 +13387,7 @@ x-tagGroups:
|
||||||
- Inspect
|
- Inspect
|
||||||
- Models
|
- Models
|
||||||
- PostTraining (Coming Soon)
|
- PostTraining (Coming Soon)
|
||||||
|
- Prompts
|
||||||
- Providers
|
- Providers
|
||||||
- Safety
|
- Safety
|
||||||
- Scoring
|
- Scoring
|
||||||
|
|
|
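Taken together, the Prompts endpoints and schemas added to the spec above form a small CRUD surface (create, get, list, update, version, set-default, delete). A minimal sketch of exercising it over HTTP follows; the server address, template syntax, and prompt_id handling are illustrative assumptions, not values taken from this change:

```python
# Illustrative walkthrough of the new Prompts endpoints using `requests`.
# Assumes a stack listening on localhost:8321; payload fields mirror the
# CreatePromptRequest / UpdatePromptRequest / SetDefaultVersionRequest schemas above.
import requests

BASE = "http://localhost:8321"

# Create a prompt template (only `prompt` is required; `variables` is optional).
created = requests.post(
    f"{BASE}/v1/prompts",
    json={"prompt": "Summarize {{ topic }} in one paragraph.", "variables": ["topic"]},
).json()
prompt_id = created["prompt_id"]

# List prompts, then fetch a specific version of this one.
print(requests.get(f"{BASE}/v1/prompts").json())
print(requests.get(f"{BASE}/v1/prompts/{prompt_id}", params={"version": 1}).json())

# Update it (increments the version), then pin which version is returned by default.
requests.post(
    f"{BASE}/v1/prompts/{prompt_id}",
    json={"prompt": "Summarize {{ topic }} briefly.", "version": 1, "set_as_default": True},
)
requests.post(f"{BASE}/v1/prompts/{prompt_id}/set-default-version", json={"version": 2})

# List all versions, then delete the prompt.
print(requests.get(f"{BASE}/v1/prompts/{prompt_id}/versions").json())
requests.delete(f"{BASE}/v1/prompts/{prompt_id}")
```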
@@ -93,10 +93,31 @@ chunks_response = client.vector_io.query(

 ### Using the RAG Tool
+
+> **⚠️ DEPRECATION NOTICE**: The RAG Tool is being deprecated in favor of directly using the OpenAI-compatible Search
+> API. We recommend migrating to the OpenAI APIs for better compatibility and future support.
+
 A better way to ingest documents is to use the RAG Tool. This tool allows you to ingest documents from URLs, files, etc.
 and automatically chunks them into smaller pieces. More examples for how to format a RAGDocument can be found in the
 [appendix](#more-ragdocument-examples).
+
+#### OpenAI API Integration & Migration
+
+The RAG tool has been updated to use OpenAI-compatible APIs. This provides several benefits:
+
+- **Files API Integration**: Documents are now uploaded using OpenAI's file upload endpoints
+- **Vector Stores API**: Vector storage operations use OpenAI's vector store format with configurable chunking strategies
+- **Error Resilience**: When processing multiple documents, individual failures are logged but don't crash the operation. Failed documents are skipped while successful ones continue processing.
+
+**Migration Path:**
+We recommend migrating to the OpenAI-compatible Search API for:
+1. **Better OpenAI Ecosystem Integration**: Direct compatibility with OpenAI tools and workflows, including the Responses API
+2. **Future-Proof**: Continued support and feature development
+3. **Full OpenAI Compatibility**: Vector Stores, Files, and Search APIs are fully compatible with OpenAI's Responses API
+
+The OpenAI APIs are used under the hood, so you can continue to use your existing RAG Tool code with minimal changes.
+However, we recommend updating your code to use the new OpenAI-compatible APIs for better long-term support. If any
+documents fail to process, they will be logged in the response but will not cause the entire operation to fail.
+
 ```python
 from llama_stack_client import RAGDocument
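For comparison, the OpenAI-compatible path described above looks roughly like the sketch below. It assumes an OpenAI-style client pointed at a stack's OpenAI-compatible base URL; the base URL, file name, and query are placeholders, and the method names follow the OpenAI Python SDK rather than anything guaranteed by this change:

```python
# Rough sketch of the Files + Vector Stores + Search flow, under the assumptions above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

# 1. Upload a document through the Files API.
uploaded = client.files.create(file=open("my_document.txt", "rb"), purpose="assistants")

# 2. Create a vector store and attach the uploaded file so it gets chunked and indexed.
store = client.vector_stores.create(name="docs")
client.vector_stores.files.create(vector_store_id=store.id, file_id=uploaded.id)

# 3. Query the store with the Search API.
results = client.vector_stores.search(vector_store_id=store.id, query="How do I configure providers?")
for hit in results.data:
    print(hit)
```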
@@ -131,6 +131,7 @@ html_static_path = ["../_static"]
 def setup(app):
     app.add_css_file("css/my_theme.css")
     app.add_js_file("js/detect_theme.js")
+    app.add_js_file("js/horizontal_nav.js")
     app.add_js_file("js/keyboard_shortcuts.js")

 def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
@@ -35,5 +35,5 @@ testing/record-replay

 ### Benchmarking

-```{include} ../../../docs/source/distributions/k8s-benchmark/README.md
+```{include} ../../../benchmarking/k8s-benchmark/README.md
 ```
@@ -40,18 +40,15 @@ The system patches OpenAI and Ollama client methods to intercept calls before th

 ### Storage Architecture

-Recordings use a two-tier storage system optimized for both speed and debuggability:
+Recordings are stored as JSON files in the recording directory. They are looked up by their request hash.

 ```
 recordings/
-├── index.sqlite          # Fast lookup by request hash
 └── responses/
     ├── abc123def456.json # Individual response files
     └── def789ghi012.json
 ```

-**SQLite index** enables O(log n) hash lookups and metadata queries without loading response bodies.
-
 **JSON files** store complete request/response pairs in human-readable format for debugging.

 ## Recording Modes

@@ -166,8 +163,8 @@ This preserves type safety - when replayed, you get the same Pydantic objects wi
 Control recording behavior globally:

 ```bash
-export LLAMA_STACK_TEST_INFERENCE_MODE=replay
-export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings
+export LLAMA_STACK_TEST_INFERENCE_MODE=replay  # this is the default
+export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings  # default is tests/integration/recordings
 pytest tests/integration/
 ```
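The hash-based lookup described under Storage Architecture can be pictured with the purely illustrative sketch below; the hashing scheme, field selection, and file naming here are assumptions made for explanation, not the actual implementation in the test harness:

```python
# Illustrative only: locate a recorded response by hashing a normalized request.
import hashlib
import json
from pathlib import Path

RECORDING_DIR = Path("tests/integration/recordings")  # default directory noted above

def lookup_recording(method: str, url: str, body: dict) -> dict | None:
    # Hash a canonical form of the request (assumed scheme, for illustration).
    canonical = json.dumps({"method": method, "url": url, "body": body}, sort_keys=True)
    request_hash = hashlib.sha256(canonical.encode()).hexdigest()[:12]
    path = RECORDING_DIR / "responses" / f"{request_hash}.json"
    return json.loads(path.read_text()) if path.exists() else None
```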
@@ -354,6 +354,47 @@ You can easily validate a request by running:
 curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
 ```
+
+#### Kubernetes Authentication Provider
+
+The server can be configured to use the Kubernetes SelfSubjectReview API to validate tokens directly against the Kubernetes API server:
+
+```yaml
+server:
+  auth:
+    provider_config:
+      type: "kubernetes"
+      api_server_url: "https://kubernetes.default.svc"
+      claims_mapping:
+        username: "roles"
+        groups: "roles"
+        uid: "uid_attr"
+      verify_tls: true
+      tls_cafile: "/path/to/ca.crt"
+```
+
+Configuration options:
+- `api_server_url`: The Kubernetes API server URL (e.g., https://kubernetes.default.svc:6443)
+- `verify_tls`: Whether to verify TLS certificates (default: true)
+- `tls_cafile`: Path to the CA certificate file for TLS verification
+- `claims_mapping`: Mapping of Kubernetes user claims to access attributes
+
+The provider validates tokens by sending a SelfSubjectReview request to the Kubernetes API server at `/apis/authentication.k8s.io/v1/selfsubjectreviews`. The provider extracts user information from the response:
+- Username from the `userInfo.username` field
+- Groups from the `userInfo.groups` field
+- UID from the `userInfo.uid` field
+
+To obtain a token for testing:
+```bash
+kubectl create namespace llama-stack
+kubectl create serviceaccount llama-stack-auth -n llama-stack
+kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
+```
+
+You can validate a request by running:
+```bash
+curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
+```
+
 #### GitHub Token Provider
 Validates GitHub personal access tokens or OAuth tokens directly:
 ```yaml
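Returning to the Kubernetes provider above: the SelfSubjectReview call it performs can be reproduced by hand when debugging token issues. The sketch below is illustrative only; the API server address, token path, and TLS settings are assumptions that should mirror your provider_config:

```python
# Illustrative only: issue the same SelfSubjectReview request the provider sends.
# Assumes the `requests` package and a token saved to ./llama-stack-auth-token.
import requests

API_SERVER = "https://kubernetes.default.svc"  # assumption; match api_server_url
token = open("llama-stack-auth-token").read().strip()

resp = requests.post(
    f"{API_SERVER}/apis/authentication.k8s.io/v1/selfsubjectreviews",
    headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
    json={"apiVersion": "authentication.k8s.io/v1", "kind": "SelfSubjectReview"},
    verify="/path/to/ca.crt",  # mirrors verify_tls / tls_cafile
)
resp.raise_for_status()
user_info = resp.json()["status"]["userInfo"]
print(user_info.get("username"), user_info.get("groups"), user_info.get("uid"))
```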
@ -1,137 +1,55 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
data:
|
data:
|
||||||
stack_run_config.yaml: |
|
stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n-
|
||||||
version: '2'
|
inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n
|
||||||
image_name: kubernetes-demo
|
\ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n
|
||||||
apis:
|
\ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens:
|
||||||
- agents
|
${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify:
|
||||||
- inference
|
${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type:
|
||||||
- safety
|
remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n
|
||||||
- telemetry
|
\ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n
|
||||||
- tool_runtime
|
\ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n
|
||||||
- vector_io
|
\ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n
|
||||||
providers:
|
\ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n
|
||||||
inference:
|
\ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n
|
||||||
- provider_id: vllm-inference
|
\ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n
|
||||||
provider_type: remote::vllm
|
\ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n
|
||||||
config:
|
\ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id:
|
||||||
url: ${env.VLLM_URL:=http://localhost:8000/v1}
|
meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir:
|
||||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n
|
||||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
\ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
||||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
\ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n
|
||||||
- provider_id: vllm-safety
|
\ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n
|
||||||
provider_type: remote::vllm
|
\ provider_type: inline::meta-reference\n config:\n persistence_store:\n
|
||||||
config:
|
\ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port:
|
||||||
url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
|
${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
|
||||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
|
||||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
\ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
|
||||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
\ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
|
||||||
- provider_id: sentence-transformers
|
\ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
|
||||||
provider_type: inline::sentence-transformers
|
\ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n
|
||||||
config: {}
|
\ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks:
|
||||||
vector_io:
|
  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
    provider_type: remote::chromadb
    config:
      url: ${env.CHROMADB_URL:=}
      kvstore:
        type: postgres
        host: ${env.POSTGRES_HOST:=localhost}
        port: ${env.POSTGRES_PORT:=5432}
        db: ${env.POSTGRES_DB:=llamastack}
        user: ${env.POSTGRES_USER:=llamastack}
        password: ${env.POSTGRES_PASSWORD:=llamastack}
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: postgres
        host: ${env.POSTGRES_HOST:=localhost}
        port: ${env.POSTGRES_PORT:=5432}
        db: ${env.POSTGRES_DB:=llamastack}
        user: ${env.POSTGRES_USER:=llamastack}
        password: ${env.POSTGRES_PASSWORD:=llamastack}
      responses_store:
        type: postgres
        host: ${env.POSTGRES_HOST:=localhost}
        port: ${env.POSTGRES_PORT:=5432}
        db: ${env.POSTGRES_DB:=llamastack}
        user: ${env.POSTGRES_USER:=llamastack}
        password: ${env.POSTGRES_PASSWORD:=llamastack}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
      sinks: ${env.TELEMETRY_SINKS:=console}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:+}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
metadata_store:
  type: postgres
  host: ${env.POSTGRES_HOST:=localhost}
  port: ${env.POSTGRES_PORT:=5432}
  db: ${env.POSTGRES_DB:=llamastack}
  user: ${env.POSTGRES_USER:=llamastack}
  password: ${env.POSTGRES_PASSWORD:=llamastack}
  table_name: llamastack_kvstore
inference_store:
  type: postgres
  host: ${env.POSTGRES_HOST:=localhost}
  port: ${env.POSTGRES_PORT:=5432}
  db: ${env.POSTGRES_DB:=llamastack}
  user: ${env.POSTGRES_USER:=llamastack}
  password: ${env.POSTGRES_PASSWORD:=llamastack}
models:
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  model_type: llm
- metadata: {}
  model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
  provider_id: vllm-safety
  model_type: llm
shields:
- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
server:
  port: 8321
  auth:
    provider_config:
      type: github_token
kind: ConfigMap
metadata:
  creationTimestamp: null
@@ -3,6 +3,7 @@ image_name: kubernetes-demo
 apis:
 - agents
 - inference
+- files
 - safety
 - telemetry
 - tool_runtime
@@ -38,6 +39,14 @@ providers:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -18,12 +18,13 @@ embedding_model_id = (
 ).identifier
 embedding_dimension = em.metadata["embedding_dimension"]

-_ = client.vector_dbs.register(
+vector_db = client.vector_dbs.register(
     vector_db_id=vector_db_id,
     embedding_model=embedding_model_id,
     embedding_dimension=embedding_dimension,
     provider_id="faiss",
 )
+vector_db_id = vector_db.identifier
 source = "https://www.paulgraham.com/greatwork.html"
 print("rag_tool> Ingesting document:", source)
 document = RAGDocument(
@@ -35,7 +36,7 @@ document = RAGDocument(
 client.tool_runtime.rag_tool.insert(
     documents=[document],
     vector_db_id=vector_db_id,
-    chunk_size_in_tokens=50,
+    chunk_size_in_tokens=100,
 )
 agent = Agent(
     client,
@@ -8,3 +8,4 @@ Here's a list of known external providers that you can use with Llama Stack:
 | KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
 | RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
 | TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
+| MongoDB | VectorIO with MongoDB | Vector_IO | Remote | [mongodb-llama-stack](https://github.com/mongodb-partners/mongodb-llama-stack) |
@@ -18,6 +18,7 @@ This section contains documentation for all available providers for the **inference** API.
 inline_meta-reference
 inline_sentence-transformers
 remote_anthropic
+remote_azure
 remote_bedrock
 remote_cerebras
 remote_databricks
29  docs/source/providers/inference/remote_azure.md  Normal file
@@ -0,0 +1,29 @@
# remote::azure

## Description

Azure OpenAI inference provider for accessing GPT models and other Azure services.
Provider documentation
https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Azure API key for Azure |
| `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
| `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
| `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |

## Sample Configuration

```yaml
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
```
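For orientation, a run-config excerpt that wires this provider in might look like the following sketch; the `provider_id` and the surrounding `providers.inference` keys are illustrative and not part of this change:

```yaml
providers:
  inference:
  - provider_id: azure
    provider_type: remote::azure
    config:
      api_key: ${env.AZURE_API_KEY:=}
      api_base: ${env.AZURE_API_BASE:=}
      api_version: ${env.AZURE_API_VERSION:=}
      api_type: ${env.AZURE_API_TYPE:=}
```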
@@ -15,8 +15,8 @@ AWS Bedrock inference provider for accessing various AI models through AWS's managed service.
 | `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
 | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
 | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
-| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
+| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
-| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
+| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
 | `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |

 ## Sample Configuration
@@ -15,8 +15,8 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
 | `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
 | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
 | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
-| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
+| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
-| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
+| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
 | `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |

 ## Sample Configuration
@@ -79,3 +79,10 @@ class ConflictError(ValueError):

     def __init__(self, message: str) -> None:
         super().__init__(message)
+
+
+class TokenValidationError(ValueError):
+    """raised when token validation fails during authentication"""
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
@@ -102,6 +102,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     :cvar benchmarks: Benchmark suite management
     :cvar tool_groups: Tool group organization
     :cvar files: File storage and management
+    :cvar prompts: Prompt versions and management
     :cvar inspect: Built-in system inspection and introspection
     """

@@ -127,6 +128,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     benchmarks = "benchmarks"
     tool_groups = "tool_groups"
     files = "files"
+    prompts = "prompts"

     # built-in API
     inspect = "inspect"
9  llama_stack/apis/prompts/__init__.py  Normal file
@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .prompts import ListPromptsResponse, Prompt, Prompts

__all__ = ["Prompt", "Prompts", "ListPromptsResponse"]
189  llama_stack/apis/prompts/prompts.py  Normal file
@@ -0,0 +1,189 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
import re
|
||||||
|
import secrets
|
||||||
|
from typing import Protocol, runtime_checkable
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||||
|
|
||||||
|
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||||
|
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class Prompt(BaseModel):
|
||||||
|
"""A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack.
|
||||||
|
|
||||||
|
:param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API.
|
||||||
|
:param version: Version (integer starting at 1, incremented on save)
|
||||||
|
:param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>'
|
||||||
|
:param variables: List of prompt variable names that can be used in the prompt template
|
||||||
|
:param is_default: Boolean indicating whether this version is the default version for this prompt
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt: str | None = Field(default=None, description="The system prompt with variable placeholders")
|
||||||
|
version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1)
|
||||||
|
prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'")
|
||||||
|
variables: list[str] = Field(
|
||||||
|
default_factory=list, description="List of variable names that can be used in the prompt template"
|
||||||
|
)
|
||||||
|
is_default: bool = Field(
|
||||||
|
default=False, description="Boolean indicating whether this version is the default version"
|
||||||
|
)
|
||||||
|
|
||||||
|
@field_validator("prompt_id")
|
||||||
|
@classmethod
|
||||||
|
def validate_prompt_id(cls, prompt_id: str) -> str:
|
||||||
|
if not isinstance(prompt_id, str):
|
||||||
|
raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'")
|
||||||
|
|
||||||
|
if not prompt_id.startswith("pmpt_"):
|
||||||
|
raise ValueError("prompt_id must start with 'pmpt_' prefix")
|
||||||
|
|
||||||
|
hex_part = prompt_id[5:]
|
||||||
|
if len(hex_part) != 48:
|
||||||
|
raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)")
|
||||||
|
|
||||||
|
for char in hex_part:
|
||||||
|
if char not in "0123456789abcdef":
|
||||||
|
raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]")
|
||||||
|
|
||||||
|
return prompt_id
|
||||||
|
|
||||||
|
@field_validator("version")
|
||||||
|
@classmethod
|
||||||
|
def validate_version(cls, prompt_version: int) -> int:
|
||||||
|
if prompt_version < 1:
|
||||||
|
raise ValueError("version must be >= 1")
|
||||||
|
return prompt_version
|
||||||
|
|
||||||
|
@model_validator(mode="after")
|
||||||
|
def validate_prompt_variables(self):
|
||||||
|
"""Validate that all variables used in the prompt are declared in the variables list."""
|
||||||
|
if not self.prompt:
|
||||||
|
return self
|
||||||
|
|
||||||
|
prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt))
|
||||||
|
declared_variables = set(self.variables)
|
||||||
|
|
||||||
|
undeclared = prompt_variables - declared_variables
|
||||||
|
if undeclared:
|
||||||
|
raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}")
|
||||||
|
|
||||||
|
return self
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def generate_prompt_id(cls) -> str:
|
||||||
|
# Generate 48 hex characters (24 bytes)
|
||||||
|
random_bytes = secrets.token_bytes(24)
|
||||||
|
hex_string = random_bytes.hex()
|
||||||
|
return f"pmpt_{hex_string}"
|
||||||
|
|
||||||
|
|
||||||
|
class ListPromptsResponse(BaseModel):
|
||||||
|
"""Response model to list prompts."""
|
||||||
|
|
||||||
|
data: list[Prompt]
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
|
||||||
|
@trace_protocol
|
||||||
|
class Prompts(Protocol):
|
||||||
|
"""Protocol for prompt management operations."""
|
||||||
|
|
||||||
|
@webmethod(route="/prompts", method="GET")
|
||||||
|
async def list_prompts(self) -> ListPromptsResponse:
|
||||||
|
"""List all prompts.
|
||||||
|
|
||||||
|
:returns: A ListPromptsResponse containing all prompts.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@webmethod(route="/prompts/{prompt_id}/versions", method="GET")
|
||||||
|
async def list_prompt_versions(
|
||||||
|
self,
|
||||||
|
prompt_id: str,
|
||||||
|
) -> ListPromptsResponse:
|
||||||
|
"""List all versions of a specific prompt.
|
||||||
|
|
||||||
|
:param prompt_id: The identifier of the prompt to list versions for.
|
||||||
|
:returns: A ListPromptsResponse containing all versions of the prompt.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@webmethod(route="/prompts/{prompt_id}", method="GET")
|
||||||
|
async def get_prompt(
|
||||||
|
self,
|
||||||
|
prompt_id: str,
|
||||||
|
version: int | None = None,
|
||||||
|
) -> Prompt:
|
||||||
|
"""Get a prompt by its identifier and optional version.
|
||||||
|
|
||||||
|
:param prompt_id: The identifier of the prompt to get.
|
||||||
|
:param version: The version of the prompt to get (defaults to latest).
|
||||||
|
:returns: A Prompt resource.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@webmethod(route="/prompts", method="POST")
|
||||||
|
async def create_prompt(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
variables: list[str] | None = None,
|
||||||
|
) -> Prompt:
|
||||||
|
"""Create a new prompt.
|
||||||
|
|
||||||
|
:param prompt: The prompt text content with variable placeholders.
|
||||||
|
:param variables: List of variable names that can be used in the prompt template.
|
||||||
|
:returns: The created Prompt resource.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@webmethod(route="/prompts/{prompt_id}", method="PUT")
|
||||||
|
async def update_prompt(
|
||||||
|
self,
|
||||||
|
prompt_id: str,
|
||||||
|
prompt: str,
|
||||||
|
version: int,
|
||||||
|
variables: list[str] | None = None,
|
||||||
|
set_as_default: bool = True,
|
||||||
|
) -> Prompt:
|
||||||
|
"""Update an existing prompt (increments version).
|
||||||
|
|
||||||
|
:param prompt_id: The identifier of the prompt to update.
|
||||||
|
:param prompt: The updated prompt text content.
|
||||||
|
:param version: The current version of the prompt being updated.
|
||||||
|
:param variables: Updated list of variable names that can be used in the prompt template.
|
||||||
|
:param set_as_default: Set the new version as the default (default=True).
|
||||||
|
:returns: The updated Prompt resource with incremented version.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@webmethod(route="/prompts/{prompt_id}", method="DELETE")
|
||||||
|
async def delete_prompt(
|
||||||
|
self,
|
||||||
|
prompt_id: str,
|
||||||
|
) -> None:
|
||||||
|
"""Delete a prompt.
|
||||||
|
|
||||||
|
:param prompt_id: The identifier of the prompt to delete.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT")
|
||||||
|
async def set_default_version(
|
||||||
|
self,
|
||||||
|
prompt_id: str,
|
||||||
|
version: int,
|
||||||
|
) -> Prompt:
|
||||||
|
"""Set which version of a prompt should be the default in get_prompt (latest).
|
||||||
|
|
||||||
|
:param prompt_id: The identifier of the prompt.
|
||||||
|
:param version: The version to set as default.
|
||||||
|
:returns: The prompt with the specified version now set as default.
|
||||||
|
"""
|
||||||
|
...
|
|
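To make the new Prompts surface concrete, here is a minimal sketch (not part of the change itself) that exercises the `Prompt` model defined in the new module; it assumes `llama_stack` at this revision is importable:

```python
from llama_stack.apis.prompts import Prompt

# "pmpt_" followed by 48 lowercase hex characters, as enforced by the prompt_id validator.
prompt_id = Prompt.generate_prompt_id()

# Every {{ placeholder }} in the template must be declared in `variables`,
# otherwise the model validator raises a ValueError.
p = Prompt(
    prompt_id=prompt_id,
    prompt="Summarize {{ document }} in a {{ style }} tone.",
    version=1,
    variables=["document", "style"],
)
print(p.prompt_id, p.version, p.variables)
```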
@@ -19,6 +19,7 @@ class ResourceType(StrEnum):
     benchmark = "benchmark"
     tool = "tool"
     tool_group = "tool_group"
+    prompt = "prompt"


 class Resource(BaseModel):
@@ -45,6 +45,7 @@ from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.exec import formulate_run_args, run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
+from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

 DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"

@@ -294,6 +295,12 @@ def _generate_run_config(
         if build_config.external_providers_dir
         else EXTERNAL_PROVIDERS_DIR,
     )
+    if not run_config.inference_store:
+        run_config.inference_store = SqliteSqlStoreConfig(
+            **SqliteSqlStoreConfig.sample_run_config(
+                __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
+            )
+        )
     # build providers dict
     provider_registry = get_provider_registry(build_config)
     for api in apis:
@@ -7,6 +7,7 @@
 from enum import StrEnum
 from pathlib import Path
 from typing import Annotated, Any, Literal, Self
+from urllib.parse import urlparse

 from pydantic import BaseModel, Field, field_validator, model_validator

@@ -212,6 +213,7 @@ class AuthProviderType(StrEnum):
     OAUTH2_TOKEN = "oauth2_token"
     GITHUB_TOKEN = "github_token"
     CUSTOM = "custom"
+    KUBERNETES = "kubernetes"


 class OAuth2TokenAuthConfig(BaseModel):
@@ -282,8 +284,45 @@ class GitHubTokenAuthConfig(BaseModel):
     )


+class KubernetesAuthProviderConfig(BaseModel):
+    """Configuration for Kubernetes authentication provider."""
+
+    type: Literal[AuthProviderType.KUBERNETES] = AuthProviderType.KUBERNETES
+    api_server_url: str = Field(
+        default="https://kubernetes.default.svc",
+        description="Kubernetes API server URL (e.g., https://api.cluster.domain:6443)",
+    )
+    verify_tls: bool = Field(default=True, description="Whether to verify TLS certificates")
+    tls_cafile: Path | None = Field(default=None, description="Path to CA certificate file for TLS verification")
+    claims_mapping: dict[str, str] = Field(
+        default_factory=lambda: {
+            "username": "roles",
+            "groups": "roles",
+        },
+        description="Mapping of Kubernetes user claims to access attributes",
+    )
+
+    @field_validator("api_server_url")
+    @classmethod
+    def validate_api_server_url(cls, v):
+        parsed = urlparse(v)
+        if not parsed.scheme or not parsed.netloc:
+            raise ValueError(f"api_server_url must be a valid URL with scheme and host: {v}")
+        if parsed.scheme not in ["http", "https"]:
+            raise ValueError(f"api_server_url scheme must be http or https: {v}")
+        return v
+
+    @field_validator("claims_mapping")
+    @classmethod
+    def validate_claims_mapping(cls, v):
+        for key, value in v.items():
+            if not value:
+                raise ValueError(f"claims_mapping value cannot be empty: {key}")
+        return v
+
+
 AuthProviderConfig = Annotated[
-    OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig,
+    OAuth2TokenAuthConfig | GitHubTokenAuthConfig | CustomAuthConfig | KubernetesAuthProviderConfig,
     Field(discriminator="type"),
 ]

@@ -392,6 +431,12 @@ class ServerConfig(BaseModel):
     )


+class InferenceStoreConfig(BaseModel):
+    sql_store_config: SqlStoreConfig
+    max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
+    num_writers: int = Field(default=4, description="Number of concurrent background writers")
+
+
 class StackRunConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION

@@ -425,11 +470,12 @@ Configuration for the persistence store used by the distribution registry. If not specified,
 a default SQLite store will be used.""",
     )

-    inference_store: SqlStoreConfig | None = Field(
+    inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
         default=None,
         description="""
-Configuration for the persistence store used by the inference API. If not specified,
-a default SQLite store will be used.""",
+Configuration for the persistence store used by the inference API. Can be either a
+InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
+If not specified, a default SQLite store will be used.""",
     )

     # registry of "resources" in the distribution
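A run-config fragment using the new shape could look roughly like this (a sketch: the nesting under `sql_store_config` follows the Pydantic model above, while the SQLite `type`/`db_path` keys are assumed from the existing SqlStoreConfig variants):

```yaml
inference_store:
  sql_store_config:
    type: sqlite
    db_path: ~/.llama/distributions/starter/inference_store.db  # illustrative path
  max_write_queue_size: 10000  # default from InferenceStoreConfig
  num_writers: 4               # default from InferenceStoreConfig
```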
@@ -10,7 +10,6 @@ import json
 import logging  # allow-direct-logging
 import os
 import sys
-from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from io import BytesIO
 from pathlib import Path
@@ -148,7 +147,6 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
         self.async_client = AsyncLlamaStackAsLibraryClient(
             config_path_or_distro_name, custom_provider_registry, provider_data, skip_logger_removal
         )
-        self.pool_executor = ThreadPoolExecutor(max_workers=4)
         self.provider_data = provider_data

         self.loop = asyncio.new_event_loop()
233  llama_stack/core/prompts/prompts.py  Normal file
@@ -0,0 +1,233 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
|
||||||
|
from llama_stack.core.datatypes import StackRunConfig
|
||||||
|
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
||||||
|
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
|
||||||
|
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
|
||||||
|
|
||||||
|
|
||||||
|
class PromptServiceConfig(BaseModel):
|
||||||
|
"""Configuration for the built-in prompt service.
|
||||||
|
|
||||||
|
:param run_config: Stack run configuration containing distribution info
|
||||||
|
"""
|
||||||
|
|
||||||
|
run_config: StackRunConfig
|
||||||
|
|
||||||
|
|
||||||
|
async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
|
||||||
|
"""Get the prompt service implementation."""
|
||||||
|
impl = PromptServiceImpl(config, deps)
|
||||||
|
await impl.initialize()
|
||||||
|
return impl
|
||||||
|
|
||||||
|
|
||||||
|
class PromptServiceImpl(Prompts):
|
||||||
|
"""Built-in prompt service implementation using KVStore."""
|
||||||
|
|
||||||
|
def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
|
||||||
|
self.config = config
|
||||||
|
self.deps = deps
|
||||||
|
self.kvstore: KVStore
|
||||||
|
|
||||||
|
async def initialize(self) -> None:
|
||||||
|
kvstore_config = SqliteKVStoreConfig(
|
||||||
|
db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix()
|
||||||
|
)
|
||||||
|
self.kvstore = await kvstore_impl(kvstore_config)
|
||||||
|
|
||||||
|
def _get_default_key(self, prompt_id: str) -> str:
|
||||||
|
"""Get the KVStore key that stores the default version number."""
|
||||||
|
return f"prompts:v1:{prompt_id}:default"
|
||||||
|
|
||||||
|
async def _get_prompt_key(self, prompt_id: str, version: int | None = None) -> str:
|
||||||
|
"""Get the KVStore key for prompt data, returning default version if applicable."""
|
||||||
|
if version:
|
||||||
|
return self._get_version_key(prompt_id, str(version))
|
||||||
|
|
||||||
|
default_key = self._get_default_key(prompt_id)
|
||||||
|
resolved_version = await self.kvstore.get(default_key)
|
||||||
|
if resolved_version is None:
|
||||||
|
raise ValueError(f"Prompt {prompt_id}:default not found")
|
||||||
|
return self._get_version_key(prompt_id, resolved_version)
|
||||||
|
|
||||||
|
def _get_version_key(self, prompt_id: str, version: str) -> str:
|
||||||
|
"""Get the KVStore key for a specific prompt version."""
|
||||||
|
return f"prompts:v1:{prompt_id}:{version}"
|
||||||
|
|
||||||
|
def _get_list_key_prefix(self) -> str:
|
||||||
|
"""Get the key prefix for listing prompts."""
|
||||||
|
return "prompts:v1:"
|
||||||
|
|
||||||
|
def _serialize_prompt(self, prompt: Prompt) -> str:
|
||||||
|
"""Serialize a prompt to JSON string for storage."""
|
||||||
|
return json.dumps(
|
||||||
|
{
|
||||||
|
"prompt_id": prompt.prompt_id,
|
||||||
|
"prompt": prompt.prompt,
|
||||||
|
"version": prompt.version,
|
||||||
|
"variables": prompt.variables or [],
|
||||||
|
"is_default": prompt.is_default,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def _deserialize_prompt(self, data: str) -> Prompt:
|
||||||
|
"""Deserialize a prompt from JSON string."""
|
||||||
|
obj = json.loads(data)
|
||||||
|
return Prompt(
|
||||||
|
prompt_id=obj["prompt_id"],
|
||||||
|
prompt=obj["prompt"],
|
||||||
|
version=obj["version"],
|
||||||
|
variables=obj.get("variables", []),
|
||||||
|
is_default=obj.get("is_default", False),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def list_prompts(self) -> ListPromptsResponse:
|
||||||
|
"""List all prompts (default versions only)."""
|
||||||
|
prefix = self._get_list_key_prefix()
|
||||||
|
keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
|
||||||
|
|
||||||
|
prompts = []
|
||||||
|
for key in keys:
|
||||||
|
if key.endswith(":default"):
|
||||||
|
try:
|
||||||
|
default_version = await self.kvstore.get(key)
|
||||||
|
if default_version:
|
||||||
|
prompt_id = key.replace(prefix, "").replace(":default", "")
|
||||||
|
version_key = self._get_version_key(prompt_id, default_version)
|
||||||
|
data = await self.kvstore.get(version_key)
|
||||||
|
if data:
|
||||||
|
prompt = self._deserialize_prompt(data)
|
||||||
|
prompts.append(prompt)
|
||||||
|
except (json.JSONDecodeError, KeyError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
prompts.sort(key=lambda p: p.prompt_id or "", reverse=True)
|
||||||
|
return ListPromptsResponse(data=prompts)
|
||||||
|
|
||||||
|
async def get_prompt(self, prompt_id: str, version: int | None = None) -> Prompt:
|
||||||
|
"""Get a prompt by its identifier and optional version."""
|
||||||
|
key = await self._get_prompt_key(prompt_id, version)
|
||||||
|
data = await self.kvstore.get(key)
|
||||||
|
if data is None:
|
||||||
|
raise ValueError(f"Prompt {prompt_id}:{version if version else 'default'} not found")
|
||||||
|
return self._deserialize_prompt(data)
|
||||||
|
|
||||||
|
async def create_prompt(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
variables: list[str] | None = None,
|
||||||
|
) -> Prompt:
|
||||||
|
"""Create a new prompt."""
|
||||||
|
if variables is None:
|
||||||
|
variables = []
|
||||||
|
|
||||||
|
prompt_obj = Prompt(
|
||||||
|
prompt_id=Prompt.generate_prompt_id(),
|
||||||
|
prompt=prompt,
|
||||||
|
version=1,
|
||||||
|
variables=variables,
|
||||||
|
)
|
||||||
|
|
||||||
|
version_key = self._get_version_key(prompt_obj.prompt_id, str(prompt_obj.version))
|
||||||
|
data = self._serialize_prompt(prompt_obj)
|
||||||
|
await self.kvstore.set(version_key, data)
|
||||||
|
|
||||||
|
default_key = self._get_default_key(prompt_obj.prompt_id)
|
||||||
|
await self.kvstore.set(default_key, str(prompt_obj.version))
|
||||||
|
|
||||||
|
return prompt_obj
|
||||||
|
|
||||||
|
async def update_prompt(
|
||||||
|
self,
|
||||||
|
prompt_id: str,
|
||||||
|
prompt: str,
|
||||||
|
version: int,
|
||||||
|
variables: list[str] | None = None,
|
||||||
|
set_as_default: bool = True,
|
||||||
|
) -> Prompt:
|
||||||
|
"""Update an existing prompt (increments version)."""
|
||||||
|
if version < 1:
|
||||||
|
raise ValueError("Version must be >= 1")
|
||||||
|
if variables is None:
|
||||||
|
variables = []
|
||||||
|
|
||||||
|
prompt_versions = await self.list_prompt_versions(prompt_id)
|
||||||
|
latest_prompt = max(prompt_versions.data, key=lambda x: int(x.version))
|
||||||
|
|
||||||
|
if version and latest_prompt.version != version:
|
||||||
|
raise ValueError(
|
||||||
|
f"'{version}' is not the latest prompt version for prompt_id='{prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
|
||||||
|
)
|
||||||
|
|
||||||
|
current_version = latest_prompt.version if version is None else version
|
||||||
|
new_version = current_version + 1
|
||||||
|
|
||||||
|
updated_prompt = Prompt(prompt_id=prompt_id, prompt=prompt, version=new_version, variables=variables)
|
||||||
|
|
||||||
|
version_key = self._get_version_key(prompt_id, str(new_version))
|
||||||
|
data = self._serialize_prompt(updated_prompt)
|
||||||
|
await self.kvstore.set(version_key, data)
|
||||||
|
|
||||||
|
if set_as_default:
|
||||||
|
await self.set_default_version(prompt_id, new_version)
|
||||||
|
|
||||||
|
return updated_prompt
|
||||||
|
|
||||||
|
async def delete_prompt(self, prompt_id: str) -> None:
|
||||||
|
"""Delete a prompt and all its versions."""
|
||||||
|
await self.get_prompt(prompt_id)
|
||||||
|
|
||||||
|
prefix = f"prompts:v1:{prompt_id}:"
|
||||||
|
keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
|
||||||
|
|
||||||
|
for key in keys:
|
||||||
|
await self.kvstore.delete(key)
|
||||||
|
|
||||||
|
async def list_prompt_versions(self, prompt_id: str) -> ListPromptsResponse:
|
||||||
|
"""List all versions of a specific prompt."""
|
||||||
|
prefix = f"prompts:v1:{prompt_id}:"
|
||||||
|
keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
|
||||||
|
|
||||||
|
default_version = None
|
||||||
|
prompts = []
|
||||||
|
|
||||||
|
for key in keys:
|
||||||
|
data = await self.kvstore.get(key)
|
||||||
|
if key.endswith(":default"):
|
||||||
|
default_version = data
|
||||||
|
else:
|
||||||
|
if data:
|
||||||
|
prompt_obj = self._deserialize_prompt(data)
|
||||||
|
prompts.append(prompt_obj)
|
||||||
|
|
||||||
|
if not prompts:
|
||||||
|
raise ValueError(f"Prompt {prompt_id} not found")
|
||||||
|
|
||||||
|
for prompt in prompts:
|
||||||
|
prompt.is_default = str(prompt.version) == default_version
|
||||||
|
|
||||||
|
prompts.sort(key=lambda x: x.version)
|
||||||
|
return ListPromptsResponse(data=prompts)
|
||||||
|
|
||||||
|
async def set_default_version(self, prompt_id: str, version: int) -> Prompt:
|
||||||
|
"""Set which version of a prompt should be the default, If not set. the default is the latest."""
|
||||||
|
version_key = self._get_version_key(prompt_id, str(version))
|
||||||
|
data = await self.kvstore.get(version_key)
|
||||||
|
if data is None:
|
||||||
|
raise ValueError(f"Prompt {prompt_id} version {version} not found")
|
||||||
|
|
||||||
|
default_key = self._get_default_key(prompt_id)
|
||||||
|
await self.kvstore.set(default_key, str(version))
|
||||||
|
|
||||||
|
return self._deserialize_prompt(data)
|
|
@@ -19,6 +19,7 @@ from llama_stack.apis.inference import Inference, InferenceProvider
 from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.models import Models
 from llama_stack.apis.post_training import PostTraining
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.providers import Providers as ProvidersAPI
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
@@ -93,6 +94,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
         Api.tool_groups: ToolGroups,
         Api.tool_runtime: ToolRuntime,
         Api.files: Files,
+        Api.prompts: Prompts,
     }

     if external_apis:
@@ -284,7 +286,15 @@ async def instantiate_providers(
         if provider.provider_id is None:
             continue

-        deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        try:
+            deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        except KeyError as e:
+            missing_api = e.args[0]
+            raise RuntimeError(
+                f"Failed to resolve '{provider.spec.api.value}' provider '{provider.provider_id}' of type '{provider.spec.provider_type}': "
+                f"required dependency '{missing_api.value}' is not available. "
+                f"Please add a '{missing_api.value}' provider to your configuration or check if the provider is properly configured."
+            ) from e
         for a in provider.spec.optional_api_dependencies:
             if a in impls:
                 deps[a] = impls[a]
@@ -78,7 +78,10 @@ async def get_auto_router_impl(

     # TODO: move pass configs to routers instead
     if api == Api.inference and run_config.inference_store:
-        inference_store = InferenceStore(run_config.inference_store, policy)
+        inference_store = InferenceStore(
+            config=run_config.inference_store,
+            policy=policy,
+        )
         await inference_store.initialize()
         api_to_dep_impl["store"] = inference_store

@@ -63,7 +63,7 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.telemetry.tracing import get_current_span
+from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span

 logger = get_logger(name=__name__, category="core::routers")

@@ -90,6 +90,11 @@ class InferenceRouter(Inference):

     async def shutdown(self) -> None:
         logger.debug("InferenceRouter.shutdown")
+        if self.store:
+            try:
+                await self.store.shutdown()
+            except Exception as e:
+                logger.warning(f"Error during InferenceStore shutdown: {e}")

     async def register_model(
         self,
@@ -160,7 +165,7 @@ class InferenceRouter(Inference):
         metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
         if self.telemetry:
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)
         return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]

     async def _count_tokens(
@@ -431,7 +436,7 @@ class InferenceRouter(Inference):
                 model=model_obj,
             )
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)

         # these metrics will show up in the client response.
         response.metrics = (
@@ -527,7 +532,7 @@ class InferenceRouter(Inference):

         # Store the response with the ID that will be returned to the client
         if self.store:
-            await self.store.store_chat_completion(response, messages)
+            asyncio.create_task(self.store.store_chat_completion(response, messages))

         if self.telemetry:
             metrics = self._construct_metrics(
@@ -537,7 +542,7 @@ class InferenceRouter(Inference):
                 model=model_obj,
             )
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)
             # these metrics will show up in the client response.
             response.metrics = (
                 metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
@@ -664,7 +669,7 @@ class InferenceRouter(Inference):
                     "completion_tokens",
                     "total_tokens",
                 ]:  # Only log completion and total tokens
-                    await self.telemetry.log_event(metric)
+                    enqueue_event(metric)

             # Return metrics in response
             async_metrics = [
@@ -710,7 +715,7 @@ class InferenceRouter(Inference):
             )
             for metric in completion_metrics:
                 if metric.metric in ["completion_tokens", "total_tokens"]:  # Only log completion and total tokens
-                    await self.telemetry.log_event(metric)
+                    enqueue_event(metric)

             # Return metrics in response
             return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics]
@@ -755,7 +760,7 @@ class InferenceRouter(Inference):
                 choices_data[idx] = {
                     "content_parts": [],
                     "tool_calls_builder": {},
-                    "finish_reason": None,
+                    "finish_reason": "stop",
                     "logprobs_content_parts": [],
                 }
             current_choice_data = choices_data[idx]
@@ -806,7 +811,7 @@ class InferenceRouter(Inference):
                     model=model,
                 )
                 for metric in metrics:
-                    await self.telemetry.log_event(metric)
+                    enqueue_event(metric)

             yield chunk
         finally:
@@ -855,4 +860,4 @@ class InferenceRouter(Inference):
             object="chat.completion",
         )
         logger.debug(f"InferenceRouter.completion_response: {final_response}")
-        await self.store.store_chat_completion(final_response, messages)
+        asyncio.create_task(self.store.store_chat_completion(final_response, messages))
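The recurring pattern in these hunks is fire-and-forget persistence: the completion is returned to the caller immediately and the store write runs as a background task. A generic asyncio sketch of the idea (names are illustrative, not the router's actual members):

```python
import asyncio


async def store_chat_completion(response: dict) -> None:
    # Stand-in for the real store write (e.g. an async DB insert).
    await asyncio.sleep(0.05)
    print("persisted", response["id"])


async def handle_request() -> dict:
    response = {"id": "chatcmpl-123", "content": "hello"}
    # Schedule persistence without blocking the response path.
    asyncio.create_task(store_chat_completion(response))
    return response


async def main() -> None:
    print(await handle_request())
    await asyncio.sleep(0.1)  # let the background write finish in this demo


asyncio.run(main())
```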
@@ -52,7 +52,6 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
         provider_vector_db_id: str | None = None,
         vector_db_name: str | None = None,
     ) -> VectorDB:
-        provider_vector_db_id = provider_vector_db_id or vector_db_id
         if provider_id is None:
             if len(self.impls_by_provider_id) > 0:
                 provider_id = list(self.impls_by_provider_id.keys())[0]
@@ -69,14 +68,33 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
             raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
         if "embedding_dimension" not in model.metadata:
             raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
+
+        provider = self.impls_by_provider_id[provider_id]
+        logger.warning(
+            "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
+        )
+        vector_store = await provider.openai_create_vector_store(
+            name=vector_db_name or vector_db_id,
+            embedding_model=embedding_model,
+            embedding_dimension=model.metadata["embedding_dimension"],
+            provider_id=provider_id,
+            provider_vector_db_id=provider_vector_db_id,
+        )
+
+        vector_store_id = vector_store.id
+        actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
+        logger.warning(
+            f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
+        )
+
         vector_db_data = {
-            "identifier": vector_db_id,
+            "identifier": vector_store_id,
             "type": ResourceType.vector_db.value,
             "provider_id": provider_id,
-            "provider_resource_id": provider_vector_db_id,
+            "provider_resource_id": actual_provider_vector_db_id,
             "embedding_model": embedding_model,
             "embedding_dimension": model.metadata["embedding_dimension"],
-            "vector_db_name": vector_db_name,
+            "vector_db_name": vector_store.name,
         }
         vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
         await self.register_object(vector_db)
@@ -8,16 +8,18 @@ import ssl
 import time
 from abc import ABC, abstractmethod
 from asyncio import Lock
-from urllib.parse import parse_qs, urlparse
+from urllib.parse import parse_qs, urljoin, urlparse

 import httpx
 from jose import jwt
 from pydantic import BaseModel, Field

+from llama_stack.apis.common.errors import TokenValidationError
 from llama_stack.core.datatypes import (
     AuthenticationConfig,
     CustomAuthConfig,
     GitHubTokenAuthConfig,
+    KubernetesAuthProviderConfig,
     OAuth2TokenAuthConfig,
     User,
 )
@@ -162,7 +164,7 @@ class OAuth2TokenAuthProvider(AuthProvider):
             auth=auth,
             timeout=10.0,  # Add a reasonable timeout
         )
-        if response.status_code != 200:
+        if response.status_code != httpx.codes.OK:
             logger.warning(f"Token introspection failed with status code: {response.status_code}")
             raise ValueError(f"Token introspection failed: {response.status_code}")

@@ -272,7 +274,7 @@ class CustomAuthProvider(AuthProvider):
             json=auth_request.model_dump(),
             timeout=10.0,  # Add a reasonable timeout
         )
-        if response.status_code != 200:
+        if response.status_code != httpx.codes.OK:
             logger.warning(f"Authentication failed with status code: {response.status_code}")
             raise ValueError(f"Authentication failed: {response.status_code}")

@@ -374,6 +376,89 @@ async def _get_github_user_info(access_token: str, github_api_base_url: str) ->
     }

class KubernetesAuthProvider(AuthProvider):
|
||||||
|
"""
|
||||||
|
Kubernetes authentication provider that validates tokens using the Kubernetes SelfSubjectReview API.
|
||||||
|
This provider integrates with Kubernetes API server by using the
|
||||||
|
/apis/authentication.k8s.io/v1/selfsubjectreviews endpoint to validate tokens and extract user information.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: KubernetesAuthProviderConfig):
|
||||||
|
self.config = config
|
||||||
|
|
||||||
|
def _httpx_verify_value(self) -> bool | str:
|
||||||
|
"""
|
||||||
|
Build the value for httpx's `verify` parameter.
|
||||||
|
- False disables verification.
|
||||||
|
- Path string points to a CA bundle.
|
||||||
|
- True uses system defaults.
|
||||||
|
"""
|
||||||
|
if not self.config.verify_tls:
|
||||||
|
return False
|
||||||
|
if self.config.tls_cafile:
|
||||||
|
return self.config.tls_cafile.as_posix()
|
||||||
|
return True
|
||||||
|
|
||||||
|
async def validate_token(self, token: str, scope: dict | None = None) -> User:
|
||||||
|
"""Validate a token using Kubernetes SelfSubjectReview API endpoint."""
|
||||||
|
# Build the Kubernetes SelfSubjectReview API endpoint URL
|
||||||
|
review_api_url = urljoin(self.config.api_server_url, "/apis/authentication.k8s.io/v1/selfsubjectreviews")
|
||||||
|
|
||||||
|
# Create SelfSubjectReview request body
|
||||||
|
review_request = {"apiVersion": "authentication.k8s.io/v1", "kind": "SelfSubjectReview"}
|
||||||
|
verify = self._httpx_verify_value()
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(verify=verify, timeout=10.0) as client:
|
||||||
|
response = await client.post(
|
||||||
|
review_api_url,
|
||||||
|
json=review_request,
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {token}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code == httpx.codes.UNAUTHORIZED:
|
||||||
|
raise TokenValidationError("Invalid token")
|
||||||
|
if response.status_code != httpx.codes.CREATED:
|
||||||
|
logger.warning(f"Kubernetes SelfSubjectReview API failed with status code: {response.status_code}")
|
||||||
|
raise TokenValidationError(f"Token validation failed: {response.status_code}")
|
||||||
|
|
||||||
|
review_response = response.json()
|
||||||
|
# Extract user information from SelfSubjectReview response
|
||||||
|
status = review_response.get("status", {})
|
||||||
|
if not status:
|
||||||
|
raise ValueError("No status found in SelfSubjectReview response")
|
||||||
|
|
||||||
|
user_info = status.get("userInfo", {})
|
||||||
|
if not user_info:
|
||||||
|
raise ValueError("No userInfo found in SelfSubjectReview response")
|
||||||
|
|
||||||
|
username = user_info.get("username")
|
||||||
|
if not username:
|
||||||
|
raise ValueError("No username found in SelfSubjectReview response")
|
||||||
|
|
||||||
|
# Build user attributes from Kubernetes user info
|
||||||
|
user_attributes = get_attributes_from_claims(user_info, self.config.claims_mapping)
|
||||||
|
|
||||||
|
return User(
|
||||||
|
principal=username,
|
||||||
|
attributes=user_attributes,
|
||||||
|
)
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
logger.warning("Kubernetes SelfSubjectReview API request timed out")
|
||||||
|
raise ValueError("Token validation timeout") from None
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error during token validation: {str(e)}")
|
||||||
|
raise ValueError(f"Token validation error: {str(e)}") from e
|
||||||
|
|
||||||
|
async def close(self):
|
||||||
|
"""Close any resources."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def create_auth_provider(config: AuthenticationConfig) -> AuthProvider:
|
def create_auth_provider(config: AuthenticationConfig) -> AuthProvider:
|
||||||
"""Factory function to create the appropriate auth provider."""
|
"""Factory function to create the appropriate auth provider."""
|
||||||
provider_config = config.provider_config
|
provider_config = config.provider_config
|
||||||
|
@ -384,5 +469,7 @@ def create_auth_provider(config: AuthenticationConfig) -> AuthProvider:
|
||||||
return OAuth2TokenAuthProvider(provider_config)
|
return OAuth2TokenAuthProvider(provider_config)
|
||||||
elif isinstance(provider_config, GitHubTokenAuthConfig):
|
elif isinstance(provider_config, GitHubTokenAuthConfig):
|
||||||
return GitHubTokenAuthProvider(provider_config)
|
return GitHubTokenAuthProvider(provider_config)
|
||||||
|
elif isinstance(provider_config, KubernetesAuthProviderConfig):
|
||||||
|
return KubernetesAuthProvider(provider_config)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown authentication provider config type: {type(provider_config)}")
|
raise ValueError(f"Unknown authentication provider config type: {type(provider_config)}")
|
||||||
|
|
|
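For orientation, the request the new provider issues can be reproduced on its own. The sketch below is illustrative only (not part of the diff); it assumes a reachable Kubernetes API server and a valid bearer token, and it mirrors the endpoint, request body, and response fields used in validate_token above.

# Standalone sketch of the SelfSubjectReview call made by KubernetesAuthProvider.validate_token.
# Placeholder URL/token; a real cluster and a valid bearer token are assumed.
import asyncio
from urllib.parse import urljoin

import httpx


async def self_subject_review(api_server_url: str, token: str) -> str:
    url = urljoin(api_server_url, "/apis/authentication.k8s.io/v1/selfsubjectreviews")
    body = {"apiVersion": "authentication.k8s.io/v1", "kind": "SelfSubjectReview"}
    async with httpx.AsyncClient(verify=True, timeout=10.0) as client:
        response = await client.post(
            url,
            json=body,
            headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
        )
    response.raise_for_status()  # the provider above checks explicitly for 401 and 201
    return response.json()["status"]["userInfo"]["username"]

# asyncio.run(self_subject_review("https://kubernetes.default.svc", "<bearer token>"))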
@@ -132,15 +132,17 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationError:
         },
     )
     elif isinstance(exc, ConflictError):
-        return HTTPException(status_code=409, detail=str(exc))
+        return HTTPException(status_code=httpx.codes.CONFLICT, detail=str(exc))
     elif isinstance(exc, ResourceNotFoundError):
-        return HTTPException(status_code=404, detail=str(exc))
+        return HTTPException(status_code=httpx.codes.NOT_FOUND, detail=str(exc))
     elif isinstance(exc, ValueError):
         return HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=f"Invalid value: {str(exc)}")
     elif isinstance(exc, BadRequestError):
         return HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=str(exc))
     elif isinstance(exc, PermissionError | AccessDeniedError):
         return HTTPException(status_code=httpx.codes.FORBIDDEN, detail=f"Permission denied: {str(exc)}")
+    elif isinstance(exc, ConnectionError | httpx.ConnectError):
+        return HTTPException(status_code=httpx.codes.BAD_GATEWAY, detail=str(exc))
     elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
         return HTTPException(status_code=httpx.codes.GATEWAY_TIMEOUT, detail=f"Operation timed out: {str(exc)}")
     elif isinstance(exc, NotImplementedError):
@@ -513,6 +515,7 @@ def main(args: argparse.Namespace | None = None):
 
         apis_to_serve.add("inspect")
         apis_to_serve.add("providers")
+        apis_to_serve.add("prompts")
     for api_str in apis_to_serve:
         api = Api(api_str)
 
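As a quick sanity check of the mapping above, the httpx.codes constants resolve to the usual HTTP status numbers, so the behavior is unchanged for the rewritten branches and the new ConnectionError branch maps to 502. Illustrative only:

# Illustrative only: the constants used in translate_exception and their numeric values.
import httpx

assert httpx.codes.CONFLICT == 409          # ConflictError
assert httpx.codes.NOT_FOUND == 404         # ResourceNotFoundError
assert httpx.codes.BAD_REQUEST == 400       # ValueError / BadRequestError
assert httpx.codes.FORBIDDEN == 403         # PermissionError / AccessDeniedError
assert httpx.codes.BAD_GATEWAY == 502       # ConnectionError / httpx.ConnectError (new branch)
assert httpx.codes.GATEWAY_TIMEOUT == 504   # asyncio.TimeoutError / TimeoutError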
@@ -24,6 +24,7 @@ from llama_stack.apis.inference import Inference
 from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.models import Models
 from llama_stack.apis.post_training import PostTraining
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.providers import Providers
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
@@ -37,6 +38,7 @@ from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.datatypes import Provider, StackRunConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
 from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
 from llama_stack.core.resolver import ProviderRegistry, resolve_impls
 from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
@@ -72,6 +74,7 @@ class LlamaStack(
     ToolRuntime,
     RAGToolRuntime,
     Files,
+    Prompts,
 ):
     pass
 
@@ -305,6 +308,12 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig
     )
     impls[Api.providers] = providers_impl
 
+    prompts_impl = PromptServiceImpl(
+        PromptServiceConfig(run_config=run_config),
+        deps=impls,
+    )
+    impls[Api.prompts] = prompts_impl
+
 
 # Produces a stack of providers for the given run config. Not all APIs may be
 # asked for in the run config.
@@ -329,6 +338,9 @@ async def construct_stack(
     # Add internal implementations after all other providers are resolved
     add_internal_implementations(impls, run_config)
 
+    if Api.prompts in impls:
+        await impls[Api.prompts].initialize()
+
     await register_resources(run_config, impls)
 
     await refresh_registry_once(impls)
@@ -17,6 +17,7 @@ distribution_spec:
     - provider_type: remote::vertexai
     - provider_type: remote::groq
     - provider_type: remote::sambanova
+    - provider_type: remote::azure
     - provider_type: inline::sentence-transformers
     vector_io:
     - provider_type: inline::faiss
@@ -11,9 +11,7 @@ from ..starter.starter import get_distribution_template as get_starter_distribution_template
 
 
 def get_distribution_template() -> DistributionTemplate:
-    template = get_starter_distribution_template()
-    name = "ci-tests"
-    template.name = name
+    template = get_starter_distribution_template(name="ci-tests")
     template.description = "CI tests for Llama Stack"
 
     return template
@@ -81,6 +81,13 @@ providers:
     config:
       url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
   vector_io:
@@ -89,28 +96,28 @@ providers:
     config:
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db
   - provider_id: ${env.MILVUS_URL:+milvus}
     provider_type: inline::milvus
     config:
-      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db
   - provider_id: ${env.CHROMADB_URL:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db
   - provider_id: ${env.PGVECTOR_DB:+pgvector}
     provider_type: remote::pgvector
     config:
@@ -121,15 +128,15 @@ providers:
       password: ${env.PGVECTOR_PASSWORD:=}
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
       metadata_store:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -18,6 +18,7 @@ distribution_spec:
     - provider_type: remote::vertexai
     - provider_type: remote::groq
     - provider_type: remote::sambanova
+    - provider_type: remote::azure
     - provider_type: inline::sentence-transformers
     vector_io:
     - provider_type: inline::faiss
@@ -81,6 +81,13 @@ providers:
     config:
      url: https://api.sambanova.ai/v1
      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
   vector_io:
@@ -89,28 +96,28 @@ providers:
     config:
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db
   - provider_id: ${env.MILVUS_URL:+milvus}
     provider_type: inline::milvus
     config:
-      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db
   - provider_id: ${env.CHROMADB_URL:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db
   - provider_id: ${env.PGVECTOR_DB:+pgvector}
     provider_type: remote::pgvector
     config:
@@ -121,15 +128,15 @@ providers:
       password: ${env.PGVECTOR_PASSWORD:=}
       kvstore:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
       metadata_store:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -11,9 +11,7 @@ from ..starter.starter import get_distribution_template as get_starter_distribution_template
 
 
 def get_distribution_template() -> DistributionTemplate:
-    template = get_starter_distribution_template()
-    name = "starter-gpu"
-    template.name = name
+    template = get_starter_distribution_template(name="starter-gpu")
     template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
 
     template.providers["post_training"] = [
@@ -18,6 +18,7 @@ distribution_spec:
     - provider_type: remote::vertexai
     - provider_type: remote::groq
     - provider_type: remote::sambanova
+    - provider_type: remote::azure
     - provider_type: inline::sentence-transformers
     vector_io:
     - provider_type: inline::faiss
@@ -81,6 +81,13 @@ providers:
     config:
       url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
   vector_io:
@@ -59,6 +59,7 @@ ENABLED_INFERENCE_PROVIDERS = [
     "cerebras",
     "nvidia",
     "bedrock",
+    "azure",
 ]
 
 INFERENCE_PROVIDER_IDS = {
@@ -68,6 +69,7 @@ INFERENCE_PROVIDER_IDS = {
     "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
     "nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
     "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
+    "azure": "${env.AZURE_API_KEY:+azure}",
 }
 
 
@@ -99,9 +101,8 @@ def get_remote_inference_providers() -> list[Provider]:
     return inference_providers
 
 
-def get_distribution_template() -> DistributionTemplate:
+def get_distribution_template(name: str = "starter") -> DistributionTemplate:
     remote_inference_providers = get_remote_inference_providers()
-    name = "starter"
 
     providers = {
         "inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers]
@@ -278,5 +279,21 @@ def get_distribution_template() -> DistributionTemplate:
             "http://localhost:11434",
             "Ollama URL",
         ),
+        "AZURE_API_KEY": (
+            "",
+            "Azure API Key",
+        ),
+        "AZURE_API_BASE": (
+            "",
+            "Azure API Base",
+        ),
+        "AZURE_API_VERSION": (
+            "",
+            "Azure API Version",
+        ),
+        "AZURE_API_TYPE": (
+            "azure",
+            "Azure API Type",
+        ),
     },
 )
@@ -178,9 +178,9 @@ class ReferenceBatchesImpl(Batches):
 
         # TODO: set expiration time for garbage collection
 
-        if endpoint not in ["/v1/chat/completions"]:
+        if endpoint not in ["/v1/chat/completions", "/v1/completions"]:
             raise ValueError(
-                f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions. Code: invalid_value. Param: endpoint",
+                f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions, /v1/completions. Code: invalid_value. Param: endpoint",
             )
 
         if completion_window != "24h":
@@ -424,13 +424,21 @@ class ReferenceBatchesImpl(Batches):
             )
             valid = False
 
-        for param, expected_type, type_string in [
-            ("model", str, "a string"),
-            # messages is specific to /v1/chat/completions
-            # we could skip validating messages here and let inference fail. however,
-            # that would be a very expensive way to find out messages is wrong.
-            ("messages", list, "an array"),  # TODO: allow messages to be a string?
-        ]:
+        if batch.endpoint == "/v1/chat/completions":
+            required_params = [
+                ("model", str, "a string"),
+                # messages is specific to /v1/chat/completions
+                # we could skip validating messages here and let inference fail. however,
+                # that would be a very expensive way to find out messages is wrong.
+                ("messages", list, "an array"),  # TODO: allow messages to be a string?
+            ]
+        else:  # /v1/completions
+            required_params = [
+                ("model", str, "a string"),
+                ("prompt", str, "a string"),  # TODO: allow prompt to be a list of strings??
+            ]
+
+        for param, expected_type, type_string in required_params:
             if param not in body:
                 errors.append(
                     BatchError(
@@ -591,6 +599,7 @@ class ReferenceBatchesImpl(Batches):
 
         try:
             # TODO(SECURITY): review body for security issues
+            if request.url == "/v1/chat/completions":
                 request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]]
                 chat_response = await self.inference_api.openai_chat_completion(**request.body)
 
@@ -605,6 +614,22 @@ class ReferenceBatchesImpl(Batches):
                         "body": chat_response.model_dump_json(),
                     },
                 }
+            else:  # /v1/completions
+                completion_response = await self.inference_api.openai_completion(**request.body)
+
+                # this is for mypy, we don't allow streaming so we'll get the right type
+                assert hasattr(completion_response, "model_dump_json"), (
+                    "Completion response must have model_dump_json method"
+                )
+                return {
+                    "id": request_id,
+                    "custom_id": request.custom_id,
+                    "response": {
+                        "status_code": 200,
+                        "request_id": request_id,
+                        "body": completion_response.model_dump_json(),
+                    },
+                }
         except Exception as e:
             logger.info(f"Error processing request {request.custom_id} in batch {batch_id}: {e}")
             return {
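With the validation above, a batch input line targeting the newly supported /v1/completions endpoint needs a string `model` and a string `prompt`. A plausible JSONL entry is sketched below as a Python dict for illustration; the custom_id/method/url/body layout follows the OpenAI-style batch request shape the code refers to via request.custom_id, request.url, and request.body, and the model id is a placeholder.

# One JSONL line of a /v1/completions batch, shown as a Python dict (illustration only).
completions_request = {
    "custom_id": "request-1",
    "method": "POST",
    "url": "/v1/completions",
    "body": {
        "model": "llama3.2:3b",  # placeholder model id
        "prompt": "Say hello in one sentence.",
        "max_tokens": 32,
    },
}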
@@ -14,6 +14,6 @@ from .config import RagToolRuntimeConfig
 async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]):
     from .memory import MemoryToolRuntimeImpl
 
-    impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference])
+    impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files])
     await impl.initialize()
     return impl
@@ -8,7 +8,7 @@
 from jinja2 import Template
 
 from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.inference import UserMessage
+from llama_stack.apis.inference import OpenAIUserMessageParam
 from llama_stack.apis.tools.rag_tool import (
     DefaultRAGQueryGeneratorConfig,
     LLMRAGQueryGeneratorConfig,
@@ -61,16 +61,16 @@ async def llm_rag_query_generator(
     messages = [interleaved_content_as_str(content)]
 
     template = Template(config.template)
-    content = template.render({"messages": messages})
+    rendered_content: str = template.render({"messages": messages})
 
     model = config.model
-    message = UserMessage(content=content)
-    response = await inference_api.chat_completion(
-        model_id=model,
+    message = OpenAIUserMessageParam(content=rendered_content)
+    response = await inference_api.openai_chat_completion(
+        model=model,
         messages=[message],
         stream=False,
     )
 
-    query = response.completion_message.content
+    query = response.choices[0].message.content
 
     return query
@@ -5,10 +5,15 @@
 # the root directory of this source tree.
 
 import asyncio
+import base64
+import io
+import mimetypes
 import secrets
 import string
 from typing import Any
 
+import httpx
+from fastapi import UploadFile
 from pydantic import TypeAdapter
 
 from llama_stack.apis.common.content_types import (
@@ -17,6 +22,7 @@ from llama_stack.apis.common.content_types import (
     InterleavedContentItem,
     TextContentItem,
 )
+from llama_stack.apis.files import Files, OpenAIFilePurpose
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.tools import (
     ListToolDefsResponse,
@@ -30,14 +36,16 @@ from llama_stack.apis.tools import (
     ToolParameter,
     ToolRuntime,
 )
-from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_io import (
+    QueryChunksResponse,
+    VectorIO,
+    VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
+)
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import (
-    content_from_doc,
-    make_overlapped_chunks,
-)
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
 
 from .config import RagToolRuntimeConfig
 from .context_retriever import generate_rag_query
@@ -49,16 +57,59 @@ def make_random_string(length: int = 8):
     return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
 
 
+async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
+    """Get raw binary data and mime type from a RAGDocument for file upload."""
+    if isinstance(doc.content, URL):
+        if doc.content.uri.startswith("data:"):
+            parts = parse_data_url(doc.content.uri)
+            mime_type = parts["mimetype"]
+            data = parts["data"]
+
+            if parts["is_base64"]:
+                file_data = base64.b64decode(data)
+            else:
+                file_data = data.encode("utf-8")
+
+            return file_data, mime_type
+        else:
+            async with httpx.AsyncClient() as client:
+                r = await client.get(doc.content.uri)
+                r.raise_for_status()
+                mime_type = r.headers.get("content-type", "application/octet-stream")
+                return r.content, mime_type
+    else:
+        if isinstance(doc.content, str):
+            content_str = doc.content
+        else:
+            content_str = interleaved_content_as_str(doc.content)
+
+        if content_str.startswith("data:"):
+            parts = parse_data_url(content_str)
+            mime_type = parts["mimetype"]
+            data = parts["data"]
+
+            if parts["is_base64"]:
+                file_data = base64.b64decode(data)
+            else:
+                file_data = data.encode("utf-8")
+
+            return file_data, mime_type
+        else:
+            return content_str.encode("utf-8"), "text/plain"
+
+
 class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
     def __init__(
         self,
         config: RagToolRuntimeConfig,
         vector_io_api: VectorIO,
         inference_api: Inference,
+        files_api: Files,
     ):
         self.config = config
         self.vector_io_api = vector_io_api
         self.inference_api = inference_api
+        self.files_api = files_api
 
     async def initialize(self):
         pass
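The new raw_data_from_doc helper leans on parse_data_url from the shared vector_store utilities for the data: URL branches. As a standalone illustration of the same decode step (not the repo's implementation, and handling only the base64 case), something like the following reproduces the (bytes, mime_type) pair the helper returns:

# Illustration only: decode a base64 data: URL into (bytes, mime_type).
import base64


def decode_base64_data_url(uri: str) -> tuple[bytes, str]:
    header, _, payload = uri.partition(",")  # e.g. "data:text/markdown;base64"
    mime_type = header.removeprefix("data:").split(";")[0] or "text/plain"
    return base64.b64decode(payload), mime_type


assert decode_base64_data_url(
    "data:text/markdown;base64," + base64.b64encode(b"# Title").decode()
) == (b"# Title", "text/markdown")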
@@ -78,27 +129,56 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         vector_db_id: str,
         chunk_size_in_tokens: int = 512,
     ) -> None:
-        chunks = []
-        for doc in documents:
-            content = await content_from_doc(doc)
-            # TODO: we should add enrichment here as URLs won't be added to the metadata by default
-            chunks.extend(
-                make_overlapped_chunks(
-                    doc.document_id,
-                    content,
-                    chunk_size_in_tokens,
-                    chunk_size_in_tokens // 4,
-                    doc.metadata,
-                )
-            )
-
-        if not chunks:
+        if not documents:
             return
 
-        await self.vector_io_api.insert_chunks(
-            chunks=chunks,
-            vector_db_id=vector_db_id,
-        )
+        for doc in documents:
+            try:
+                try:
+                    file_data, mime_type = await raw_data_from_doc(doc)
+                except Exception as e:
+                    log.error(f"Failed to extract content from document {doc.document_id}: {e}")
+                    continue
+
+                file_extension = mimetypes.guess_extension(mime_type) or ".txt"
+                filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
+
+                file_obj = io.BytesIO(file_data)
+                file_obj.name = filename
+
+                upload_file = UploadFile(file=file_obj, filename=filename)
+
+                try:
+                    created_file = await self.files_api.openai_upload_file(
+                        file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
+                    )
+                except Exception as e:
+                    log.error(f"Failed to upload file for document {doc.document_id}: {e}")
+                    continue
+
+                chunking_strategy = VectorStoreChunkingStrategyStatic(
+                    static=VectorStoreChunkingStrategyStaticConfig(
+                        max_chunk_size_tokens=chunk_size_in_tokens,
+                        chunk_overlap_tokens=chunk_size_in_tokens // 4,
+                    )
+                )
+
+                try:
+                    await self.vector_io_api.openai_attach_file_to_vector_store(
+                        vector_store_id=vector_db_id,
+                        file_id=created_file.id,
+                        attributes=doc.metadata,
+                        chunking_strategy=chunking_strategy,
+                    )
+                except Exception as e:
+                    log.error(
+                        f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
+                    )
+                    continue
+
+            except Exception as e:
+                log.error(f"Unexpected error processing document {doc.document_id}: {e}")
+                continue
 
     async def query(
         self,
@@ -131,8 +211,18 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
             for vector_db_id in vector_db_ids
         ]
         results: list[QueryChunksResponse] = await asyncio.gather(*tasks)
-        chunks = [c for r in results for c in r.chunks]
-        scores = [s for r in results for s in r.scores]
+
+        chunks = []
+        scores = []
+
+        for vector_db_id, result in zip(vector_db_ids, results, strict=False):
+            for chunk, score in zip(result.chunks, result.scores, strict=False):
+                if not hasattr(chunk, "metadata") or chunk.metadata is None:
+                    chunk.metadata = {}
+                chunk.metadata["vector_db_id"] = vector_db_id
+
+                chunks.append(chunk)
+                scores.append(score)
+
         if not chunks:
             return RAGQueryResult(content=None)
@@ -167,6 +257,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         metadata_keys_to_exclude_from_context = [
             "token_count",
             "metadata_token_count",
+            "vector_db_id",
         ]
         metadata_for_context = {}
         for k in chunk_metadata_keys_to_include_from_context:
@@ -191,6 +282,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
                 "document_ids": [c.metadata["document_id"] for c in chunks[: len(picked)]],
                 "chunks": [c.content for c in chunks[: len(picked)]],
                 "scores": scores[: len(picked)],
+                "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
             },
         )
 
@@ -226,7 +318,6 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         if query_config:
             query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
         else:
-            # handle someone passing an empty dict
             query_config = RAGQueryConfig()
 
         query = kwargs["query"]
@@ -237,6 +328,6 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         )
 
         return ToolInvocationResult(
-            content=result.content,
+            content=result.content or [],
             metadata=result.metadata,
         )
@@ -30,11 +30,11 @@ from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import (
     RERANKER_TYPE_RRF,
-    RERANKER_TYPE_WEIGHTED,
     ChunkForDeletion,
     EmbeddingIndex,
     VectorDBWithIndex,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
 
 logger = get_logger(name=__name__, category="vector_io")
 
@@ -66,59 +66,6 @@ def _create_sqlite_connection(db_path):
     return connection
 
 
-def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
-    """Normalize scores to [0,1] range using min-max normalization."""
-    if not scores:
-        return {}
-    min_score = min(scores.values())
-    max_score = max(scores.values())
-    score_range = max_score - min_score
-    if score_range > 0:
-        return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
-    return dict.fromkeys(scores, 1.0)
-
-
-def _weighted_rerank(
-    vector_scores: dict[str, float],
-    keyword_scores: dict[str, float],
-    alpha: float = 0.5,
-) -> dict[str, float]:
-    """ReRanker that uses weighted average of scores."""
-    all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
-    normalized_vector_scores = _normalize_scores(vector_scores)
-    normalized_keyword_scores = _normalize_scores(keyword_scores)
-
-    return {
-        doc_id: (alpha * normalized_keyword_scores.get(doc_id, 0.0))
-        + ((1 - alpha) * normalized_vector_scores.get(doc_id, 0.0))
-        for doc_id in all_ids
-    }
-
-
-def _rrf_rerank(
-    vector_scores: dict[str, float],
-    keyword_scores: dict[str, float],
-    impact_factor: float = 60.0,
-) -> dict[str, float]:
-    """ReRanker that uses Reciprocal Rank Fusion."""
-    # Convert scores to ranks
-    vector_ranks = {
-        doc_id: i + 1 for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True))
-    }
-    keyword_ranks = {
-        doc_id: i + 1 for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True))
-    }
-
-    all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
-    rrf_scores = {}
-    for doc_id in all_ids:
-        vector_rank = vector_ranks.get(doc_id, float("inf"))
-        keyword_rank = keyword_ranks.get(doc_id, float("inf"))
-        # RRF formula: score = 1/(k + r) where k is impact_factor and r is the rank
-        rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank))
-    return rrf_scores
-
-
 def _make_sql_identifier(name: str) -> str:
     return re.sub(r"[^a-zA-Z0-9_]", "_", name)
 
@@ -398,14 +345,10 @@ class SQLiteVecIndex(EmbeddingIndex):
             for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
         }
 
-        # Combine scores using the specified reranker
-        if reranker_type == RERANKER_TYPE_WEIGHTED:
-            alpha = reranker_params.get("alpha", 0.5)
-            combined_scores = _weighted_rerank(vector_scores, keyword_scores, alpha)
-        else:
-            # Default to RRF for None, RRF, or any unknown types
-            impact_factor = reranker_params.get("impact_factor", 60.0)
-            combined_scores = _rrf_rerank(vector_scores, keyword_scores, impact_factor)
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )
 
         # Sort by combined score and get top k results
         sorted_items = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
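The inline helpers deleted above now live behind WeightedInMemoryAggregator.combine_search_results. For reference, the Reciprocal Rank Fusion math they implemented (score = 1/(k + rank) summed over both rankings, with k = impact_factor = 60 by default) can be restated in isolation like this; it is a re-statement of the removed _rrf_rerank helper, not the new utility:

# Standalone RRF re-statement of the deleted _rrf_rerank helper (illustration only).
def rrf_scores(vector_scores: dict[str, float], keyword_scores: dict[str, float], k: float = 60.0) -> dict[str, float]:
    def ranks(scores: dict[str, float]) -> dict[str, int]:
        ordered = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return {doc_id: i + 1 for i, (doc_id, _) in enumerate(ordered)}

    vr, kr = ranks(vector_scores), ranks(keyword_scores)
    all_ids = set(vector_scores) | set(keyword_scores)
    # Documents missing from one ranking get rank infinity, i.e. a zero contribution from that term.
    return {d: 1.0 / (k + vr.get(d, float("inf"))) + 1.0 / (k + kr.get(d, float("inf"))) for d in all_ids}


print(rrf_scores({"a": 0.9, "b": 0.2}, {"b": 3.0, "c": 1.0}))
# "b" appears in both rankings, so it receives contributions from both terms.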
@@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]:
     InlineProviderSpec(
         api=Api.batches,
         provider_type="inline::reference",
-        pip_packages=["openai"],
+        pip_packages=[],
         module="llama_stack.providers.inline.batches.reference",
         config_class="llama_stack.providers.inline.batches.reference.config.ReferenceBatchesImplConfig",
         api_dependencies=[
@@ -30,7 +30,7 @@ def available_providers() -> list[ProviderSpec]:
         adapter=AdapterSpec(
             adapter_type="huggingface",
             pip_packages=[
-                "datasets",
+                "datasets>=4.0.0",
             ],
             module="llama_stack.providers.remote.datasetio.huggingface",
             config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
@@ -42,7 +42,7 @@ def available_providers() -> list[ProviderSpec]:
         adapter=AdapterSpec(
             adapter_type="nvidia",
             pip_packages=[
-                "datasets",
+                "datasets>=4.0.0",
             ],
             module="llama_stack.providers.remote.datasetio.nvidia",
             config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",
@@ -75,7 +75,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="vllm",
-            pip_packages=["openai"],
+            pip_packages=[],
             module="llama_stack.providers.remote.inference.vllm",
             config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
             description="Remote vLLM inference provider for connecting to vLLM servers.",
@@ -116,7 +116,7 @@ def available_providers() -> list[ProviderSpec]:
         adapter=AdapterSpec(
             adapter_type="fireworks",
             pip_packages=[
-                "fireworks-ai<=0.18.0",
+                "fireworks-ai<=0.17.16",
             ],
             module="llama_stack.providers.remote.inference.fireworks",
             config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
@@ -151,9 +151,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="databricks",
-            pip_packages=[
-                "openai",
-            ],
+            pip_packages=[],
             module="llama_stack.providers.remote.inference.databricks",
             config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
             description="Databricks inference provider for running models on Databricks' unified analytics platform.",
@@ -163,9 +161,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="nvidia",
-            pip_packages=[
-                "openai",
-            ],
+            pip_packages=[],
             module="llama_stack.providers.remote.inference.nvidia",
             config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
             description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
@@ -175,7 +171,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="runpod",
-            pip_packages=["openai"],
+            pip_packages=[],
             module="llama_stack.providers.remote.inference.runpod",
             config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
             description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
@@ -292,11 +288,26 @@ Available Models:
         api=Api.inference,
         adapter=AdapterSpec(
             adapter_type="watsonx",
-            pip_packages=["ibm_watson_machine_learning"],
+            pip_packages=["ibm_watsonx_ai"],
             module="llama_stack.providers.remote.inference.watsonx",
             config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
             provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
             description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
         ),
     ),
+    remote_provider_spec(
+        api=Api.inference,
+        adapter=AdapterSpec(
+            adapter_type="azure",
+            pip_packages=["litellm"],
+            module="llama_stack.providers.remote.inference.azure",
+            config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
+            provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
+            description="""
+Azure OpenAI inference provider for accessing GPT models and other Azure services.
+Provider documentation
+https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
+""",
+        ),
+    ),
 ]
@@ -48,7 +48,7 @@ def available_providers() -> list[ProviderSpec]:
     InlineProviderSpec(
         api=Api.post_training,
         provider_type="inline::huggingface-gpu",
-        pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
+        pip_packages=["trl", "transformers", "peft", "datasets>=4.0.0", "torch"],
         module="llama_stack.providers.inline.post_training.huggingface",
         config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
         api_dependencies=[
@@ -38,7 +38,7 @@ def available_providers() -> list[ProviderSpec]:
     InlineProviderSpec(
         api=Api.scoring,
         provider_type="inline::braintrust",
-        pip_packages=["autoevals", "openai"],
+        pip_packages=["autoevals"],
         module="llama_stack.providers.inline.scoring.braintrust",
         config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig",
         api_dependencies=[
@@ -32,7 +32,7 @@ def available_providers() -> list[ProviderSpec]:
         ],
         module="llama_stack.providers.inline.tool_runtime.rag",
         config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
-        api_dependencies=[Api.vector_io, Api.inference],
+        api_dependencies=[Api.vector_io, Api.inference, Api.files],
         description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.",
     ),
     remote_provider_spec(
@@ -5,12 +5,13 @@
 # the root directory of this source tree.
 
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import AnthropicConfig
 from .models import MODEL_ENTRIES
 
 
-class AnthropicInferenceAdapter(LiteLLMOpenAIMixin):
+class AnthropicInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     def __init__(self, config: AnthropicConfig) -> None:
         LiteLLMOpenAIMixin.__init__(
             self,
@@ -26,3 +27,8 @@ class AnthropicInferenceAdapter(LiteLLMOpenAIMixin):
 
     async def shutdown(self) -> None:
         await super().shutdown()
+
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self):
+        return "https://api.anthropic.com/v1"
llama_stack/providers/remote/inference/azure/__init__.py (new file, 15 lines)
@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import AzureConfig
+
+
+async def get_adapter_impl(config: AzureConfig, _deps):
+    from .azure import AzureInferenceAdapter
+
+    impl = AzureInferenceAdapter(config)
+    await impl.initialize()
+    return impl
llama_stack/providers/remote/inference/azure/azure.py (new file, 64 lines)
@@ -0,0 +1,64 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+from urllib.parse import urljoin
+
+from llama_stack.apis.inference import ChatCompletionRequest
+from llama_stack.providers.utils.inference.litellm_openai_mixin import (
+    LiteLLMOpenAIMixin,
+)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+from .config import AzureConfig
+from .models import MODEL_ENTRIES
+
+
+class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+    def __init__(self, config: AzureConfig) -> None:
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            MODEL_ENTRIES,
+            litellm_provider_name="azure",
+            api_key_from_config=config.api_key.get_secret_value(),
+            provider_data_api_key_field="azure_api_key",
+            openai_compat_api_base=str(config.api_base),
+        )
+        self.config = config
+
+    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self) -> str:
+        """
+        Get the Azure API base URL.
+
+        Returns the Azure API base URL from the configuration.
+        """
+        return urljoin(str(self.config.api_base), "/openai/v1")
+
+    async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
+        # Get base parameters from parent
+        params = await super()._get_params(request)
+
+        # Add Azure specific parameters
+        provider_data = self.get_request_provider_data()
+        if provider_data:
+            if getattr(provider_data, "azure_api_key", None):
+                params["api_key"] = provider_data.azure_api_key
+            if getattr(provider_data, "azure_api_base", None):
+                params["api_base"] = provider_data.azure_api_base
+            if getattr(provider_data, "azure_api_version", None):
+                params["api_version"] = provider_data.azure_api_version
+            if getattr(provider_data, "azure_api_type", None):
+                params["api_type"] = provider_data.azure_api_type
+        else:
+            params["api_key"] = self.config.api_key.get_secret_value()
+            params["api_base"] = str(self.config.api_base)
+            params["api_version"] = self.config.api_version
+            params["api_type"] = self.config.api_type
+
+        return params
llama_stack/providers/remote/inference/azure/config.py (new file, 63 lines)
@@ -0,0 +1,63 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+from typing import Any
+
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class AzureProviderDataValidator(BaseModel):
+    azure_api_key: SecretStr = Field(
+        description="Azure API key for Azure",
+    )
+    azure_api_base: HttpUrl = Field(
+        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
+    )
+    azure_api_version: str | None = Field(
+        default=None,
+        description="Azure API version for Azure (e.g., 2024-06-01)",
+    )
+    azure_api_type: str | None = Field(
+        default="azure",
+        description="Azure API type for Azure (e.g., azure)",
+    )
+
+
+@json_schema_type
+class AzureConfig(BaseModel):
+    api_key: SecretStr = Field(
+        description="Azure API key for Azure",
+    )
+    api_base: HttpUrl = Field(
+        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
+    )
+    api_version: str | None = Field(
+        default_factory=lambda: os.getenv("AZURE_API_VERSION"),
+        description="Azure API version for Azure (e.g., 2024-12-01-preview)",
+    )
+    api_type: str | None = Field(
+        default_factory=lambda: os.getenv("AZURE_API_TYPE", "azure"),
+        description="Azure API type for Azure (e.g., azure)",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        api_key: str = "${env.AZURE_API_KEY:=}",
+        api_base: str = "${env.AZURE_API_BASE:=}",
+        api_version: str = "${env.AZURE_API_VERSION:=}",
+        api_type: str = "${env.AZURE_API_TYPE:=}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        return {
+            "api_key": api_key,
+            "api_base": api_base,
+            "api_version": api_version,
+            "api_type": api_type,
+        }
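
A note on the new AzureConfig above: because api_version and api_type use default_factory, their environment variables are read when a config object is instantiated rather than at import time. A minimal sketch of how that is expected to behave, assuming the model shown in this diff (the key and resource name below are placeholders):

import os

from pydantic import SecretStr

from llama_stack.providers.remote.inference.azure.config import AzureConfig

os.environ["AZURE_API_VERSION"] = "2024-12-01-preview"  # picked up by default_factory

config = AzureConfig(
    api_key=SecretStr("placeholder-key"),              # hypothetical key, illustration only
    api_base="https://my-resource.openai.azure.com",    # hypothetical Azure resource
)
print(config.api_version)  # "2024-12-01-preview", resolved at instantiation time
print(config.api_type)     # "azure" unless AZURE_API_TYPE overrides it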
llama_stack/providers/remote/inference/azure/models.py (new file, 28 lines)
@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.utils.inference.model_registry import (
+    ProviderModelEntry,
+)
+
+# https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions
+LLM_MODEL_IDS = [
+    "gpt-5",
+    "gpt-5-mini",
+    "gpt-5-nano",
+    "gpt-5-chat",
+    "o1",
+    "o1-mini",
+    "o3-mini",
+    "o4-mini",
+    "gpt-4.1",
+    "gpt-4.1-mini",
+    "gpt-4.1-nano",
+]
+
+SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]()
+
+MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES
@@ -53,6 +53,43 @@ from llama_stack.providers.utils.inference.prompt_adapter import (

 from .models import MODEL_ENTRIES

+REGION_PREFIX_MAP = {
+    "us": "us.",
+    "eu": "eu.",
+    "ap": "ap.",
+}
+
+
+def _get_region_prefix(region: str | None) -> str:
+    # AWS requires region prefixes for inference profiles
+    if region is None:
+        return "us."  # default to US when we don't know
+
+    # Handle case insensitive region matching
+    region_lower = region.lower()
+    for prefix in REGION_PREFIX_MAP:
+        if region_lower.startswith(f"{prefix}-"):
+            return REGION_PREFIX_MAP[prefix]
+
+    # Fallback to US for anything we don't recognize
+    return "us."
+
+
+def _to_inference_profile_id(model_id: str, region: str = None) -> str:
+    # Return ARNs unchanged
+    if model_id.startswith("arn:"):
+        return model_id
+
+    # Return inference profile IDs that already have regional prefixes
+    if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
+        return model_id
+
+    # Default to US East when no region is provided
+    if region is None:
+        region = "us-east-1"
+
+    return _get_region_prefix(region) + model_id
+
+
 class BedrockInferenceAdapter(
     ModelRegistryHelper,
@@ -166,8 +203,13 @@ class BedrockInferenceAdapter(
             options["repetition_penalty"] = sampling_params.repetition_penalty

         prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))
+
+        # Convert foundation model ID to inference profile ID
+        region_name = self.client.meta.region_name
+        inference_profile_id = _to_inference_profile_id(bedrock_model, region_name)
+
         return {
-            "modelId": bedrock_model,
+            "modelId": inference_profile_id,
             "body": json.dumps(
                 {
                     "prompt": prompt,
@@ -185,6 +227,11 @@ class BedrockInferenceAdapter(
         task_type: EmbeddingTaskType | None = None,
     ) -> EmbeddingsResponse:
         model = await self.model_store.get_model(model_id)
+
+        # Convert foundation model ID to inference profile ID
+        region_name = self.client.meta.region_name
+        inference_profile_id = _to_inference_profile_id(model.provider_resource_id, region_name)
+
         embeddings = []
         for content in contents:
             assert not content_has_media(content), "Bedrock does not support media for embeddings"
@@ -193,7 +240,7 @@ class BedrockInferenceAdapter(
             body = json.dumps(input_body)
             response = self.client.invoke_model(
                 body=body,
-                modelId=model.provider_resource_id,
+                modelId=inference_profile_id,
                 accept="application/json",
                 contentType="application/json",
             )
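
To make the intent of the Bedrock change above concrete, here is a small self-contained sketch of the region-prefix behaviour, condensed from the helpers in the hunk above (the model IDs are illustrative examples, not a claim about which Bedrock models exist):

REGION_PREFIX_MAP = {"us": "us.", "eu": "eu.", "ap": "ap."}


def _get_region_prefix(region: str | None) -> str:
    # Mirrors the helper added above: unknown or missing regions fall back to "us."
    if region is None:
        return "us."
    region_lower = region.lower()
    for prefix in REGION_PREFIX_MAP:
        if region_lower.startswith(f"{prefix}-"):
            return REGION_PREFIX_MAP[prefix]
    return "us."


def _to_inference_profile_id(model_id: str, region: str | None = None) -> str:
    # ARNs and already-prefixed inference profile IDs pass through untouched
    if model_id.startswith("arn:") or any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
        return model_id
    return _get_region_prefix(region or "us-east-1") + model_id


print(_to_inference_profile_id("meta.llama3-1-8b-instruct-v1:0", "eu-west-1"))   # eu.meta.llama3-1-8b-instruct-v1:0
print(_to_inference_profile_id("us.meta.llama3-1-8b-instruct-v1:0", "ap-south-1"))  # already prefixed, unchanged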
@@ -5,12 +5,13 @@
 # the root directory of this source tree.

 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import GeminiConfig
 from .models import MODEL_ENTRIES


-class GeminiInferenceAdapter(LiteLLMOpenAIMixin):
+class GeminiInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     def __init__(self, config: GeminiConfig) -> None:
         LiteLLMOpenAIMixin.__init__(
             self,
@@ -21,6 +22,11 @@ class GeminiInferenceAdapter(LiteLLMOpenAIMixin):
         )
         self.config = config

+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self):
+        return "https://generativelanguage.googleapis.com/v1beta/openai/"
+
     async def initialize(self) -> None:
         await super().initialize()
@@ -4,30 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from collections.abc import AsyncIterator
-from typing import Any
-
-from openai import AsyncOpenAI
-
-from llama_stack.apis.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChoiceDelta,
-    OpenAIChunkChoice,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-    OpenAISystemMessageParam,
-)
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
-from llama_stack.providers.utils.inference.openai_compat import (
-    prepare_openai_completion_params,
-)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .models import MODEL_ENTRIES


-class GroqInferenceAdapter(LiteLLMOpenAIMixin):
+class GroqInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     _config: GroqConfig

     def __init__(self, config: GroqConfig):
@@ -40,122 +25,14 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
         )
         self.config = config

+    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self) -> str:
+        return f"{self.config.url}/openai/v1"
+
     async def initialize(self):
         await super().initialize()

     async def shutdown(self):
         await super().shutdown()
-
-    def _get_openai_client(self) -> AsyncOpenAI:
-        return AsyncOpenAI(
-            base_url=f"{self.config.url}/openai/v1",
-            api_key=self.get_api_key(),
-        )
-
-    async def openai_chat_completion(
-        self,
-        model: str,
-        messages: list[OpenAIMessageParam],
-        frequency_penalty: float | None = None,
-        function_call: str | dict[str, Any] | None = None,
-        functions: list[dict[str, Any]] | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_completion_tokens: int | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        parallel_tool_calls: bool | None = None,
-        presence_penalty: float | None = None,
-        response_format: OpenAIResponseFormatParam | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        tool_choice: str | dict[str, Any] | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        top_logprobs: int | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
-    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        model_obj = await self.model_store.get_model(model)
-
-        # Groq does not support json_schema response format, so we need to convert it to json_object
-        if response_format and response_format.type == "json_schema":
-            response_format.type = "json_object"
-            schema = response_format.json_schema.get("schema", {})
-            response_format.json_schema = None
-            json_instructions = f"\nYour response should be a JSON object that matches the following schema: {schema}"
-            if messages and messages[0].role == "system":
-                messages[0].content = messages[0].content + json_instructions
-            else:
-                messages.insert(0, OpenAISystemMessageParam(content=json_instructions))
-
-        # Groq returns a 400 error if tools are provided but none are called
-        # So, set tool_choice to "required" to attempt to force a call
-        if tools and (not tool_choice or tool_choice == "auto"):
-            tool_choice = "required"
-
-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            messages=messages,
-            frequency_penalty=frequency_penalty,
-            function_call=function_call,
-            functions=functions,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_completion_tokens=max_completion_tokens,
-            max_tokens=max_tokens,
-            n=n,
-            parallel_tool_calls=parallel_tool_calls,
-            presence_penalty=presence_penalty,
-            response_format=response_format,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
-            tools=tools,
-            top_logprobs=top_logprobs,
-            top_p=top_p,
-            user=user,
-        )
-
-        # Groq does not support streaming requests that set response_format
-        fake_stream = False
-        if stream and response_format:
-            params["stream"] = False
-            fake_stream = True
-
-        response = await self._get_openai_client().chat.completions.create(**params)
-
-        if fake_stream:
-            chunk_choices = []
-            for choice in response.choices:
-                delta = OpenAIChoiceDelta(
-                    content=choice.message.content,
-                    role=choice.message.role,
-                    tool_calls=choice.message.tool_calls,
-                )
-                chunk_choice = OpenAIChunkChoice(
-                    delta=delta,
-                    finish_reason=choice.finish_reason,
-                    index=choice.index,
-                    logprobs=None,
-                )
-                chunk_choices.append(chunk_choice)
-            chunk = OpenAIChatCompletionChunk(
-                id=response.id,
-                choices=chunk_choices,
-                object="chat.completion.chunk",
-                created=response.created,
-                model=response.model,
-            )
-
-            async def _fake_stream_generator():
-                yield chunk
-
-            return _fake_stream_generator()
-        else:
-            return response
@@ -118,10 +118,10 @@ class OllamaInferenceAdapter(

     async def initialize(self) -> None:
         logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
-        health_response = await self.health()
-        if health_response["status"] == HealthStatus.ERROR:
+        r = await self.health()
+        if r["status"] == HealthStatus.ERROR:
             logger.warning(
-                "Ollama Server is not running, make sure to start it using `ollama serve` in a separate terminal"
+                f"Ollama Server is not running (message: {r['message']}). Make sure to start it using `ollama serve` in a separate terminal"
             )

     async def should_refresh_models(self) -> bool:
@@ -156,7 +156,7 @@ class OllamaInferenceAdapter(
             ),
             Model(
                 identifier="nomic-embed-text",
-                provider_resource_id="nomic-embed-text",
+                provider_resource_id="nomic-embed-text:latest",
                 provider_id=provider_id,
                 metadata={
                     "embedding_dimension": 768,
@@ -4,13 +4,26 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.


 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import SambaNovaImplConfig
 from .models import MODEL_ENTRIES


-class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
+class SambaNovaInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+    """
+    SambaNova Inference Adapter for Llama Stack.
+
+    Note: The inheritance order is important here. OpenAIMixin must come before
+    LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
+    is used instead of LiteLLMOpenAIMixin.check_model_availability().
+
+    - OpenAIMixin.check_model_availability() queries the /v1/models to check if a model exists
+    - LiteLLMOpenAIMixin.check_model_availability() checks the static registry within LiteLLM
+    """
+
     def __init__(self, config: SambaNovaImplConfig):
         self.config = config
         self.environment_available_models = []
@@ -24,3 +37,14 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
             download_images=True,  # SambaNova requires base64 image encoding
             json_schema_strict=False,  # SambaNova doesn't support strict=True yet
         )
+
+    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self) -> str:
+        """
+        Get the base URL for OpenAI mixin.
+
+        :return: The SambaNova base URL
+        """
+        return self.config.url
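
The docstring above leans on Python's method resolution order; a tiny sketch of why listing OpenAIMixin first wins (stand-in classes for illustration, not the real mixins):

class OpenAIMixinLike:
    def check_model_availability(self) -> str:
        return "queries the provider's /v1/models endpoint"


class LiteLLMOpenAIMixinLike:
    def check_model_availability(self) -> str:
        return "checks LiteLLM's static model registry"


class Adapter(OpenAIMixinLike, LiteLLMOpenAIMixinLike):
    pass


print(Adapter().check_model_availability())  # the OpenAIMixin-like version is resolved first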
@@ -6,16 +6,20 @@

 from typing import Any

+import google.auth.transport.requests
+from google.auth import default
+
 from llama_stack.apis.inference import ChatCompletionRequest
 from llama_stack.providers.utils.inference.litellm_openai_mixin import (
     LiteLLMOpenAIMixin,
 )
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import VertexAIConfig
 from .models import MODEL_ENTRIES


-class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
+class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     def __init__(self, config: VertexAIConfig) -> None:
         LiteLLMOpenAIMixin.__init__(
             self,
@@ -27,10 +31,31 @@ class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
         self.config = config

     def get_api_key(self) -> str:
-        # Vertex AI doesn't use API keys, it uses Application Default Credentials
-        # Return empty string to let litellm handle authentication via ADC
+        """
+        Get an access token for Vertex AI using Application Default Credentials.
+
+        Vertex AI uses ADC instead of API keys. This method obtains an access token
+        from the default credentials and returns it for use with the OpenAI-compatible client.
+        """
+        try:
+            # Get default credentials - will read from GOOGLE_APPLICATION_CREDENTIALS
+            credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
+            credentials.refresh(google.auth.transport.requests.Request())
+            return str(credentials.token)
+        except Exception:
+            # If we can't get credentials, return empty string to let LiteLLM handle it
+            # This allows the LiteLLM mixin to work with ADC directly
             return ""

+    def get_base_url(self) -> str:
+        """
+        Get the Vertex AI OpenAI-compatible API base URL.
+
+        Returns the Vertex AI OpenAI-compatible endpoint URL.
+        Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
+        """
+        return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
+
     async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
         # Get base parameters from parent
         params = await super()._get_params(request)
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import json
-from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import AsyncGenerator
 from typing import Any

 import httpx
@@ -38,13 +38,6 @@ from llama_stack.apis.inference import (
     LogProbConfig,
     Message,
     ModelStore,
-    OpenAIChatCompletion,
-    OpenAICompletion,
-    OpenAIEmbeddingData,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -71,11 +64,11 @@ from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
     convert_tool_call,
     get_sampling_options,
-    prepare_openai_completion_params,
     process_chat_completion_stream_response,
     process_completion_response,
     process_completion_stream_response,
 )
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack.providers.utils.inference.prompt_adapter import (
     completion_request_to_prompt,
     content_has_media,
@@ -288,7 +281,7 @@ async def _process_vllm_chat_completion_stream_response(
         yield c


-class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
+class VLLMInferenceAdapter(OpenAIMixin, Inference, ModelsProtocolPrivate):
     # automatically set by the resolver when instantiating the provider
     __provider_id__: str
     model_store: ModelStore | None = None
@@ -296,7 +289,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
     def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
         self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
         self.config = config
-        self.client = None

     async def initialize(self) -> None:
         if not self.config.url:
@@ -308,8 +300,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         return self.config.refresh_models

     async def list_models(self) -> list[Model] | None:
-        self._lazy_initialize_client()
-        assert self.client is not None  # mypy
         models = []
         async for m in self.client.models.list():
             model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
@@ -340,8 +330,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             HealthResponse: A dictionary containing the health status.
         """
         try:
-            client = self._create_client() if self.client is None else self.client
-            _ = [m async for m in client.models.list()]  # Ensure the client is initialized
+            _ = [m async for m in self.client.models.list()]  # Ensure the client is initialized
             return HealthResponse(status=HealthStatus.OK)
         except Exception as e:
             return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
@@ -351,19 +340,14 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             raise ValueError("Model store not set")
         return await self.model_store.get_model(model_id)

-    def _lazy_initialize_client(self):
-        if self.client is not None:
-            return
-
-        log.info(f"Initializing vLLM client with base_url={self.config.url}")
-        self.client = self._create_client()
-
-    def _create_client(self):
-        return AsyncOpenAI(
-            base_url=self.config.url,
-            api_key=self.config.api_token,
-            http_client=httpx.AsyncClient(verify=self.config.tls_verify),
-        )
+    def get_api_key(self):
+        return self.config.api_token
+
+    def get_base_url(self):
+        return self.config.url
+
+    def get_extra_client_params(self):
+        return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}

     async def completion(
         self,
@@ -374,7 +358,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         stream: bool | None = False,
         logprobs: LogProbConfig | None = None,
     ) -> CompletionResponse | AsyncGenerator[CompletionResponseStreamChunk, None]:
-        self._lazy_initialize_client()
         if sampling_params is None:
             sampling_params = SamplingParams()
         model = await self._get_model(model_id)
@@ -406,7 +389,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         logprobs: LogProbConfig | None = None,
         tool_config: ToolConfig | None = None,
     ) -> ChatCompletionResponse | AsyncGenerator[ChatCompletionResponseStreamChunk, None]:
-        self._lazy_initialize_client()
         if sampling_params is None:
             sampling_params = SamplingParams()
         model = await self._get_model(model_id)
@@ -479,16 +461,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             yield chunk

     async def register_model(self, model: Model) -> Model:
-        # register_model is called during Llama Stack initialization, hence we cannot init self.client if not initialized yet.
-        # self.client should only be created after the initialization is complete to avoid asyncio cross-context errors.
-        # Changing this may lead to unpredictable behavior.
-        client = self._create_client() if self.client is None else self.client
         try:
             model = await self.register_helper.register_model(model)
         except ValueError:
             pass  # Ignore statically unknown model, will check live listing
         try:
-            res = await client.models.list()
+            res = await self.client.models.list()
         except APIConnectionError as e:
             raise ValueError(
                 f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
@@ -543,8 +521,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         output_dimension: int | None = None,
         task_type: EmbeddingTaskType | None = None,
     ) -> EmbeddingsResponse:
-        self._lazy_initialize_client()
-        assert self.client is not None
         model = await self._get_model(model_id)

         kwargs = {}
@@ -560,154 +536,3 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):

         embeddings = [data.embedding for data in response.data]
         return EmbeddingsResponse(embeddings=embeddings)
-
-    async def openai_embeddings(
-        self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
-    ) -> OpenAIEmbeddingsResponse:
-        self._lazy_initialize_client()
-        assert self.client is not None
-        model_obj = await self._get_model(model)
-        assert model_obj.model_type == ModelType.embedding
-
-        # Convert input to list if it's a string
-        input_list = [input] if isinstance(input, str) else input
-
-        # Call vLLM embeddings endpoint with encoding_format
-        response = await self.client.embeddings.create(
-            model=model_obj.provider_resource_id,
-            input=input_list,
-            dimensions=dimensions,
-            encoding_format=encoding_format,
-        )
-
-        # Convert response to OpenAI format
-        data = [
-            OpenAIEmbeddingData(
-                embedding=embedding_data.embedding,
-                index=i,
-            )
-            for i, embedding_data in enumerate(response.data)
-        ]
-
-        # Not returning actual token usage since vLLM doesn't provide it
-        usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
-
-        return OpenAIEmbeddingsResponse(
-            data=data,
-            model=model_obj.provider_resource_id,
-            usage=usage,
-        )
-
-    async def openai_completion(
-        self,
-        model: str,
-        prompt: str | list[str] | list[int] | list[list[int]],
-        best_of: int | None = None,
-        echo: bool | None = None,
-        frequency_penalty: float | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        presence_penalty: float | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
-        guided_choice: list[str] | None = None,
-        prompt_logprobs: int | None = None,
-        suffix: str | None = None,
-    ) -> OpenAICompletion:
-        self._lazy_initialize_client()
-        model_obj = await self._get_model(model)
-
-        extra_body: dict[str, Any] = {}
-        if prompt_logprobs is not None and prompt_logprobs >= 0:
-            extra_body["prompt_logprobs"] = prompt_logprobs
-        if guided_choice:
-            extra_body["guided_choice"] = guided_choice
-
-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            prompt=prompt,
-            best_of=best_of,
-            echo=echo,
-            frequency_penalty=frequency_penalty,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_tokens=max_tokens,
-            n=n,
-            presence_penalty=presence_penalty,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            top_p=top_p,
-            user=user,
-            extra_body=extra_body,
-        )
-        return await self.client.completions.create(**params)  # type: ignore
-
-    async def openai_chat_completion(
-        self,
-        model: str,
-        messages: list[OpenAIMessageParam],
-        frequency_penalty: float | None = None,
-        function_call: str | dict[str, Any] | None = None,
-        functions: list[dict[str, Any]] | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_completion_tokens: int | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        parallel_tool_calls: bool | None = None,
-        presence_penalty: float | None = None,
-        response_format: OpenAIResponseFormatParam | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        tool_choice: str | dict[str, Any] | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        top_logprobs: int | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
-    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        self._lazy_initialize_client()
-        model_obj = await self._get_model(model)
-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            messages=messages,
-            frequency_penalty=frequency_penalty,
-            function_call=function_call,
-            functions=functions,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_completion_tokens=max_completion_tokens,
-            max_tokens=max_tokens,
-            n=n,
-            parallel_tool_calls=parallel_tool_calls,
-            presence_penalty=presence_penalty,
-            response_format=response_format,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
-            tools=tools,
-            top_logprobs=top_logprobs,
-            top_p=top_p,
-            user=user,
-        )
-        return await self.client.chat.completions.create(**params)  # type: ignore
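
The vLLM refactor above replaces the hand-rolled AsyncOpenAI construction with three small hooks, and the OpenAIMixin change later in this diff assembles the client from them. A hedged sketch of how those pieces are expected to fit together (the class name, endpoint, and token below are placeholders, not the real adapter):

import httpx
from openai import AsyncOpenAI


class _VLLMClientSketch:
    url = "http://localhost:8000/v1"   # hypothetical vLLM endpoint
    api_token = "placeholder-token"
    tls_verify = True

    def get_api_key(self) -> str:
        return self.api_token

    def get_base_url(self) -> str:
        return self.url

    def get_extra_client_params(self) -> dict:
        return {"http_client": httpx.AsyncClient(verify=self.tls_verify)}

    @property
    def client(self) -> AsyncOpenAI:
        # Mirrors how OpenAIMixin.client composes the pieces, per the mixin hunk at the end of this diff
        return AsyncOpenAI(
            api_key=self.get_api_key(),
            base_url=self.get_base_url(),
            **self.get_extra_client_params(),
        )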
@@ -7,8 +7,8 @@
 from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any

-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
+from ibm_watsonx_ai.foundation_models import Model
+from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
 from openai import AsyncOpenAI

 from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem
@@ -4,53 +4,55 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import os
+
 from pydantic import BaseModel, Field


 class BedrockBaseConfig(BaseModel):
     aws_access_key_id: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_ACCESS_KEY_ID"),
         description="The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID",
     )
     aws_secret_access_key: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_SECRET_ACCESS_KEY"),
         description="The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY",
     )
     aws_session_token: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_SESSION_TOKEN"),
         description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN",
     )
     region_name: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"),
         description="The default AWS Region to use, for example, us-west-1 or us-west-2."
         "Default use environment variable: AWS_DEFAULT_REGION",
     )
     profile_name: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_PROFILE"),
         description="The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE",
     )
     total_max_attempts: int | None = Field(
-        default=None,
+        default_factory=lambda: int(val) if (val := os.getenv("AWS_MAX_ATTEMPTS")) else None,
         description="An integer representing the maximum number of attempts that will be made for a single request, "
         "including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS",
     )
     retry_mode: str | None = Field(
-        default=None,
+        default_factory=lambda: os.getenv("AWS_RETRY_MODE"),
         description="A string representing the type of retries Boto3 will perform."
        "Default use environment variable: AWS_RETRY_MODE",
     )
     connect_timeout: float | None = Field(
-        default=60,
+        default_factory=lambda: float(os.getenv("AWS_CONNECT_TIMEOUT", "60")),
         description="The time in seconds till a timeout exception is thrown when attempting to make a connection. "
         "The default is 60 seconds.",
     )
     read_timeout: float | None = Field(
-        default=60,
+        default_factory=lambda: float(os.getenv("AWS_READ_TIMEOUT", "60")),
         description="The time in seconds till a timeout exception is thrown when attempting to read from a connection."
         "The default is 60 seconds.",
     )
     session_ttl: int | None = Field(
-        default=3600,
+        default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", "3600")),
         description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).",
     )
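
The switch from default=None to default_factory above matters because the environment variable is now read each time a config object is created, not baked in when the class is defined. A minimal stand-in model to illustrate the behaviour (this is not the real BedrockBaseConfig):

import os

from pydantic import BaseModel, Field


class _EnvConfigSketch(BaseModel):
    region_name: str | None = Field(default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"))


os.environ["AWS_DEFAULT_REGION"] = "us-west-2"
print(_EnvConfigSketch().region_name)  # "us-west-2", resolved at instantiation time

os.environ["AWS_DEFAULT_REGION"] = "eu-central-1"
print(_EnvConfigSketch().region_name)  # "eu-central-1", a later change is picked up too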
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
 import base64
 import struct
 from typing import TYPE_CHECKING
@@ -43,9 +44,11 @@ class SentenceTransformerEmbeddingMixin:
         task_type: EmbeddingTaskType | None = None,
     ) -> EmbeddingsResponse:
         model = await self.model_store.get_model(model_id)
-        embedding_model = self._load_sentence_transformer_model(model.provider_resource_id)
-        embeddings = embedding_model.encode(
-            [interleaved_content_as_str(content) for content in contents], show_progress_bar=False
+        embedding_model = await self._load_sentence_transformer_model(model.provider_resource_id)
+        embeddings = await asyncio.to_thread(
+            embedding_model.encode,
+            [interleaved_content_as_str(content) for content in contents],
+            show_progress_bar=False,
         )
         return EmbeddingsResponse(embeddings=embeddings)
@@ -64,8 +67,8 @@ class SentenceTransformerEmbeddingMixin:

         # Get the model and generate embeddings
         model_obj = await self.model_store.get_model(model)
-        embedding_model = self._load_sentence_transformer_model(model_obj.provider_resource_id)
-        embeddings = embedding_model.encode(input_list, show_progress_bar=False)
+        embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id)
+        embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False)

         # Convert embeddings to the requested format
         data = []
@@ -93,7 +96,7 @@ class SentenceTransformerEmbeddingMixin:
             usage=usage,
         )

-    def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer":
+    async def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer":
         global EMBEDDING_MODELS

         loaded_model = EMBEDDING_MODELS.get(model)
@@ -101,8 +104,12 @@ class SentenceTransformerEmbeddingMixin:
             return loaded_model

         log.info(f"Loading sentence transformer for {model}...")

+        def _load_model():
             from sentence_transformers import SentenceTransformer

-        loaded_model = SentenceTransformer(model)
+            return SentenceTransformer(model)
+
+        loaded_model = await asyncio.to_thread(_load_model)
         EMBEDDING_MODELS[model] = loaded_model
         return loaded_model
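
The pattern introduced above is the standard asyncio.to_thread offload: the blocking SentenceTransformer load and encode calls now run on a worker thread so the event loop stays responsive. A small self-contained sketch of the same pattern (the encode function is a stand-in, not the real model):

import asyncio


def blocking_encode(texts: list[str]) -> list[list[float]]:
    # Stand-in for SentenceTransformer.encode, which is CPU-bound and blocking
    return [[0.0, 0.0, 0.0] for _ in texts]


async def main() -> None:
    embeddings = await asyncio.to_thread(blocking_encode, ["hello", "world"])
    print(len(embeddings))  # 2, computed off the event loop


asyncio.run(main())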
@ -3,6 +3,11 @@
|
||||||
#
|
#
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
import asyncio
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
ListOpenAIChatCompletionResponse,
|
ListOpenAIChatCompletionResponse,
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
|
@ -10,24 +15,43 @@ from llama_stack.apis.inference import (
|
||||||
OpenAIMessageParam,
|
OpenAIMessageParam,
|
||||||
Order,
|
Order,
|
||||||
)
|
)
|
||||||
from llama_stack.core.datatypes import AccessRule
|
from llama_stack.core.datatypes import AccessRule, InferenceStoreConfig
|
||||||
from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR
|
from llama_stack.log import get_logger
|
||||||
|
|
||||||
from ..sqlstore.api import ColumnDefinition, ColumnType
|
from ..sqlstore.api import ColumnDefinition, ColumnType
|
||||||
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
||||||
from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl
|
from ..sqlstore.sqlstore import SqlStoreConfig, SqlStoreType, sqlstore_impl
|
||||||
|
|
||||||
|
logger = get_logger(name=__name__, category="inference_store")
|
||||||
|
|
||||||
|
|
||||||
class InferenceStore:
|
class InferenceStore:
|
||||||
def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]):
|
def __init__(
|
||||||
if not sql_store_config:
|
self,
|
||||||
sql_store_config = SqliteSqlStoreConfig(
|
config: InferenceStoreConfig | SqlStoreConfig,
|
||||||
db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
|
policy: list[AccessRule],
|
||||||
|
):
|
||||||
|
# Handle backward compatibility
|
||||||
|
if not isinstance(config, InferenceStoreConfig):
|
||||||
|
# Legacy: SqlStoreConfig passed directly as config
|
||||||
|
config = InferenceStoreConfig(
|
||||||
|
sql_store_config=config,
|
||||||
)
|
)
|
||||||
self.sql_store_config = sql_store_config
|
|
||||||
|
self.config = config
|
||||||
|
self.sql_store_config = config.sql_store_config
|
||||||
self.sql_store = None
|
self.sql_store = None
|
||||||
self.policy = policy
|
self.policy = policy
|
||||||
|
|
||||||
|
# Disable write queue for SQLite to avoid concurrency issues
|
||||||
|
self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite
|
||||||
|
|
||||||
|
# Async write queue and worker control
|
||||||
|
self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None
|
||||||
|
self._worker_tasks: list[asyncio.Task[Any]] = []
|
||||||
|
self._max_write_queue_size: int = config.max_write_queue_size
|
||||||
|
self._num_writers: int = max(1, config.num_writers)
|
||||||
|
|
||||||
     async def initialize(self):
         """Create the necessary tables if they don't exist."""
         self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config))
@@ -42,23 +66,109 @@ class InferenceStore:
                 },
             )
 
+        if self.enable_write_queue:
+            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
+            for _ in range(self._num_writers):
+                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
+        else:
+            logger.info("Write queue disabled for SQLite to avoid concurrency issues")
+
+    async def shutdown(self) -> None:
+        if not self._worker_tasks:
+            return
+        if self._queue is not None:
+            await self._queue.join()
+        for t in self._worker_tasks:
+            if not t.done():
+                t.cancel()
+        for t in self._worker_tasks:
+            try:
+                await t
+            except asyncio.CancelledError:
+                pass
+        self._worker_tasks.clear()
+
+    async def flush(self) -> None:
+        """Wait for all queued writes to complete. Useful for testing."""
+        if self.enable_write_queue and self._queue is not None:
+            await self._queue.join()
+
     async def store_chat_completion(
         self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
     ) -> None:
-        if not self.sql_store:
+        if self.enable_write_queue:
+            if self._queue is None:
+                raise ValueError("Inference store is not initialized")
+            try:
+                self._queue.put_nowait((chat_completion, input_messages))
+            except asyncio.QueueFull:
+                logger.warning(
+                    f"Write queue full; adding chat completion id={getattr(chat_completion, 'id', '<unknown>')}"
+                )
+                await self._queue.put((chat_completion, input_messages))
+        else:
+            await self._write_chat_completion(chat_completion, input_messages)
+
+    async def _worker_loop(self) -> None:
+        assert self._queue is not None
+        while True:
+            try:
+                item = await self._queue.get()
+            except asyncio.CancelledError:
+                break
+            chat_completion, input_messages = item
+            try:
+                await self._write_chat_completion(chat_completion, input_messages)
+            except Exception as e:  # noqa: BLE001
+                logger.error(f"Error writing chat completion: {e}")
+            finally:
+                self._queue.task_done()
+
+    async def _write_chat_completion(
+        self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
+    ) -> None:
+        if self.sql_store is None:
             raise ValueError("Inference store is not initialized")
 
         data = chat_completion.model_dump()
-        await self.sql_store.insert(
-            table="chat_completions",
-            data={
+        record_data = {
             "id": data["id"],
             "created": data["created"],
             "model": data["model"],
             "choices": data["choices"],
             "input_messages": [message.model_dump() for message in input_messages],
-            },
+        }
+
+        try:
+            await self.sql_store.insert(
+                table="chat_completions",
+                data=record_data,
+            )
+        except IntegrityError as e:
+            # Duplicate chat completion IDs can be generated during tests especially if they are replaying
+            # recorded responses across different tests. No need to warn or error under those circumstances.
+            # In the wild, this is not likely to happen at all (no evidence) so we aren't really hiding any problem.
+
+            # Check if it's a unique constraint violation
+            error_message = str(e.orig) if e.orig else str(e)
+            if self._is_unique_constraint_error(error_message):
+                # Update the existing record instead
+                await self.sql_store.update(table="chat_completions", data=record_data, where={"id": data["id"]})
+            else:
+                # Re-raise if it's not a unique constraint error
+                raise
+
+    def _is_unique_constraint_error(self, error_message: str) -> bool:
+        """Check if the error is specifically a unique constraint violation."""
+        error_lower = error_message.lower()
+        return any(
+            indicator in error_lower
+            for indicator in [
+                "unique constraint failed",  # SQLite
+                "duplicate key",  # PostgreSQL
+                "unique violation",  # PostgreSQL alternative
+                "duplicate entry",  # MySQL
+            ]
         )
 
     async def list_chat_completions(

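The hunk above routes chat-completion writes through a bounded asyncio queue drained by background worker tasks, with shutdown() and flush() waiting for the queue to empty. A minimal, self-contained sketch of that pattern follows; the class and record names are illustrative stand-ins, not the InferenceStore API.

import asyncio
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("queued-writer")


class QueuedWriter:
    """Illustrative bounded write queue: producers enqueue, worker tasks drain."""

    def __init__(self, max_queue_size: int = 4, num_writers: int = 2) -> None:
        self._queue: asyncio.Queue[dict] = asyncio.Queue(maxsize=max_queue_size)
        self._workers: list[asyncio.Task] = []
        self._num_writers = num_writers

    async def start(self) -> None:
        # Spawn the background writers (mirrors initialize() creating worker tasks).
        self._workers = [asyncio.create_task(self._worker_loop()) for _ in range(self._num_writers)]

    async def submit(self, record: dict) -> None:
        # Non-blocking enqueue; fall back to a blocking put when the queue is full,
        # which applies backpressure instead of dropping the record.
        try:
            self._queue.put_nowait(record)
        except asyncio.QueueFull:
            log.warning("queue full, applying backpressure for id=%s", record.get("id"))
            await self._queue.put(record)

    async def flush(self) -> None:
        # Wait until every queued record has been marked task_done().
        await self._queue.join()

    async def shutdown(self) -> None:
        await self._queue.join()
        for t in self._workers:
            t.cancel()
        await asyncio.gather(*self._workers, return_exceptions=True)

    async def _worker_loop(self) -> None:
        while True:
            record = await self._queue.get()
            try:
                await asyncio.sleep(0.01)  # stand-in for the real database insert
                log.info("wrote record id=%s", record.get("id"))
            finally:
                self._queue.task_done()


async def main() -> None:
    writer = QueuedWriter()
    await writer.start()
    for i in range(8):
        await writer.submit({"id": f"chatcmpl-{i}"})
    await writer.shutdown()


if __name__ == "__main__":
    asyncio.run(main())
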
@@ -67,6 +67,17 @@ class OpenAIMixin(ABC):
         """
         pass
 
+    def get_extra_client_params(self) -> dict[str, Any]:
+        """
+        Get any extra parameters to pass to the AsyncOpenAI client.
+
+        Child classes can override this method to provide additional parameters
+        such as timeout settings, proxies, etc.
+
+        :return: A dictionary of extra parameters
+        """
+        return {}
+
     @property
     def client(self) -> AsyncOpenAI:
         """
@@ -78,6 +89,7 @@ class OpenAIMixin(ABC):
         return AsyncOpenAI(
             api_key=self.get_api_key(),
             base_url=self.get_base_url(),
+            **self.get_extra_client_params(),
         )
 
     async def _get_provider_model_id(self, model: str) -> str:
@@ -124,10 +136,15 @@ class OpenAIMixin(ABC):
         """
         Direct OpenAI completion API call.
         """
-        if guided_choice is not None:
-            logger.warning("guided_choice is not supported by the OpenAI API. Ignoring.")
-        if prompt_logprobs is not None:
-            logger.warning("prompt_logprobs is not supported by the OpenAI API. Ignoring.")
+        # Handle parameters that are not supported by OpenAI API, but may be by the provider
+        # prompt_logprobs is supported by vLLM
+        # guided_choice is supported by vLLM
+        # TODO: test coverage
+        extra_body: dict[str, Any] = {}
+        if prompt_logprobs is not None and prompt_logprobs >= 0:
+            extra_body["prompt_logprobs"] = prompt_logprobs
+        if guided_choice:
+            extra_body["guided_choice"] = guided_choice
 
         # TODO: fix openai_completion to return type compatible with OpenAI's API response
         return await self.client.completions.create(  # type: ignore[no-any-return]
@@ -150,7 +167,8 @@ class OpenAIMixin(ABC):
             top_p=top_p,
             user=user,
             suffix=suffix,
-        )
+            ),
+            extra_body=extra_body,
         )
 
     async def openai_chat_completion(

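The completion hunk above forwards vLLM-only parameters (prompt_logprobs, guided_choice) through the request body instead of dropping them. A small sketch of how that looks from the client side, assuming the openai Python SDK and a vLLM-compatible server at a local URL; the model name and endpoint are placeholders.

import asyncio

from openai import AsyncOpenAI

BASE_URL = "http://localhost:8000/v1"  # assumption: a local vLLM-compatible endpoint


async def main() -> None:
    client = AsyncOpenAI(base_url=BASE_URL, api_key="not-needed", timeout=30.0)

    # Parameters the OpenAI API itself does not define travel in extra_body,
    # which the SDK merges verbatim into the JSON request body.
    extra_body: dict = {
        "guided_choice": ["yes", "no"],  # vLLM extension
        "prompt_logprobs": 0,            # vLLM extension
    }

    completion = await client.completions.create(
        model="my-model",                # assumption: whatever the server actually serves
        prompt="Is the sky blue?",
        max_tokens=4,
        extra_body=extra_body,
    )
    print(completion.choices[0].text)


if __name__ == "__main__":
    asyncio.run(main())
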
@@ -172,6 +172,20 @@ class AuthorizedSqlStore:
 
         return results.data[0] if results.data else None
 
+    async def update(self, table: str, data: Mapping[str, Any], where: Mapping[str, Any]) -> None:
+        """Update rows with automatic access control attribute capture."""
+        enhanced_data = dict(data)
+
+        current_user = get_authenticated_user()
+        if current_user:
+            enhanced_data["owner_principal"] = current_user.principal
+            enhanced_data["access_attributes"] = current_user.attributes
+        else:
+            enhanced_data["owner_principal"] = None
+            enhanced_data["access_attributes"] = None
+
+        await self.sql_store.update(table, enhanced_data, where)
+
     async def delete(self, table: str, where: Mapping[str, Any]) -> None:
         """Delete rows with automatic access control filtering."""
         await self.sql_store.delete(table, where)

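The new update() mirrors insert(): it stamps the caller's principal and access attributes onto the row before delegating to the underlying store. A tiny illustrative sketch of that stamping step, using stand-in types rather than the AuthorizedSqlStore API:

from dataclasses import dataclass, field
from typing import Any


@dataclass
class User:
    principal: str
    attributes: dict[str, list[str]] = field(default_factory=dict)


def stamp_ownership(data: dict[str, Any], current_user: User | None) -> dict[str, Any]:
    # Copy the row and attach ownership metadata so access-control filters can use it later.
    enhanced = dict(data)
    if current_user:
        enhanced["owner_principal"] = current_user.principal
        enhanced["access_attributes"] = current_user.attributes
    else:
        enhanced["owner_principal"] = None
        enhanced["access_attributes"] = None
    return enhanced


if __name__ == "__main__":
    row = {"id": "chatcmpl-123", "model": "llama3"}
    print(stamp_ownership(row, User("alice", {"teams": ["ml"]})))
    print(stamp_ownership(row, None))
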
@@ -18,6 +18,7 @@ from functools import wraps
 from typing import Any
 
 from llama_stack.apis.telemetry import (
+    Event,
     LogSeverity,
     Span,
     SpanEndPayload,
@@ -98,7 +99,7 @@ class BackgroundLogger:
     def __init__(self, api: Telemetry, capacity: int = 100000):
         self.api = api
         self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
-        self.worker_thread = threading.Thread(target=self._process_logs, daemon=True)
+        self.worker_thread = threading.Thread(target=self._worker, daemon=True)
         self.worker_thread.start()
         self._last_queue_full_log_time: float = 0.0
         self._dropped_since_last_notice: int = 0
@@ -118,12 +119,16 @@ class BackgroundLogger:
             self._last_queue_full_log_time = current_time
             self._dropped_since_last_notice = 0
 
-    def _process_logs(self):
+    def _worker(self):
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        loop.run_until_complete(self._process_logs())
+
+    async def _process_logs(self):
         while True:
             try:
                 event = self.log_queue.get()
-                # figure out how to use a thread's native loop
-                asyncio.run(self.api.log_event(event))
+                await self.api.log_event(event)
             except Exception:
                 import traceback
 
@@ -136,6 +141,19 @@ class BackgroundLogger:
         self.log_queue.join()
 
 
+def enqueue_event(event: Event) -> None:
+    """Enqueue a telemetry event to the background logger if available.
+
+    This provides a non-blocking path for routers and other hot paths to
+    submit telemetry without awaiting the Telemetry API, reducing contention
+    with the main event loop.
+    """
+    global BACKGROUND_LOGGER
+    if BACKGROUND_LOGGER is None:
+        raise RuntimeError("Telemetry API not initialized")
+    BACKGROUND_LOGGER.log_event(event)
+
+
 class TraceContext:
     spans: list[Span] = []
 
@@ -256,11 +274,7 @@ class TelemetryHandler(logging.Handler):
         if record.module in ("asyncio", "selector_events"):
             return
 
-        global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER
+        global CURRENT_TRACE_CONTEXT
 
-        if BACKGROUND_LOGGER is None:
-            raise RuntimeError("Telemetry API not initialized")
-
         context = CURRENT_TRACE_CONTEXT.get()
         if context is None:
             return
@@ -269,7 +283,7 @@ class TelemetryHandler(logging.Handler):
         if span is None:
             return
 
-        BACKGROUND_LOGGER.log_event(
+        enqueue_event(
             UnstructuredLogEvent(
                 trace_id=span.trace_id,
                 span_id=span.span_id,

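The telemetry change above replaces a per-event asyncio.run() with one long-lived event loop owned by the daemon worker thread, and exposes enqueue_event() as the non-blocking entry point. A minimal sketch of that thread-plus-loop pattern under illustrative names:

import asyncio
import queue
import threading
import time


class BackgroundSink:
    def __init__(self, capacity: int = 1000) -> None:
        self._queue: queue.Queue = queue.Queue(maxsize=capacity)
        self._thread = threading.Thread(target=self._worker, daemon=True)
        self._thread.start()

    def submit(self, event: dict) -> None:
        # Non-blocking from the caller's perspective; drop the event if the queue is full.
        try:
            self._queue.put_nowait(event)
        except queue.Full:
            pass

    def _worker(self) -> None:
        # One event loop for the whole lifetime of the thread, instead of
        # spinning up a new loop for every event.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self._drain())

    async def _drain(self) -> None:
        while True:
            # A blocking get is acceptable here: this loop runs only this coroutine.
            event = self._queue.get()
            await self._log(event)
            self._queue.task_done()

    async def _log(self, event: dict) -> None:
        await asyncio.sleep(0)  # stand-in for the real async telemetry call
        print("logged:", event)


if __name__ == "__main__":
    sink = BackgroundSink()
    for i in range(3):
        sink.submit({"seq": i})
    time.sleep(0.2)  # give the daemon thread a moment before the process exits
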
@@ -67,6 +67,38 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
                 raise AuthenticationRequiredError(exc) from exc
             if i == len(connection_strategies) - 1:
                 raise
+        except* httpx.ConnectError as eg:
+            # Connection refused, server down, network unreachable
+            if i == len(connection_strategies) - 1:
+                error_msg = f"Failed to connect to MCP server at {endpoint}: Connection refused"
+                logger.error(f"MCP connection error: {error_msg}")
+                raise ConnectionError(error_msg) from eg
+            else:
+                logger.warning(
+                    f"failed to connect to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
+                )
+        except* httpx.TimeoutException as eg:
+            # Request timeout, server too slow
+            if i == len(connection_strategies) - 1:
+                error_msg = f"MCP server at {endpoint} timed out"
+                logger.error(f"MCP timeout error: {error_msg}")
+                raise TimeoutError(error_msg) from eg
+            else:
+                logger.warning(
+                    f"MCP server at {endpoint} timed out via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
+                )
+        except* httpx.RequestError as eg:
+            # DNS resolution failures, network errors, invalid URLs
+            if i == len(connection_strategies) - 1:
+                # Get the first exception's message for the error string
+                exc_msg = str(eg.exceptions[0]) if eg.exceptions else "Unknown error"
+                error_msg = f"Network error connecting to MCP server at {endpoint}: {exc_msg}"
+                logger.error(f"MCP network error: {error_msg}")
+                raise ConnectionError(error_msg) from eg
+            else:
+                logger.warning(
+                    f"network error connecting to MCP server at {endpoint} via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
+                )
         except* McpError:
             if i < len(connection_strategies) - 1:
                 logger.warning(

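The MCP hunk above relies on except* (Python 3.11+) to unpack exception groups raised inside the connection attempt and to fall back to the next strategy. A compact sketch of the same control flow, assuming httpx and endpoints that are only placeholders:

import asyncio

import httpx

ENDPOINTS = ["http://localhost:9999/sse", "http://localhost:9999/mcp"]  # assumption: nothing listening here


async def probe(url: str) -> None:
    async with httpx.AsyncClient(timeout=1.0) as client:
        await client.get(url)


async def main() -> None:
    for i, url in enumerate(ENDPOINTS):
        try:
            # Failures inside a TaskGroup surface as an ExceptionGroup,
            # which except* lets us branch on per member type.
            async with asyncio.TaskGroup() as tg:
                tg.create_task(probe(url))
            return  # success: stop trying fallbacks
        except* httpx.ConnectError as eg:
            if i == len(ENDPOINTS) - 1:
                raise ConnectionError(f"could not reach {url}") from eg
            print(f"connect error on {url}, trying next strategy")
        except* httpx.TimeoutException as eg:
            if i == len(ENDPOINTS) - 1:
                raise TimeoutError(f"{url} timed out") from eg
            print(f"timeout on {url}, trying next strategy")


if __name__ == "__main__":
    asyncio.run(main())
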
@@ -30,6 +30,9 @@ from openai.types.completion_choice import CompletionChoice
 CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
 CompletionChoice.model_rebuild()
 
+REPO_ROOT = Path(__file__).parent.parent.parent
+DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/recordings"
+
 
 class InferenceMode(StrEnum):
     LIVE = "live"
@@ -51,7 +54,7 @@ def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict
 
 
 def get_inference_mode() -> InferenceMode:
-    return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower())
+    return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "replay").lower())
 
 
 def setup_inference_recording():
@@ -60,28 +63,18 @@ def setup_inference_recording():
     to increase their reliability and reduce reliance on expensive, external services.
 
     Currently, this is only supported for OpenAI and Ollama clients. These should cover the vast majority of use cases.
-    Calls to the /models endpoint are not currently trapped. We probably need to add support for this.
 
-    Two environment variables are required:
-    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'.
-    - LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in.
+    Two environment variables are supported:
+    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'. Default is 'replay'.
+    - LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in. Default is 'tests/integration/recordings'.
 
-    The recordings are stored in a SQLite database and a JSON file for each request. The SQLite database is used to
-    quickly find the correct recording for a given request. The JSON files are used to store the request and response
-    bodies.
+    The recordings are stored as JSON files.
     """
     mode = get_inference_mode()
 
-    if mode not in InferenceMode:
-        raise ValueError(f"Invalid LLAMA_STACK_TEST_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")
-
     if mode == InferenceMode.LIVE:
         return None
 
-    if "LLAMA_STACK_TEST_RECORDING_DIR" not in os.environ:
-        raise ValueError("LLAMA_STACK_TEST_RECORDING_DIR must be set for recording or replaying")
-    storage_dir = os.environ["LLAMA_STACK_TEST_RECORDING_DIR"]
+    storage_dir = os.environ.get("LLAMA_STACK_TEST_RECORDING_DIR", DEFAULT_STORAGE_DIR)
 
     return inference_recording(mode=mode, storage_dir=storage_dir)
 
 
@@ -112,7 +105,11 @@ def _deserialize_response(data: dict[str, Any]) -> Any:
 
             return cls.model_validate(data["__data__"])
         except (ImportError, AttributeError, TypeError, ValueError) as e:
-            logger.warning(f"Failed to deserialize object of type {data['__type__']}: {e}")
+            logger.warning(f"Failed to deserialize object of type {data['__type__']} with model_validate: {e}")
+            try:
+                return cls.model_construct(**data["__data__"])
+            except Exception as e:
+                logger.warning(f"Failed to deserialize object of type {data['__type__']} with model_construct: {e}")
                 return data["__data__"]
 
     return data
@@ -134,8 +131,8 @@ class ResponseStorage:
     def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
         """Store a request/response pair."""
         # Generate unique response filename
-        response_file = f"{request_hash[:12]}.json"
-        response_path = self.responses_dir / response_file
+        short_hash = request_hash[:12]
+        response_file = f"{short_hash}.json"
 
         # Serialize response body if needed
         serialized_response = dict(response)
@@ -147,6 +144,14 @@ class ResponseStorage:
             # Handle single response
             serialized_response["body"] = _serialize_response(serialized_response["body"])
 
+        # If this is an Ollama /api/tags recording, include models digest in filename to distinguish variants
+        endpoint = request.get("endpoint")
+        if endpoint in ("/api/tags", "/v1/models"):
+            digest = _model_identifiers_digest(endpoint, response)
+            response_file = f"models-{short_hash}-{digest}.json"
+
+        response_path = self.responses_dir / response_file
+
         # Save response to JSON file
         with open(response_path, "w") as f:
             json.dump({"request": request, "response": serialized_response}, f, indent=2)
@@ -161,6 +166,17 @@ class ResponseStorage:
         if not response_path.exists():
             return None
 
+        return _recording_from_file(response_path)
+
+    def _model_list_responses(self, short_hash: str) -> list[dict[str, Any]]:
+        results: list[dict[str, Any]] = []
+        for path in self.responses_dir.glob(f"models-{short_hash}-*.json"):
+            data = _recording_from_file(path)
+            results.append(data)
+        return results
+
+
+def _recording_from_file(response_path) -> dict[str, Any]:
     with open(response_path) as f:
         data = json.load(f)
 
@@ -176,6 +192,61 @@ class ResponseStorage:
     return cast(dict[str, Any], data)
 
 
+def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
+    def _extract_model_identifiers():
+        """Extract a stable set of identifiers for model-list endpoints.
+
+        Supported endpoints:
+        - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
+        - '/v1/models' (OpenAI): response body has 'data': [ { id: ... }, ... ]
+        Returns a list of unique identifiers or None if structure doesn't match.
+        """
+        body = response["body"]
+        if endpoint == "/api/tags":
+            items = body.get("models")
+            idents = [m.model for m in items]
+        else:
+            items = body.get("data")
+            idents = [m.id for m in items]
+        return sorted(set(idents))
+
+    identifiers = _extract_model_identifiers()
+    return hashlib.sha1(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]
+
+
+def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
+    """Return a single, unioned recording for supported model-list endpoints."""
+    seen: dict[str, dict[str, Any]] = {}
+    for rec in records:
+        body = rec["response"]["body"]
+        if endpoint == "/api/tags":
+            items = body.models
+        elif endpoint == "/v1/models":
+            items = body.data
+        else:
+            items = []
+
+        for m in items:
+            if endpoint == "/v1/models":
+                key = m.id
+            else:
+                key = m.model
+            seen[key] = m
+
+    ordered = [seen[k] for k in sorted(seen.keys())]
+    canonical = records[0]
+    canonical_req = canonical.get("request", {})
+    if isinstance(canonical_req, dict):
+        canonical_req["endpoint"] = endpoint
+    if endpoint == "/v1/models":
+        body = {"data": ordered, "object": "list"}
+    else:
+        from ollama import ListResponse
+
+        body = ListResponse(models=ordered)
+    return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}
+
+
 async def _patched_inference_method(original_method, self, client_type, endpoint, *args, **kwargs):
     global _current_mode, _current_storage
 

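The helpers above fingerprint each recorded model list with a short SHA-1 digest of its sorted identifiers and, at replay time, union all matching recordings. A tiny self-contained illustration of that digest-and-union idea; the model names are made up:

import hashlib


def model_list_digest(model_ids: list[str]) -> str:
    """Stable 8-hex-char fingerprint of a set of model identifiers."""
    canonical = "|".join(sorted(set(model_ids)))
    return hashlib.sha1(canonical.encode("utf-8")).hexdigest()[:8]


def union_model_lists(recordings: list[list[str]]) -> list[str]:
    """Merge several recorded model lists into one sorted, de-duplicated list."""
    seen: set[str] = set()
    for ids in recordings:
        seen.update(ids)
    return sorted(seen)


if __name__ == "__main__":
    run_a = ["llama3.2:3b", "all-minilm:l6-v2"]
    run_b = ["llama3.2:3b", "llama-guard3:1b"]
    print(model_list_digest(run_a))            # differs from run_b's digest
    print(model_list_digest(run_b))
    print(union_model_lists([run_a, run_b]))   # what replay mode would serve
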
@@ -195,8 +266,6 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
         raise ValueError(f"Unknown client type: {client_type}")
 
     url = base_url.rstrip("/") + endpoint
 
-    # Normalize request for matching
     method = "POST"
     headers = {}
     body = kwargs
@@ -204,6 +273,11 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     request_hash = normalize_request(method, url, headers, body)
 
     if _current_mode == InferenceMode.REPLAY:
+        # Special handling for model-list endpoints: return union of all responses
+        if endpoint in ("/api/tags", "/v1/models"):
+            records = _current_storage._model_list_responses(request_hash[:12])
+            recording = _combine_model_list_responses(endpoint, records)
+        else:
             recording = _current_storage.find_recording(request_hash)
         if recording:
             response_body = recording["response"]["body"]
@@ -222,7 +296,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
                 f"No recorded response found for request hash: {request_hash}\n"
                 f"Request: {method} {url} {body}\n"
                 f"Model: {body.get('model', 'unknown')}\n"
-                f"To record this response, run with LLAMA_STACK_INFERENCE_MODE=record"
+                f"To record this response, run with LLAMA_STACK_TEST_INFERENCE_MODE=record"
             )
 
     elif _current_mode == InferenceMode.RECORD:
@@ -274,12 +348,14 @@ def patch_inference_clients():
     from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
     from openai.resources.completions import AsyncCompletions
     from openai.resources.embeddings import AsyncEmbeddings
+    from openai.resources.models import AsyncModels
 
     # Store original methods for both OpenAI and Ollama clients
     _original_methods = {
         "chat_completions_create": AsyncChatCompletions.create,
         "completions_create": AsyncCompletions.create,
         "embeddings_create": AsyncEmbeddings.create,
+        "models_list": AsyncModels.list,
         "ollama_generate": OllamaAsyncClient.generate,
         "ollama_chat": OllamaAsyncClient.chat,
         "ollama_embed": OllamaAsyncClient.embed,
@@ -304,10 +380,16 @@ def patch_inference_clients():
             _original_methods["embeddings_create"], self, "openai", "/v1/embeddings", *args, **kwargs
         )
 
+    async def patched_models_list(self, *args, **kwargs):
+        return await _patched_inference_method(
+            _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
+        )
+
     # Apply OpenAI patches
     AsyncChatCompletions.create = patched_chat_completions_create
     AsyncCompletions.create = patched_completions_create
     AsyncEmbeddings.create = patched_embeddings_create
+    AsyncModels.list = patched_models_list
 
     # Create patched methods for Ollama client
     async def patched_ollama_generate(self, *args, **kwargs):
@@ -361,11 +443,13 @@ def unpatch_inference_clients():
     from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
     from openai.resources.completions import AsyncCompletions
     from openai.resources.embeddings import AsyncEmbeddings
+    from openai.resources.models import AsyncModels
 
     # Restore OpenAI client methods
     AsyncChatCompletions.create = _original_methods["chat_completions_create"]
     AsyncCompletions.create = _original_methods["completions_create"]
     AsyncEmbeddings.create = _original_methods["embeddings_create"]
+    AsyncModels.list = _original_methods["models_list"]
 
     # Restore Ollama client methods if they were patched
     OllamaAsyncClient.generate = _original_methods["ollama_generate"]
@@ -379,16 +463,10 @@ def unpatch_inference_clients():
 
 
 @contextmanager
-def inference_recording(mode: str = "live", storage_dir: str | Path | None = None) -> Generator[None, None, None]:
+def inference_recording(mode: str, storage_dir: str | Path | None = None) -> Generator[None, None, None]:
     """Context manager for inference recording/replaying."""
     global _current_mode, _current_storage
 
-    # Set defaults
-    if storage_dir is None:
-        storage_dir_path = Path.home() / ".llama" / "recordings"
-    else:
-        storage_dir_path = Path(storage_dir)
-
     # Store previous state
     prev_mode = _current_mode
     prev_storage = _current_storage
@@ -397,7 +475,9 @@ def inference_recording(mode: str = "live", storage_dir: str | Path | None = Non
     _current_mode = mode
 
     if mode in ["record", "replay"]:
-        _current_storage = ResponseStorage(storage_dir_path)
+        if storage_dir is None:
+            raise ValueError("storage_dir is required for record and replay modes")
+        _current_storage = ResponseStorage(Path(storage_dir))
         patch_inference_clients()
 
     yield

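The recording hunks above make 'replay' the default mode and give the storage directory a repository default, so tests run against checked-in recordings unless told otherwise. A sketch of how that switch is typically driven from the environment; the context manager here is a local stub with the same shape, since the real module path is not shown in the diff:

import os
from contextlib import contextmanager
from pathlib import Path


@contextmanager
def inference_recording(mode: str, storage_dir: str | Path | None = None):
    # Stub mirroring the patched helper: record/replay modes require a directory.
    if mode in ("record", "replay") and storage_dir is None:
        raise ValueError("storage_dir is required for record and replay modes")
    print(f"{mode} mode, recordings under {storage_dir}")
    yield


def get_mode() -> str:
    # Mirrors get_inference_mode(): the default is now 'replay' rather than 'live'.
    return os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "replay").lower()


if __name__ == "__main__":
    mode = get_mode()
    storage = os.environ.get("LLAMA_STACK_TEST_RECORDING_DIR", "tests/integration/recordings")
    if mode == "live":
        print("live mode: no patching")
    else:
        with inference_recording(mode=mode, storage_dir=storage):
            pass  # run the client calls to be recorded or replayed here
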
509 llama_stack/ui/package-lock.json (generated)
@@ -10,7 +10,7 @@
   "dependencies": {
     "@radix-ui/react-collapsible": "^1.1.12",
     "@radix-ui/react-dialog": "^1.1.13",
-    "@radix-ui/react-dropdown-menu": "^2.1.14",
+    "@radix-ui/react-dropdown-menu": "^2.1.16",
     "@radix-ui/react-select": "^2.2.5",
     "@radix-ui/react-separator": "^1.1.7",
     "@radix-ui/react-slot": "^1.2.3",
@@ -18,18 +18,18 @@
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "framer-motion": "^12.23.12",
-    "llama-stack-client": "^0.2.20",
+    "llama-stack-client": "^0.2.21",
-    "lucide-react": "^0.510.0",
+    "lucide-react": "^0.542.0",
     "next": "15.3.3",
     "next-auth": "^4.24.11",
     "next-themes": "^0.4.6",
     "react": "^19.0.0",
-    "react-dom": "^19.0.0",
+    "react-dom": "^19.1.1",
     "react-markdown": "^10.1.0",
     "remark-gfm": "^4.0.1",
     "remeda": "^2.30.0",
     "shiki": "^1.29.2",
-    "sonner": "^2.0.6",
+    "sonner": "^2.0.7",
     "tailwind-merge": "^3.3.1"
   },
   "devDependencies": {
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@tailwindcss/oxide": {
|
"node_modules/@tailwindcss/oxide": {
|
||||||
"version": "4.1.6",
|
"version": "4.1.6",
|
||||||
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz",
|
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz",
|
||||||
|
@ -3707,6 +3845,13 @@
|
||||||
"tailwindcss": "4.1.6"
|
"tailwindcss": "4.1.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@tailwindcss/postcss/node_modules/tailwindcss": {
|
||||||
|
"version": "4.1.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
|
||||||
|
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@testing-library/dom": {
|
"node_modules/@testing-library/dom": {
|
||||||
"version": "10.4.1",
|
"version": "10.4.1",
|
||||||
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
|
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
|
||||||
|
@ -4079,9 +4224,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/react-dom": {
|
"node_modules/@types/react-dom": {
|
||||||
"version": "19.1.5",
|
"version": "19.1.9",
|
||||||
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.1.5.tgz",
|
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.1.9.tgz",
|
||||||
"integrity": "sha512-CMCjrWucUBZvohgZxkjd6S9h0nZxXjzus6yDfUb+xLxYM7VvjKNH1tQrE9GWLql1XoOP4/Ds3bwFqShHUYraGg==",
|
"integrity": "sha512-qXRuZaOsAdXKFyOhRBg6Lqqc0yay13vN7KrIg4L7N4aaHN68ma9OK3NE1BoDFgFOTfM7zg+3/8+2n8rLUH3OKQ==",
|
||||||
"devOptional": true,
|
"devOptional": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
|
@ -10147,9 +10292,9 @@
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/llama-stack-client": {
|
"node_modules/llama-stack-client": {
|
||||||
"version": "0.2.20",
|
"version": "0.2.21",
|
||||||
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.20.tgz",
|
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.21.tgz",
|
||||||
"integrity": "sha512-1vD5nizTX5JEW8TADxKgy/P1W8YZoPSpdnmfxbdYbWgpQ3BWtbvLS6jmDk7VwVA5fRC4895VfHsRDfS1liHarw==",
|
"integrity": "sha512-rjU2Vx5xStxDYavU8K1An/SYXiQQjroLcK98B+p0Paz/a7OgRao2S0YwvThJjPUyChY4fO03UIXP9LpmHqlXWQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@types/node": "^18.11.18",
|
"@types/node": "^18.11.18",
|
||||||
|
@ -10240,9 +10385,9 @@
|
||||||
"license": "ISC"
|
"license": "ISC"
|
||||||
},
|
},
|
||||||
"node_modules/lucide-react": {
|
"node_modules/lucide-react": {
|
||||||
"version": "0.510.0",
|
"version": "0.542.0",
|
||||||
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.510.0.tgz",
|
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.542.0.tgz",
|
||||||
"integrity": "sha512-p8SQRAMVh7NhsAIETokSqDrc5CHnDLbV29mMnzaXx+Vc/hnqQzwI2r0FMWCcoTXnbw2KEjy48xwpGdEL+ck06Q==",
|
"integrity": "sha512-w3hD8/SQB7+lzU2r4VdFyzzOzKnUjTZIF/MQJGSSvni7Llewni4vuViRppfRAa2guOsY5k4jZyxw/i9DQHv+dw==",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||||
|
@ -12448,24 +12593,24 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react": {
|
"node_modules/react": {
|
||||||
"version": "19.1.0",
|
"version": "19.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz",
|
||||||
"integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
|
"integrity": "sha512-w8nqGImo45dmMIfljjMwOGtbmC/mk4CMYhWIicdSflH91J9TyCyczcPFXJzrZ/ZXcgGRFeP6BU0BEJTw6tZdfQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-dom": {
|
"node_modules/react-dom": {
|
||||||
"version": "19.1.0",
|
"version": "19.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.1.tgz",
|
||||||
"integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
|
"integrity": "sha512-Dlq/5LAZgF0Gaz6yiqZCf6VCcZs1ghAJyrsu84Q/GT0gV+mCxbfmKNoGRKBYMJ8IEdGPqu49YWXD02GCknEDkw==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"scheduler": "^0.26.0"
|
"scheduler": "^0.26.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"react": "^19.1.0"
|
"react": "^19.1.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-is": {
|
"node_modules/react-is": {
|
||||||
|
@ -13285,9 +13430,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/sonner": {
|
"node_modules/sonner": {
|
||||||
"version": "2.0.6",
|
"version": "2.0.7",
|
||||||
"resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.6.tgz",
|
"resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.7.tgz",
|
||||||
"integrity": "sha512-yHFhk8T/DK3YxjFQXIrcHT1rGEeTLliVzWbO0xN8GberVun2RiBnxAjXAYpZrqwEVHBG9asI/Li8TAAhN9m59Q==",
|
"integrity": "sha512-W6ZN4p58k8aDKA4XPcx2hpIQXBRAgyiWVkYhT7CvK6D3iAu7xjvVyhQHg2/iaKJZ1XVJ4r7XuwGL+WGEK37i9w==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc",
|
"react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc",
|
||||||
|
@ -13712,9 +13857,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/tailwindcss": {
|
"node_modules/tailwindcss": {
|
||||||
"version": "4.1.6",
|
"version": "4.1.13",
|
||||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
|
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz",
|
||||||
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
|
"integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
package.json
@@ -15,7 +15,7 @@
   "dependencies": {
     "@radix-ui/react-collapsible": "^1.1.12",
     "@radix-ui/react-dialog": "^1.1.13",
-    "@radix-ui/react-dropdown-menu": "^2.1.14",
+    "@radix-ui/react-dropdown-menu": "^2.1.16",
     "@radix-ui/react-select": "^2.2.5",
     "@radix-ui/react-separator": "^1.1.7",
     "@radix-ui/react-slot": "^1.2.3",
@@ -23,18 +23,18 @@
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "framer-motion": "^12.23.12",
-    "llama-stack-client": "^0.2.20",
-    "lucide-react": "^0.510.0",
+    "llama-stack-client": "^0.2.21",
+    "lucide-react": "^0.542.0",
     "next": "15.3.3",
     "next-auth": "^4.24.11",
     "next-themes": "^0.4.6",
     "react": "^19.0.0",
-    "react-dom": "^19.0.0",
+    "react-dom": "^19.1.1",
     "react-markdown": "^10.1.0",
     "remark-gfm": "^4.0.1",
     "remeda": "^2.30.0",
     "shiki": "^1.29.2",
-    "sonner": "^2.0.6",
+    "sonner": "^2.0.7",
     "tailwind-merge": "^3.3.1"
   },
   "devDependencies": {
Some files were not shown because too many files have changed in this diff.