Merge branch 'llamastack:main' into model_unregisteration_error_message
.github/CODEOWNERS (2 changed lines)

@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1
+* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
.github/ISSUE_TEMPLATE/config.yml (2 changed lines)

@@ -2,7 +2,7 @@ blank_issues_enabled: false
 contact_links:
   - name: Have you read the docs?
-    url: https://llamastack.github.io/latest/providers/external/index.html
+    url: https://llamastack.github.io/providers/external/index.html
     about: Much help can be found in the docs
   - name: Start a discussion
     url: https://github.com/llamastack/llama-stack/discussions/new/
.github/TRIAGERS.md (1 changed line)

@@ -1,2 +1 @@
 # This file documents Triage members in the Llama Stack community
-@franciscojavierarceo
.github/workflows/README.md (1 changed line)

@@ -12,6 +12,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
+| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
 | Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
.github/workflows/conformance.yml (91 changed lines)

@@ -1,6 +1,11 @@
 # API Conformance Tests
 # This workflow ensures that API changes maintain backward compatibility and don't break existing integrations
 # It runs schema validation and OpenAPI diff checks to catch breaking changes early
+#
+# The workflow handles both monolithic and split API specifications:
+# - If split specs exist (stable/experimental/deprecated), they are stitched together for comparison
+# - If only monolithic spec exists, it is used directly
+# This allows for clean API organization while maintaining robust conformance testing

 name: API Conformance Tests

@@ -11,10 +16,13 @@ on:
     branches: [ main ]
   pull_request:
     branches: [ main ]
-    types: [opened, synchronize, reopened]
+    types: [opened, synchronize, reopened, edited]
     paths:
-      - 'docs/static/llama-stack-spec.yaml'
-      - 'docs/static/llama-stack-spec.html'
+      - 'docs/static/llama-stack-spec.yaml' # Legacy monolithic spec
+      - 'docs/static/stable-llama-stack-spec.yaml' # Stable APIs spec
+      - 'docs/static/experimental-llama-stack-spec.yaml' # Experimental APIs spec
+      - 'docs/static/deprecated-llama-stack-spec.yaml' # Deprecated APIs spec
+      - 'docs/static/llama-stack-spec.html' # Legacy HTML spec
      - '.github/workflows/conformance.yml' # This workflow itself

 concurrency:

@@ -27,14 +35,31 @@ jobs:
   check-schema-compatibility:
     runs-on: ubuntu-latest
     steps:
-      # Using specific version 4.1.7 because 5.0.0 fails when trying to run this locally using `act`
-      # This ensures consistent behavior between local testing and CI
       - name: Checkout PR Code
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          fetch-depth: 0
+
+      # Check if we should skip conformance testing due to breaking changes
+      - name: Check if conformance test should be skipped
+        id: skip-check
+        run: |
+          PR_TITLE="${{ github.event.pull_request.title }}"
+
+          # Skip if title contains "!:" indicating breaking change (like "feat!:")
+          if [[ "$PR_TITLE" == *"!:"* ]]; then
+            echo "skip=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          # Get all commits in this PR and check for BREAKING CHANGE footer
+          git log --format="%B" ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }} | \
+            grep -q "BREAKING CHANGE:" && echo "skip=true" >> $GITHUB_OUTPUT || echo "skip=false" >> $GITHUB_OUTPUT
+        shell: bash
       # Checkout the base branch to compare against (usually main)
       # This allows us to diff the current changes against the previous state
       - name: Checkout Base Branch
+        if: steps.skip-check.outputs.skip != 'true'
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
         with:
           ref: ${{ github.event.pull_request.base.ref }}

@@ -42,6 +67,7 @@ jobs:

       # Cache oasdiff to avoid checksum failures and speed up builds
       - name: Cache oasdiff
+        if: steps.skip-check.outputs.skip != 'true'
         id: cache-oasdiff
         uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830
         with:

@@ -50,20 +76,69 @@

       # Install oasdiff: https://github.com/oasdiff/oasdiff, a tool for detecting breaking changes in OpenAPI specs.
       - name: Install oasdiff
-        if: steps.cache-oasdiff.outputs.cache-hit != 'true'
+        if: steps.skip-check.outputs.skip != 'true' && steps.cache-oasdiff.outputs.cache-hit != 'true'
         run: |
           curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh
           cp /usr/local/bin/oasdiff ~/oasdiff

       # Setup cached oasdiff
       - name: Setup cached oasdiff
-        if: steps.cache-oasdiff.outputs.cache-hit == 'true'
+        if: steps.skip-check.outputs.skip != 'true' && steps.cache-oasdiff.outputs.cache-hit == 'true'
         run: |
           sudo cp ~/oasdiff /usr/local/bin/oasdiff
           sudo chmod +x /usr/local/bin/oasdiff
+
+      # Install yq for YAML processing
+      - name: Install yq
+        run: |
+          sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
+          sudo chmod +x /usr/local/bin/yq
+
+      # Verify API specs exist for conformance testing
+      - name: Check API Specs
+        if: steps.skip-check.outputs.skip != 'true'
+        run: |
+          echo "Checking for API specification files..."
+
+          # Check current branch
+          if [ -f "docs/static/stable-llama-stack-spec.yaml" ]; then
+            echo "✓ Found stable API spec in current branch"
+            CURRENT_SPEC="docs/static/stable-llama-stack-spec.yaml"
+          elif [ -f "docs/static/llama-stack-spec.yaml" ]; then
+            echo "✓ Found monolithic API spec in current branch"
+            CURRENT_SPEC="docs/static/llama-stack-spec.yaml"
+          else
+            echo "❌ No API specs found in current branch"
+            exit 1
+          fi
+
+          # Check base branch
+          if [ -f "base/docs/static/stable-llama-stack-spec.yaml" ]; then
+            echo "✓ Found stable API spec in base branch"
+            BASE_SPEC="base/docs/static/stable-llama-stack-spec.yaml"
+          elif [ -f "base/docs/static/llama-stack-spec.yaml" ]; then
+            echo "✓ Found monolithic API spec in base branch"
+            BASE_SPEC="base/docs/static/llama-stack-spec.yaml"
+          else
+            echo "❌ No API specs found in base branch"
+            exit 1
+          fi
+
+          # Export for next step
+          echo "BASE_SPEC=${BASE_SPEC}" >> $GITHUB_ENV
+          echo "CURRENT_SPEC=${CURRENT_SPEC}" >> $GITHUB_ENV
+
+          echo "Will compare: ${BASE_SPEC} -> ${CURRENT_SPEC}"

       # Run oasdiff to detect breaking changes in the API specification
       # This step will fail if incompatible changes are detected, preventing breaking changes from being merged
       - name: Run OpenAPI Breaking Change Diff
+        if: steps.skip-check.outputs.skip != 'true'
         run: |
-          oasdiff breaking --fail-on ERR base/docs/static/llama-stack-spec.yaml docs/static/llama-stack-spec.yaml --match-path '^/v1/'
+          oasdiff breaking --fail-on ERR $BASE_SPEC $CURRENT_SPEC --match-path '^/v1/'
+
+      # Report when test is skipped
+      - name: Report skip reason
+        if: steps.skip-check.outputs.skip == 'true'
+        run: |
+          echo "Conformance test skipped due to breaking change indicator"
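For reviewers who want to reproduce the conformance check locally, the workflow above boils down to a single oasdiff invocation against the two spec files. A minimal sketch, assuming oasdiff is already installed and that the base branch has been checked out into `base/` the same way the workflow does:

```bash
# Pick the stable spec if it exists, otherwise fall back to the monolithic spec
# (mirrors the "Check API Specs" step; the paths are the ones the workflow itself uses).
BASE_SPEC=base/docs/static/stable-llama-stack-spec.yaml
[ -f "$BASE_SPEC" ] || BASE_SPEC=base/docs/static/llama-stack-spec.yaml
CURRENT_SPEC=docs/static/stable-llama-stack-spec.yaml
[ -f "$CURRENT_SPEC" ] || CURRENT_SPEC=docs/static/llama-stack-spec.yaml

# Fail on breaking changes to stable /v1/ paths, exactly as the CI step does
oasdiff breaking --fail-on ERR "$BASE_SPEC" "$CURRENT_SPEC" --match-path '^/v1/'
```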
.github/workflows/integration-auth-tests.yml (2 changed lines)

@@ -84,6 +84,8 @@ jobs:
           yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/run.yaml
           cat $run_dir/run.yaml

+          # avoid line breaks in the server log, especially because we grep it below.
+          export COLUMNS=1984
           nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &

       - name: Wait for Llama Stack server to be ready
.github/workflows/integration-tests.yml (25 changed lines)

@@ -42,18 +42,27 @@ jobs:
   run-replay-mode-tests:
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

     strategy:
       fail-fast: false
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
-        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        suite: [base, vision]
+        # Define (setup, suite) pairs - they are always matched and cannot be independent
+        # Weekly schedule (Sun 1 AM): vllm+base
+        # Input test-setup=ollama-vision: ollama-vision+vision
+        # Default (including test-setup=ollama): both ollama+base and ollama-vision+vision
+        config: >-
+          ${{
+            github.event.schedule == '1 0 * * 0'
+            && fromJSON('[{"setup": "vllm", "suite": "base"}]')
+            || github.event.inputs.test-setup == 'ollama-vision'
+            && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
+            || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+          }}

     steps:
       - name: Checkout repository

@@ -64,14 +73,14 @@
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
-          setup: ${{ matrix.setup }}
-          suite: ${{ matrix.suite }}
+          setup: ${{ matrix.config.setup }}
+          suite: ${{ matrix.config.suite }}
           inference-mode: 'replay'

       - name: Run tests
         uses: ./.github/actions/run-and-record-tests
         with:
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
-          setup: ${{ matrix.setup }}
+          setup: ${{ matrix.config.setup }}
           inference-mode: 'replay'
-          suite: ${{ matrix.suite }}
+          suite: ${{ matrix.config.suite }}
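The new `config` entry pairs each inference setup with its test suite, so the matrix expands to explicit objects rather than a cross-product of independent axes. A hypothetical rendering of the default expansion (no schedule, no `test-setup` input), assuming `client-type` still multiplies across it:

```yaml
# Sketch only: what the default matrix roughly looks like after GitHub evaluates the expressions
matrix:
  client-type: [library, server]
  python-version: ["3.12"]
  client-version: ["latest"]
  config:
    - { setup: "ollama", suite: "base" }
    - { setup: "ollama-vision", suite: "vision" }
# -> 2 (client-type) x 1 x 1 x 2 (config) = 4 jobs,
#    each job reading matrix.config.setup and matrix.config.suite
```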
.github/workflows/precommit-trigger.yml (new file, 227 added lines)

@@ -0,0 +1,227 @@
+name: Pre-commit Bot
+
+run-name: Pre-commit bot for PR #${{ github.event.issue.number }}
+
+on:
+  issue_comment:
+    types: [created]
+
+jobs:
+  pre-commit:
+    # Only run on pull request comments
+    if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit')
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Check comment author and get PR details
+        id: check_author
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            // Get PR details
+            const pr = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.issue.number
+            });
+
+            // Check if commenter has write access or is the PR author
+            const commenter = context.payload.comment.user.login;
+            const prAuthor = pr.data.user.login;
+
+            let hasPermission = false;
+
+            // Check if commenter is PR author
+            if (commenter === prAuthor) {
+              hasPermission = true;
+              console.log(`Comment author ${commenter} is the PR author`);
+            } else {
+              // Check if commenter has write/admin access
+              try {
+                const permission = await github.rest.repos.getCollaboratorPermissionLevel({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  username: commenter
+                });
+
+                const level = permission.data.permission;
+                hasPermission = ['write', 'admin', 'maintain'].includes(level);
+                console.log(`Comment author ${commenter} has permission: ${level}`);
+              } catch (error) {
+                console.log(`Could not check permissions for ${commenter}: ${error.message}`);
+              }
+            }
+
+            if (!hasPermission) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: `❌ @${commenter} You don't have permission to trigger pre-commit. Only PR authors or repository collaborators can run this command.`
+              });
+              core.setFailed(`User ${commenter} does not have permission`);
+              return;
+            }
+
+            // Save PR info for later steps
+            core.setOutput('pr_number', context.issue.number);
+            core.setOutput('pr_head_ref', pr.data.head.ref);
+            core.setOutput('pr_head_sha', pr.data.head.sha);
+            core.setOutput('pr_head_repo', pr.data.head.repo.full_name);
+            core.setOutput('pr_base_ref', pr.data.base.ref);
+            core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name);
+            core.setOutput('authorized', 'true');
+
+      - name: React to comment
+        if: steps.check_author.outputs.authorized == 'true'
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            await github.rest.reactions.createForIssueComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              comment_id: context.payload.comment.id,
+              content: 'rocket'
+            });
+
+      - name: Comment starting
+        if: steps.check_author.outputs.authorized == 'true'
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: ${{ steps.check_author.outputs.pr_number }},
+              body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...`
+            });
+
+      - name: Checkout PR branch (same-repo)
+        if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false'
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          ref: ${{ steps.check_author.outputs.pr_head_ref }}
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Checkout PR branch (fork)
+        if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true'
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          repository: ${{ steps.check_author.outputs.pr_head_repo }}
+          ref: ${{ steps.check_author.outputs.pr_head_ref }}
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Verify checkout
+        if: steps.check_author.outputs.authorized == 'true'
+        run: |
+          echo "Current SHA: $(git rev-parse HEAD)"
+          echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}"
+          if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then
+            echo "::error::Checked out SHA does not match expected SHA"
+            exit 1
+          fi
+
+      - name: Set up Python
+        if: steps.check_author.outputs.authorized == 'true'
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
+        with:
+          python-version: '3.12'
+          cache: pip
+          cache-dependency-path: |
+            **/requirements*.txt
+            .pre-commit-config.yaml
+
+      - name: Set up Node.js
+        if: steps.check_author.outputs.authorized == 'true'
+        uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: 'llama_stack/ui/'
+
+      - name: Install npm dependencies
+        if: steps.check_author.outputs.authorized == 'true'
+        run: npm ci
+        working-directory: llama_stack/ui
+
+      - name: Run pre-commit
+        if: steps.check_author.outputs.authorized == 'true'
+        id: precommit
+        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+        continue-on-error: true
+        env:
+          SKIP: no-commit-to-branch
+          RUFF_OUTPUT_FORMAT: github
+
+      - name: Check for changes
+        if: steps.check_author.outputs.authorized == 'true'
+        id: changes
+        run: |
+          if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+            echo "Changes detected after pre-commit"
+          else
+            echo "has_changes=false" >> $GITHUB_OUTPUT
+            echo "No changes after pre-commit"
+          fi
+
+      - name: Commit and push changes
+        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
+        run: |
+          git config --local user.email "github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "github-actions[bot]"
+
+          git add -A
+          git commit -m "style: apply pre-commit fixes
+
+          🤖 Applied by @github-actions bot via pre-commit workflow"
+
+          # Push changes
+          git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }}
+
+      - name: Comment success with changes
+        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: ${{ steps.check_author.outputs.pr_number }},
+              body: `✅ Pre-commit hooks completed successfully!\n\n🔧 Changes have been committed and pushed to the PR branch.`
+            });
+
+      - name: Comment success without changes
+        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: ${{ steps.check_author.outputs.pr_number }},
+              body: `✅ Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.`
+            });
+
+      - name: Comment failure
+        if: failure()
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: ${{ steps.check_author.outputs.pr_number }},
+              body: `❌ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.`
+            });
.github/workflows/providers-build.yml (4 changed lines)

@@ -112,7 +112,7 @@ jobs:
           fi
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi

@@ -150,7 +150,7 @@ jobs:
           fi
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
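The same entrypoint check can be run by hand against a locally built image; a small sketch, where the image tag is a placeholder standing in for whatever `$IMAGE_ID` resolves to in CI:

```bash
IMAGE_ID=localhost/distribution-ci-tests:dev   # placeholder tag, not taken from the workflow
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' "$IMAGE_ID")
echo "Entrypoint: $entrypoint"
# The expected value changed in this PR from the python -m invocation to the CLI form:
if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
  echo "Entrypoint is not correct"
  exit 1
fi
```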
.github/workflows/python-build-test.yml (2 changed lines)

@@ -24,7 +24,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install uv
-        uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
+        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6.8.0
         with:
          python-version: ${{ matrix.python-version }}
          activate-environment: true
@@ -61,7 +61,7 @@ Before pushing your changes, make sure that the pre-commit hooks have passed suc
 We actively welcome your pull requests. However, please read the following. This is heavily inspired by [Ghostty](https://github.com/ghostty-org/ghostty/blob/main/CONTRIBUTING.md).

-If in doubt, please open a [discussion](https://github.com/meta-llama/llama-stack/discussions); we can always convert that to an issue later.
+If in doubt, please open a [discussion](https://github.com/llamastack/llama-stack/discussions); we can always convert that to an issue later.

 ### Issues
 We use GitHub issues to track public bugs. Please ensure your description is

@@ -165,8 +165,8 @@ Building a stack image will use the production version of the `llama-stack` and
 Example:
 ```bash
 cd work/
-git clone https://github.com/meta-llama/llama-stack.git
-git clone https://github.com/meta-llama/llama-stack-client-python.git
+git clone https://github.com/llamastack/llama-stack.git
+git clone https://github.com/llamastack/llama-stack-client-python.git
 cd llama-stack
 LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
 ```
@@ -7,7 +7,7 @@
 [](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
 [](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)

-[**Quick Start**](https://llamastack.github.io/latest/getting_started/index.html) | [**Documentation**](https://llamastack.github.io/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
+[**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

 ### ✨🎉 Llama 4 Support 🎉✨

@@ -120,7 +120,7 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on

 ### API Providers
 Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
-Please checkout for [full list](https://llamastack.github.io/latest/providers/index.html)
+Please checkout for [full list](https://llamastack.github.io/docs/providers)

 | API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
 |:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|

@@ -151,7 +151,7 @@ Please checkout for [full list](https://llamastack.github.io/latest/providers/in
 | NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
 | NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |

-> **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/latest/providers/external/index.html) documentation.
+> **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation.

 ### Distributions
docs/docs/api-overview.md (new file, 49 added lines)

@@ -0,0 +1,49 @@
+# API Reference Overview
+
+The Llama Stack provides a comprehensive set of APIs organized by stability level to help you choose the right endpoints for your use case.
+
+## 🟢 Stable APIs
+
+**Production-ready APIs with backward compatibility guarantees.**
+
+These APIs are fully tested, documented, and stable. They follow semantic versioning principles and maintain backward compatibility within major versions. Recommended for production applications.
+
+[**Browse Stable APIs →**](./api/llama-stack-specification)
+
+**Key Features:**
+- ✅ Backward compatibility guaranteed
+- ✅ Comprehensive testing and validation
+- ✅ Production-ready reliability
+- ✅ Long-term support
+
+---
+
+## 🟡 Experimental APIs
+
+**Preview APIs that may change before becoming stable.**
+
+These APIs include v1alpha and v1beta endpoints that are feature-complete but may undergo changes based on feedback. Great for exploring new capabilities and providing feedback.
+
+[**Browse Experimental APIs →**](./api-experimental/llama-stack-specification-experimental-apis)
+
+**Key Features:**
+- 🧪 Latest features and capabilities
+- 🧪 May change based on user feedback
+- 🧪 Active development and iteration
+- 🧪 Opportunity to influence final design
+
+---
+
+## 🔴 Deprecated APIs
+
+**Legacy APIs for migration reference.**
+
+These APIs are deprecated and will be removed in future versions. They are provided for migration purposes and to help transition to newer, stable alternatives.
+
+[**Browse Deprecated APIs →**](./api-deprecated/llama-stack-specification-deprecated-apis)
+
+**Key Features:**
+- ⚠️ Will be removed in future versions
+- ⚠️ Migration guidance provided
+- ⚠️ Use for compatibility during transition
+- ⚠️ Not recommended for new projects
@@ -187,21 +187,21 @@ Configure telemetry behavior using environment variables:
 - **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
 - **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `console,sqlite`)

-## Visualization with Jaeger
+### Quick Setup: Complete Telemetry Stack

-The `otel_trace` sink works with any service compatible with the OpenTelemetry collector. Traces and metrics use separate endpoints but can share the same collector.
+Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):

-### Starting Jaeger
-
-Start a Jaeger instance with OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686:
-
 ```bash
-docker run --pull always --rm --name jaeger \
-  -p 16686:16686 -p 4318:4318 \
-  jaegertracing/jaeger:2.1.0
+./scripts/telemetry/setup_telemetry.sh
 ```

-Once running, you can visualize traces by navigating to [http://localhost:16686/](http://localhost:16686/).
+This sets up:
+- **Jaeger UI**: http://localhost:16686 (traces visualization)
+- **Prometheus**: http://localhost:9090 (metrics)
+- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
+- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
+
+Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.

 ## Querying Metrics
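As a quick illustration of the environment variables documented above, a server launch could be configured as follows. This is only a sketch: the sink list reuses names mentioned in this doc, the OTLP endpoint matches the collector port the setup script exposes, and the run-config path and the use of `OTEL_EXPORTER_OTLP_ENDPOINT` (the standard OpenTelemetry variable) are assumptions rather than values taken from this diff:

```bash
# Point traces/metrics at the local OTEL Collector started by setup_telemetry.sh
export OTEL_SERVICE_NAME=llama-stack-dev                   # service name shown in Jaeger/Grafana
export TELEMETRY_SINKS=console,sqlite,otel_trace           # sinks named in this documentation
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318   # assumed standard OTLP env var
llama stack run ~/.llama/run.yaml                          # run config path is illustrative
```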
@@ -181,7 +181,7 @@ Once defined, simply pass the tool to the agent config. `Agent` will take care o
 agent = Agent(client, ..., tools=[my_tool])
 ```

-Refer to [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/e2e_loop_with_client_tools.py) for an example of how to use client provided tools.
+Refer to [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/) for an example of how to use client provided tools.

 ## Tool Invocation
@@ -152,7 +152,6 @@ __all__ = ["WeatherAPI", "available_providers"]
 from typing import Protocol

 from llama_stack.providers.datatypes import (
-    AdapterSpec,
     Api,
     ProviderSpec,
     RemoteProviderSpec,

@@ -166,13 +165,11 @@ def available_providers() -> list[ProviderSpec]:
             api=Api.weather,
             provider_type="remote::kaze",
             config_class="llama_stack_provider_kaze.KazeProviderConfig",
-            adapter=AdapterSpec(
             adapter_type="kaze",
             module="llama_stack_provider_kaze",
             pip_packages=["llama_stack_provider_kaze"],
             config_class="llama_stack_provider_kaze.KazeProviderConfig",
-            ),
         ),
     ]

@@ -325,11 +322,10 @@ class WeatherKazeAdapter(WeatherProvider):

 ```yaml
 # ~/.llama/providers.d/remote/weather/kaze.yaml
-adapter:
-  adapter_type: kaze
-  pip_packages: ["llama_stack_provider_kaze"]
-  config_class: llama_stack_provider_kaze.config.KazeProviderConfig
-  module: llama_stack_provider_kaze
+adapter_type: kaze
+pip_packages: ["llama_stack_provider_kaze"]
+config_class: llama_stack_provider_kaze.config.KazeProviderConfig
+module: llama_stack_provider_kaze
 optional_api_dependencies: []
 ```

@@ -361,7 +357,7 @@ server:
 8. Run the server:

 ```bash
-python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
+llama stack run ~/.llama/run-byoa.yaml
 ```

 9. Test the API:
@@ -170,7 +170,7 @@
         - name: llama-stack
           image: localhost/llama-stack-run-k8s:latest
           imagePullPolicy: IfNotPresent
-          command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
+          command: ["llama", "stack", "run", "/app/config.yaml"]
           ports:
             - containerPort: 5000
           volumeMounts:
@@ -52,7 +52,7 @@ spec:
           value: "${SAFETY_MODEL}"
         - name: TAVILY_SEARCH_API_KEY
           value: "${TAVILY_SEARCH_API_KEY}"
-        command: ["python", "-m", "llama_stack.core.server.server", "/etc/config/stack_run_config.yaml", "--port", "8321"]
+        command: ["llama", "stack", "run", "/etc/config/stack_run_config.yaml", "--port", "8321"]
         ports:
           - containerPort: 8321
         volumeMounts:
@@ -131,4 +131,4 @@ graph TD
 3. **Configure your providers** with API keys or local models
 4. **Start building** with Llama Stack!

-For help choosing or troubleshooting, check our [Getting Started Guide](/docs/getting_started/quickstart) or [Community Support](https://github.com/llama-stack/llama-stack/discussions).
+For help choosing or troubleshooting, check our [Getting Started Guide](/docs/getting_started/quickstart) or [Community Support](https://github.com/llamastack/llama-stack/discussions).
@@ -102,7 +102,7 @@ You can start a chroma-db easily using docker.
 # This is where the indices are persisted
 mkdir -p $HOME/chromadb

-podman run --rm -it \
+docker run --rm -it \
   --network host \
   --name chromadb \
   -v $HOME/chromadb:/chroma/chroma \
@@ -127,7 +127,7 @@ docker run -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed
-  -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source \
+  -v $HOME/git/llama-stack:/app/llama-stack-source -v $HOME/git/llama-models:/app/llama-models-source \
   # localhost/distribution-dell:dev if building / testing locally
   llamastack/distribution-dell\
   --port $LLAMA_STACK_PORT \
@@ -14,13 +14,13 @@ Llama Stack is the open-source framework for building generative AI applications

 :::tip Llama 4 is here!

-Check out [Getting Started with Llama 4](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started_llama4.ipynb)
+Check out [Getting Started with Llama 4](https://colab.research.google.com/github/llamastack/llama-stack/blob/main/docs/getting_started_llama4.ipynb)

 :::

 :::tip News

-Llama Stack is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases) for more details.
+Llama Stack is now available! See the [release notes](https://github.com/llamastack/llama-stack/releases) for more details.

 :::
@@ -45,7 +45,7 @@ Llama Stack consists of a server (with multiple pluggable API providers) and Cli

 ## Quick Links

-- Ready to build? Check out the [Getting Started Guide](https://llama-stack.github.io/getting_started/quickstart) to get started.
+- Ready to build? Check out the [Getting Started Guide](/docs/getting_started/quickstart) to get started.
 - Want to contribute? See the [Contributing Guide](https://github.com/llamastack/llama-stack/blob/main/CONTRIBUTING.md).
 - Explore [Example Applications](https://github.com/llamastack/llama-stack-apps) built with Llama Stack.
@@ -59,13 +59,13 @@ Llama Stack provides adapters for popular providers across all API categories:
 - **Training & Evaluation**: HuggingFace, TorchTune, NVIDIA NEMO

 :::info Provider Details
-For complete provider compatibility and setup instructions, see our [Providers Documentation](https://llamastack.github.io/providers/).
+For complete provider compatibility and setup instructions, see our [Providers Documentation](https://llamastack.github.io/docs/providers/).
 :::

 ## Get Started Today

 <div style={{display: 'flex', gap: '1rem', flexWrap: 'wrap', margin: '2rem 0'}}>
-  <a href="https://llama-stack.github.io/getting_started/quickstart"
+  <a href="/docs/getting_started/quickstart"
     style={{
       background: 'var(--ifm-color-primary)',
       color: 'white',
@@ -1,12 +1,7 @@
 ---
-description: "Agents API for creating and interacting with agentic systems.
-
-Main functionalities provided by this API:
-- Create agents with specific instructions and ability to use tools.
-- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".
-- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
-- Agents can be provided with various shields (see the Safety API for more details).
-- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details."
+description: "Agents
+
+APIs for creating and interacting with agentic systems."
 sidebar_label: Agents
 title: Agents
 ---

@@ -15,13 +10,8 @@ title: Agents

 ## Overview

-Agents API for creating and interacting with agentic systems.
-
-Main functionalities provided by this API:
-- Create agents with specific instructions and ability to use tools.
-- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
-- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
-- Agents can be provided with various shields (see the Safety API for more details).
-- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
+Agents
+
+APIs for creating and interacting with agentic systems.

 This section contains documentation for all available providers for the **agents** API.
@@ -11,38 +11,6 @@ an example entry in your build.yaml should look like:
 module: ramalama_stack
 ```

-Additionally you can configure the `external_providers_dir` in your Llama Stack configuration. This method is in the process of being deprecated in favor of the `module` method. If using this method, the external provider directory should contain your external provider specifications:
-
-```yaml
-external_providers_dir: ~/.llama/providers.d/
-```
-
-## Directory Structure
-
-The external providers directory should follow this structure:
-
-```
-providers.d/
-  remote/
-    inference/
-      custom_ollama.yaml
-      vllm.yaml
-    vector_io/
-      qdrant.yaml
-    safety/
-      llama-guard.yaml
-  inline/
-    inference/
-      custom_ollama.yaml
-      vllm.yaml
-    vector_io/
-      qdrant.yaml
-    safety/
-      llama-guard.yaml
-```
-
-Each YAML file in these directories defines a provider specification for that particular API.
-
 ## Provider Types

 Llama Stack supports two types of external providers:
@@ -50,30 +18,37 @@ Llama Stack supports two types of external providers:
 1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
 2. **Inline Providers**: Providers that run locally within the Llama Stack process

+### Provider Specification (Common between inline and remote providers)
+
+- `provider_type`: The type of the provider to be installed (remote or inline). eg. `remote::ollama`
+- `api`: The API for this provider, eg. `inference`
+- `config_class`: The full path to the configuration class
+- `module`: The Python module containing the provider implementation
+- `optional_api_dependencies`: List of optional Llama Stack APIs that this provider can use
+- `api_dependencies`: List of Llama Stack APIs that this provider depends on
+- `provider_data_validator`: Optional validator for provider data.
+- `pip_packages`: List of Python packages required by the provider
+
 ### Remote Provider Specification

 Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:

 ```yaml
-adapter:
-  adapter_type: custom_ollama
+adapter_type: custom_ollama
+provider_type: "remote::ollama"
 pip_packages:
 - ollama
 - aiohttp
 config_class: llama_stack_ollama_provider.config.OllamaImplConfig
 module: llama_stack_ollama_provider
 api_dependencies: []
 optional_api_dependencies: []
 ```

-#### Adapter Configuration
+#### Remote Provider Configuration

-The `adapter` section defines how to load and configure the provider:
-
-- `adapter_type`: A unique identifier for this adapter
-- `pip_packages`: List of Python packages required by the provider
-- `config_class`: The full path to the configuration class
-- `module`: The Python module containing the provider implementation
+- `adapter_type`: A unique identifier for this adapter, eg. `ollama`

 ### Inline Provider Specification
@@ -81,6 +56,7 @@ Inline providers run locally within the Llama Stack process. Here's an example f

 ```yaml
 module: llama_stack_vector_provider
+provider_type: inline::llama_stack_vector_provider
 config_class: llama_stack_vector_provider.config.VectorStoreConfig
 pip_packages:
   - faiss-cpu

@@ -95,12 +71,6 @@ container_image: custom-vector-store:latest # optional

 #### Inline Provider Fields

-- `module`: The Python module containing the provider implementation
-- `config_class`: The full path to the configuration class
-- `pip_packages`: List of Python packages required by the provider
-- `api_dependencies`: List of Llama Stack APIs that this provider depends on
-- `optional_api_dependencies`: List of optional Llama Stack APIs that this provider can use
-- `provider_data_validator`: Optional validator for provider data
 - `container_image`: Optional container image to use instead of pip packages

 ## Required Fields
@@ -113,20 +83,17 @@ All providers must contain a `get_provider_spec` function in their `provider` mo
 from llama_stack.providers.datatypes import (
     ProviderSpec,
     Api,
-    AdapterSpec,
-    remote_provider_spec,
+    RemoteProviderSpec,
 )


 def get_provider_spec() -> ProviderSpec:
-    return remote_provider_spec(
+    return RemoteProviderSpec(
         api=Api.inference,
-        adapter=AdapterSpec(
         adapter_type="ramalama",
         pip_packages=["ramalama>=0.8.5", "pymilvus"],
         config_class="ramalama_stack.config.RamalamaImplConfig",
         module="ramalama_stack",
-        ),
     )
 ```
@@ -197,18 +164,16 @@ information. Execute the test for the Provider type you are developing.
 If your external provider isn't being loaded:

 1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
-1. Check that the `external_providers_dir` path is correct and accessible.
 2. Verify that the YAML files are properly formatted.
 3. Ensure all required Python packages are installed.
 4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
    information using `LLAMA_STACK_LOGGING=all=debug`.
-5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.

 ## Examples

-### Example using `external_providers_dir`: Custom Ollama Provider
+### How to create an external provider module

-Here's a complete example of creating and using a custom Ollama provider:
+If you are creating a new external provider called `llama-stack-provider-ollama` here is how you would set up the package properly:

 1. First, create the provider package:
@ -230,33 +195,28 @@ requires-python = ">=3.12"

dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"]
```

3. Create the provider specification:
3. Install the provider:

```yaml
# ~/.llama/providers.d/remote/inference/custom_ollama.yaml
adapter:
  adapter_type: custom_ollama
  pip_packages: ["ollama", "aiohttp"]
  config_class: llama_stack_provider_ollama.config.OllamaImplConfig
  module: llama_stack_provider_ollama
api_dependencies: []
optional_api_dependencies: []
```

4. Install the provider:

```bash
uv pip install -e .
```

5. Configure Llama Stack to use external providers:
4. Edit `provider.py`

```yaml
provider.py must be updated to contain `get_provider_spec`. This is used by llama stack to install the provider.
external_providers_dir: ~/.llama/providers.d/

```python
def get_provider_spec() -> ProviderSpec:
    return RemoteProviderSpec(
        api=Api.inference,
        adapter_type="llama-stack-provider-ollama",
        pip_packages=["ollama", "aiohttp"],
        config_class="llama_stack_provider_ollama.config.OllamaImplConfig",
        module="llama_stack_provider_ollama",
    )
```

The provider will now be available in Llama Stack with the type `remote::custom_ollama`.
5. Implement the provider as outlined above with `get_provider_impl` or `get_adapter_impl`, etc.

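The `config_class` referenced by the spec must resolve to an importable Pydantic model. A minimal sketch of what `llama_stack_provider_ollama/config.py` could look like (the field names are assumptions, not a prescribed schema):

```python
# Hypothetical config module for the example provider above.
from pydantic import BaseModel


class OllamaImplConfig(BaseModel):
    # Assumed field: where the Ollama runtime is listening.
    url: str = "http://localhost:11434"
```
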
### Example using `module`: ramalama-stack

@ -275,7 +235,6 @@ distribution_spec:
module: ramalama_stack==0.3.0a0
image_type: venv
image_name: null
external_providers_dir: null
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]

@ -14,6 +14,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `api_key` | `str \| None` | No | | API key for Anthropic models |

## Sample Configuration

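For illustration only, a hedged sketch of how `allowed_models` might be set on a provider entry in a run configuration (the entry layout and model ID are placeholders, not the file's actual sample):

```yaml
# Hypothetical fragment of a run.yaml providers.inference list
- provider_id: anthropic
  provider_type: remote::anthropic
  config:
    api_key: ${env.ANTHROPIC_API_KEY}
    allowed_models:
      - claude-3-5-sonnet-latest   # example model ID
```
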
@ -21,6 +21,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Azure API key for Azure |
| `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
| `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |

@ -14,6 +14,7 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |

@ -14,6 +14,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |

@ -14,7 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
| `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |

## Sample Configuration

@ -14,6 +14,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `api_key` | `str \| None` | No | | API key for Gemini models |

## Sample Configuration

@ -14,6 +14,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `api_key` | `str \| None` | No | | The Groq API key |
| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |

@ -14,6 +14,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `api_key` | `str \| None` | No | | The Llama API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |

@ -14,6 +14,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed if using the hosted service |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |

@ -14,6 +14,7 @@ Ollama inference provider for running local models through the Ollama runtime.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `<class 'str'>` | No | http://localhost:11434 | |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |

@ -14,6 +14,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `api_key` | `str \| None` | No | | API key for OpenAI models |
| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |

@ -14,6 +14,7 @@ Passthrough inference provider for connecting to any external inference service

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrough endpoint |

@ -14,6 +14,7 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
| `api_token` | `str \| None` | No | | The API token |

@ -14,6 +14,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |

@ -14,6 +14,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `<class 'str'>` | No | | The URL for the TGI serving endpoint |

## Sample Configuration

@ -53,6 +53,7 @@ Available Models:

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI |
| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |

@ -14,6 +14,7 @@ Remote vLLM inference provider for connecting to vLLM servers.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
| `api_token` | `str \| None` | No | fake | The API token |

@ -14,6 +14,7 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
| `project_id` | `str \| None` | No | | The Project ID key |

@ -14,6 +14,7 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |

@ -16,14 +16,14 @@ Meta's reference implementation of telemetry and observability using OpenTelemet

|-------|------|----------|---------|-------------|
| `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. |
| `service_name` | `<class 'str'>` | No | | The service name to use for telemetry |
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink]` | No | [<TelemetrySink.CONSOLE: 'console'>, <TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) |
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink]` | No | [<TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) |
| `sqlite_db_path` | `<class 'str'>` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces |

## Sample Configuration

```yaml
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sinks: ${env.TELEMETRY_SINKS:=sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
```

@ -15,6 +15,50 @@ const config: Config = {

  onBrokenMarkdownLinks: "warn",
  favicon: "img/favicon.ico",

  // Enhanced favicon and meta configuration
  headTags: [
    {
      tagName: 'link',
      attributes: {
        rel: 'icon',
        type: 'image/png',
        sizes: '32x32',
        href: '/img/favicon-32x32.png',
      },
    },
    {
      tagName: 'link',
      attributes: {
        rel: 'icon',
        type: 'image/png',
        sizes: '16x16',
        href: '/img/favicon-16x16.png',
      },
    },
    {
      tagName: 'link',
      attributes: {
        rel: 'apple-touch-icon',
        sizes: '180x180',
        href: '/img/llama-stack-logo.png',
      },
    },
    {
      tagName: 'meta',
      attributes: {
        name: 'theme-color',
        content: '#7C3AED', // Purple color from your logo
      },
    },
    {
      tagName: 'link',
      attributes: {
        rel: 'manifest',
        href: '/site.webmanifest',
      },
    },
  ],

  // GitHub pages deployment config.
  organizationName: 'reluctantfuturist',
  projectName: 'llama-stack',

@ -26,9 +70,6 @@ const config: Config = {

      {
        docs: {
          sidebarPath: require.resolve("./sidebars.ts"),
          // Please change this to your repo.
          // Remove this to remove the "edit this page" links.
          editUrl: 'https://github.com/meta-llama/llama-stack/tree/main/docs/',
          docItemComponent: "@theme/ApiItem", // Derived from docusaurus-theme-openapi
        },
        blog: false,

@ -55,10 +96,27 @@ const config: Config = {

          label: 'Docs',
        },
        {
          type: 'docSidebar',
          type: 'dropdown',
          sidebarId: 'apiSidebar',
          position: 'left',
          label: 'API Reference',
          position: 'left',
          to: '/docs/api-overview',
          items: [
            {
              type: 'docSidebar',
              sidebarId: 'stableApiSidebar',
              label: '🟢 Stable APIs',
            },
            {
              type: 'docSidebar',
              sidebarId: 'experimentalApiSidebar',
              label: '🟡 Experimental APIs',
            },
            {
              type: 'docSidebar',
              sidebarId: 'deprecatedApiSidebar',
              label: '🔴 Deprecated APIs',
            },
          ],
        },
        {
          href: 'https://github.com/llamastack/llama-stack',

@ -83,7 +141,7 @@ const config: Config = {

          },
          {
            label: 'API Reference',
            to: '/docs/api/llama-stack-specification',
            to: '/docs/api-overview',
          },
        ],
      },

@ -170,7 +228,7 @@ const config: Config = {

        id: "openapi",
        docsPluginId: "classic",
        config: {
          llamastack: {
          stable: {
            specPath: "static/llama-stack-spec.yaml",
            outputDir: "docs/api",
            downloadUrl: "https://raw.githubusercontent.com/meta-llama/llama-stack/main/docs/static/llama-stack-spec.yaml",

@ -179,6 +237,24 @@ const config: Config = {

              categoryLinkSource: "tag",
            },
          } satisfies OpenApiPlugin.Options,
          experimental: {
            specPath: "static/experimental-llama-stack-spec.yaml",
            outputDir: "docs/api-experimental",
            downloadUrl: "https://raw.githubusercontent.com/meta-llama/llama-stack/main/docs/static/experimental-llama-stack-spec.yaml",
            sidebarOptions: {
              groupPathsBy: "tag",
              categoryLinkSource: "tag",
            },
          } satisfies OpenApiPlugin.Options,
          deprecated: {
            specPath: "static/deprecated-llama-stack-spec.yaml",
            outputDir: "docs/api-deprecated",
            downloadUrl: "https://raw.githubusercontent.com/meta-llama/llama-stack/main/docs/static/deprecated-llama-stack-spec.yaml",
            sidebarOptions: {
              groupPathsBy: "tag",
              categoryLinkSource: "tag",
            },
          } satisfies OpenApiPlugin.Options,
        } satisfies Plugin.PluginOptions,
      },
    ],

@ -34,40 +34,59 @@ def str_presenter(dumper, data):

    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)


def main(output_dir: str):
def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: bool = False, combined_spec: bool = False):
    output_dir = Path(output_dir)
    """Generate OpenAPI spec with optional stability filtering."""
    if not output_dir.exists():
        raise ValueError(f"Directory {output_dir} does not exist")

    # Validate API protocols before generating spec
    if combined_spec:
    return_type_errors = validate_api()
        # Special case for combined stable + experimental APIs
    if return_type_errors:
        title_suffix = " - Stable & Experimental APIs"
        print("\nAPI Method Return Type Validation Errors:\n")
        filename_prefix = "stainless-"
        for error in return_type_errors:
        description_suffix = "\n\n**🔗 COMBINED**: This specification includes both stable production-ready APIs and experimental pre-release APIs. Use stable APIs for production deployments and experimental APIs for testing new features."
            print(error, file=sys.stderr)
        # Use the special "stainless" filter to include stable + experimental APIs
        sys.exit(1)
        stability_filter = "stainless"
    now = str(datetime.now())
    elif stability_filter:
    print(
        title_suffix = {
        "Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now
            "stable": " - Stable APIs" if not main_spec else "",
    )
            "experimental": " - Experimental APIs",
    print("")
            "deprecated": " - Deprecated APIs"
        }.get(stability_filter, f" - {stability_filter.title()} APIs")

        # Use main spec filename for stable when main_spec=True
        if main_spec and stability_filter == "stable":
            filename_prefix = ""
        else:
            filename_prefix = f"{stability_filter}-"

        description_suffix = {
            "stable": "\n\n**✅ STABLE**: Production-ready APIs with backward compatibility guarantees.",
            "experimental": "\n\n**🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before becoming stable.",
            "deprecated": "\n\n**⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for migration reference only."
        }.get(stability_filter, "")
    else:
        title_suffix = ""
        filename_prefix = ""
        description_suffix = ""

    spec = Specification(
        LlamaStack,
        Options(
            server=Server(url="http://any-hosted-llama-stack.com"),
            info=Info(
                title="Llama Stack Specification",
                title=f"Llama Stack Specification{title_suffix}",
                version=LLAMA_STACK_API_V1,
                description="""This is the specification of the Llama Stack that provides
                description=f"""This is the specification of the Llama Stack that provides
a set of endpoints and their corresponding interfaces that are tailored to
best leverage Llama Models.""",
best leverage Llama Models.{description_suffix}""",
            ),
            include_standard_error_responses=True,
            stability_filter=stability_filter,  # Pass the filter to the generator
        ),
    )

    with open(output_dir / "llama-stack-spec.yaml", "w", encoding="utf-8") as fp:
    yaml_filename = f"{filename_prefix}llama-stack-spec.yaml"
    html_filename = f"{filename_prefix}llama-stack-spec.html"

    with open(output_dir / yaml_filename, "w", encoding="utf-8") as fp:
        y = yaml.YAML()
        y.default_flow_style = False
        y.block_seq_indent = 2

@ -83,9 +102,39 @@ def main(output_dir: str):

            fp,
        )

    with open(output_dir / "llama-stack-spec.html", "w") as fp:
    with open(output_dir / html_filename, "w") as fp:
        spec.write_html(fp, pretty_print=True)

    print(f"Generated {yaml_filename} and {html_filename}")


def main(output_dir: str):
    output_dir = Path(output_dir)
    if not output_dir.exists():
        raise ValueError(f"Directory {output_dir} does not exist")

    # Validate API protocols before generating spec
    return_type_errors = validate_api()
    if return_type_errors:
        print("\nAPI Method Return Type Validation Errors:\n")
        for error in return_type_errors:
            print(error, file=sys.stderr)
        sys.exit(1)

    now = str(datetime.now())
    print(f"Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at {now}")
    print("")

    # Generate main spec as stable APIs (llama-stack-spec.yaml)
    print("Generating main specification (stable APIs)...")
    generate_spec(output_dir, "stable", main_spec=True)

    print("Generating other stability-filtered specifications...")
    generate_spec(output_dir, "experimental")
    generate_spec(output_dir, "deprecated")

    print("Generating combined stable + experimental specification...")
    generate_spec(output_dir, combined_spec=True)


if __name__ == "__main__":
    fire.Fire(main)

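Put together, a full regeneration pass made by `main()` above is equivalent to the following calls (the output directory is an assumption; pass whichever directory you actually generate into):

```python
# Spelled-out equivalent of main(); "docs/static" is an assumed output directory.
from pathlib import Path

out = Path("docs/static")
generate_spec(out, "stable", main_spec=True)   # llama-stack-spec.yaml / .html
generate_spec(out, "experimental")             # experimental-llama-stack-spec.yaml
generate_spec(out, "deprecated")               # deprecated-llama-stack-spec.yaml
generate_spec(out, combined_spec=True)         # stainless-llama-stack-spec.yaml
```
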
@ -7,13 +7,14 @@

import hashlib
import inspect
import ipaddress
import os
import types
import typing
from dataclasses import make_dataclass
from pathlib import Path
from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union

from fastapi import UploadFile
from pydantic import BaseModel

from llama_stack.apis.datatypes import Error
from llama_stack.strong_typing.core import JsonType

@ -35,6 +36,7 @@ from llama_stack.strong_typing.schema import (

    SchemaOptions,
)
from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
from pydantic import BaseModel

from .operations import (
    EndpointOperation,

@ -48,6 +50,7 @@ from .specification import (

    Document,
    Example,
    ExampleRef,
    ExtraBodyParameter,
    MediaType,
    Operation,
    Parameter,

@ -546,6 +549,84 @@ class Generator:

        return extra_tags

    def _get_api_group_for_operation(self, op) -> str | None:
        """
        Determine the API group for an operation based on its route path.

        Args:
            op: The endpoint operation

        Returns:
            The API group name derived from the route, or None if unable to determine
        """
        if not hasattr(op, 'webmethod') or not op.webmethod or not hasattr(op.webmethod, 'route'):
            return None

        route = op.webmethod.route
        if not route or not route.startswith('/'):
            return None

        # Extract API group from route path
        # Examples: /v1/agents/list -> agents-api
        #           /v1/responses -> responses-api
        #           /v1/models -> models-api
        path_parts = route.strip('/').split('/')

        if len(path_parts) < 2:
            return None

        # Skip version prefix (v1, v1alpha, v1beta, etc.)
        if path_parts[0].startswith('v1'):
            if len(path_parts) < 2:
                return None
            api_segment = path_parts[1]
        else:
            api_segment = path_parts[0]

        # Convert to supplementary file naming convention
        # agents -> agents-api, responses -> responses-api, etc.
        return f"{api_segment}-api"

    def _load_supplemental_content(self, api_group: str | None) -> str:
        """
        Load supplemental content for an API group based on stability level.

        Follows this resolution order:
        1. docs/supplementary/{stability}/{api_group}.md
        2. docs/supplementary/shared/{api_group}.md (fallback)
        3. Empty string if no files found

        Args:
            api_group: The API group name (e.g., "agents-responses-api"), or None if no mapping exists

        Returns:
            The supplemental content as markdown string, or empty string if not found
        """
        if not api_group:
            return ""

        base_path = Path(__file__).parent.parent.parent / "supplementary"

        # Try stability-specific content first if stability filter is set
        if self.options.stability_filter:
            stability_path = base_path / self.options.stability_filter / f"{api_group}.md"
            if stability_path.exists():
                try:
                    return stability_path.read_text(encoding="utf-8")
                except Exception as e:
                    print(f"Warning: Could not read stability-specific supplemental content from {stability_path}: {e}")

        # Fall back to shared content
        shared_path = base_path / "shared" / f"{api_group}.md"
        if shared_path.exists():
            try:
                return shared_path.read_text(encoding="utf-8")
            except Exception as e:
                print(f"Warning: Could not read shared supplemental content from {shared_path}: {e}")

        # No supplemental content found
        return ""

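In practice this implies a supplementary-docs layout roughly like the following (file names are illustrative; only the stability-level directories and the `shared/` fallback are implied by the code):

```
docs/supplementary/
├── stable/
│   └── agents-api.md        # preferred when generating the stable spec
├── experimental/
│   └── agents-api.md
└── shared/
    └── agents-api.md        # fallback when no stability-specific file exists
```
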
    def _build_operation(self, op: EndpointOperation) -> Operation:
        if op.defining_class.__name__ in [
            "SyntheticDataGeneration",

@ -597,6 +678,27 @@ class Generator:

        # parameters passed anywhere
        parameters = path_parameters + query_parameters

        # Build extra body parameters documentation
        extra_body_parameters = []
        for param_name, param_type, description in op.extra_body_params:
            if is_type_optional(param_type):
                inner_type: type = unwrap_optional_type(param_type)
                required = False
            else:
                inner_type = param_type
                required = True

            # Use description from ExtraBodyField if available, otherwise from docstring
            param_description = description or doc_params.get(param_name)

            extra_body_param = ExtraBodyParameter(
                name=param_name,
                schema=self.schema_builder.classdef_to_ref(inner_type),
                description=param_description,
                required=required,
            )
            extra_body_parameters.append(extra_body_param)

        webmethod = getattr(op.func_ref, "__webmethod__", None)
        raw_bytes_request_body = False
        if webmethod:

@ -797,10 +899,14 @@ class Generator:

        else:
            callbacks = None

        description = "\n".join(
        # Build base description from docstring
        base_description = "\n".join(
            filter(None, [doc_string.short_description, doc_string.long_description])
        )

        # Individual endpoints get clean descriptions only
        description = base_description

        return Operation(
            tags=[
                getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)

@ -811,16 +917,126 @@ class Generator:

            requestBody=requestBody,
            responses=responses,
            callbacks=callbacks,
            deprecated=True if "DEPRECATED" in op.func_name else None,
            deprecated=getattr(op.webmethod, "deprecated", False)
            or "DEPRECATED" in op.func_name,
            security=[] if op.public else None,
            extraBodyParameters=extra_body_parameters if extra_body_parameters else None,
        )

    def _get_api_stability_priority(self, api_level: str) -> int:
        """
        Return sorting priority for API stability levels.
        Lower numbers = higher priority (appear first)

        :param api_level: The API level (e.g., "v1", "v1beta", "v1alpha")
        :return: Priority number for sorting
        """
        stability_order = {
            "v1": 0,  # Stable - highest priority
            "v1beta": 1,  # Beta - medium priority
            "v1alpha": 2,  # Alpha - lowest priority
        }
        return stability_order.get(api_level, 999)  # Unknown levels go last

    def generate(self) -> Document:
        paths: Dict[str, PathItem] = {}
        endpoint_classes: Set[type] = set()
        for op in get_endpoint_operations(

        # Collect all operations and filter by stability if specified
        operations = list(
            get_endpoint_operations(
                self.endpoint, use_examples=self.options.use_examples
        ):
            )
        )

        # Filter operations by stability level if requested
        if self.options.stability_filter:
            filtered_operations = []
            for op in operations:
                deprecated = (
                    getattr(op.webmethod, "deprecated", False)
                    or "DEPRECATED" in op.func_name
                )
                stability_level = op.webmethod.level

                if self.options.stability_filter == "stable":
                    # Include v1 non-deprecated endpoints
                    if stability_level == "v1" and not deprecated:
                        filtered_operations.append(op)
                elif self.options.stability_filter == "experimental":
                    # Include v1alpha and v1beta endpoints (deprecated or not)
                    if stability_level in ["v1alpha", "v1beta"]:
                        filtered_operations.append(op)
                elif self.options.stability_filter == "deprecated":
                    # Include only deprecated endpoints
                    if deprecated:
                        filtered_operations.append(op)
                elif self.options.stability_filter == "stainless":
                    # Include both stable (v1 non-deprecated) and experimental (v1alpha, v1beta) endpoints
                    if (stability_level == "v1" and not deprecated) or stability_level in ["v1alpha", "v1beta"]:
                        filtered_operations.append(op)

            operations = filtered_operations
            print(
                f"Filtered to {len(operations)} operations for stability level: {self.options.stability_filter}"
            )

        # Sort operations by multiple criteria for consistent ordering:
        # 1. Stability level with deprecation handling (global priority):
        #    - Active stable (v1) comes first
        #    - Beta (v1beta) comes next
        #    - Alpha (v1alpha) comes next
        #    - Deprecated stable (v1 deprecated) comes last
        # 2. Route path (group related endpoints within same stability level)
        # 3. HTTP method (GET, POST, PUT, DELETE, PATCH)
        # 4. Operation name (alphabetical)
        def sort_key(op):
            http_method_order = {
                HTTPMethod.GET: 0,
                HTTPMethod.POST: 1,
                HTTPMethod.PUT: 2,
                HTTPMethod.DELETE: 3,
                HTTPMethod.PATCH: 4,
            }

            # Enhanced stability priority for migration pattern support
            deprecated = getattr(op.webmethod, "deprecated", False)
            stability_priority = self._get_api_stability_priority(op.webmethod.level)

            # Deprecated versions should appear after everything else
            # This ensures deprecated stable endpoints come last globally
            if deprecated:
                stability_priority += 10  # Push deprecated endpoints to the end

            return (
                stability_priority,  # Global stability handling comes first
                op.get_route(
                    op.webmethod
                ),  # Group by route path within stability level
                http_method_order.get(op.http_method, 999),
                op.func_name,
            )

        operations.sort(key=sort_key)

        # Debug output for migration pattern tracking
        migration_routes = {}
        for op in operations:
            route_key = (op.get_route(op.webmethod), op.http_method)
            if route_key not in migration_routes:
                migration_routes[route_key] = []
            migration_routes[route_key].append(
                (op.webmethod.level, getattr(op.webmethod, "deprecated", False))
            )

        for route_key, versions in migration_routes.items():
            if len(versions) > 1:
                print(f"Migration pattern detected for {route_key[1]} {route_key[0]}:")
                for level, deprecated in versions:
                    status = "DEPRECATED" if deprecated else "ACTIVE"
                    print(f"  - {level} ({status})")

        for op in operations:
            endpoint_classes.add(op.defining_class)

            operation = self._build_operation(op)

@ -851,10 +1067,22 @@ class Generator:

            doc_string = parse_type(cls)
            if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
                continue

            # Add supplemental content to tag pages
            api_group = f"{cls.__name__.lower()}-api"
            supplemental_content = self._load_supplemental_content(api_group)

            tag_description = doc_string.long_description or ""
            if supplemental_content:
                if tag_description:
                    tag_description = f"{tag_description}\n\n{supplemental_content}"
                else:
                    tag_description = supplemental_content

            operation_tags.append(
                Tag(
                    name=cls.__name__,
                    description=doc_string.long_description,
                    description=tag_description,
                    displayName=doc_string.short_description,
                )
            )

@ -23,6 +23,8 @@ from fastapi import UploadFile

from fastapi.params import File, Form
from typing import Annotated

from llama_stack.schema_utils import ExtraBodyField


def split_prefix(
    s: str, sep: str, prefix: Union[str, Iterable[str]]

@ -89,6 +91,7 @@ class EndpointOperation:

    :param query_params: Parameters of the operation signature that are passed in the query string as `key=value` pairs.
    :param request_params: The parameter that corresponds to the data transmitted in the request body.
    :param multipart_params: Parameters that indicate multipart/form-data request body.
    :param extra_body_params: Parameters that arrive via extra_body and are documented but not in SDK.
    :param event_type: The Python type of the data that is transmitted out-of-band (e.g. via websockets) while the operation is in progress.
    :param response_type: The Python type of the data that is transmitted in the response body.
    :param http_method: The HTTP method used to invoke the endpoint such as POST, GET or PUT.

@ -106,6 +109,7 @@ class EndpointOperation:

    query_params: List[OperationParameter]
    request_params: Optional[OperationParameter]
    multipart_params: List[OperationParameter]
    extra_body_params: List[tuple[str, type, str | None]]
    event_type: Optional[type]
    response_type: type
    http_method: HTTPMethod

@ -265,6 +269,7 @@ def get_endpoint_operations(

            query_params = []
            request_params = []
            multipart_params = []
            extra_body_params = []

            for param_name, parameter in signature.parameters.items():
                param_type = _get_annotation_type(parameter.annotation, func_ref)

@ -279,6 +284,13 @@ def get_endpoint_operations(

                        f"parameter '{param_name}' in function '{func_name}' has no type annotation"
                    )

                # Check if this is an extra_body parameter
                is_extra_body, extra_body_desc = _is_extra_body_param(param_type)
                if is_extra_body:
                    # Store in a separate list for documentation
                    extra_body_params.append((param_name, param_type, extra_body_desc))
                    continue  # Skip adding to request_params

                is_multipart = _is_multipart_param(param_type)

                if prefix in ["get", "delete"]:

@ -351,6 +363,7 @@ def get_endpoint_operations(

                query_params=query_params,
                request_params=request_params,
                multipart_params=multipart_params,
                extra_body_params=extra_body_params,
                event_type=event_type,
                response_type=response_type,
                http_method=http_method,

@ -429,3 +442,22 @@ def _is_multipart_param(param_type: type) -> bool:

        if isinstance(annotation, (File, Form)):
            return True
    return False


def _is_extra_body_param(param_type: type) -> tuple[bool, str | None]:
    """
    Check if parameter is marked as coming from extra_body.

    Returns:
        (is_extra_body, description): Tuple of boolean and optional description
    """
    origin = get_origin(param_type)
    if origin is Annotated:
        args = get_args(param_type)
        for annotation in args[1:]:
            if isinstance(annotation, ExtraBodyField):
                return True, annotation.description
            # Also check by type name for cases where import matters
            if type(annotation).__name__ == 'ExtraBodyField':
                return True, getattr(annotation, 'description', None)
    return False, None

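For orientation, a hedged sketch of the endpoint-side declaration this helper is looking for (the API class, route, and field are invented, and the exact `ExtraBodyField` constructor is an assumption; only the `Annotated[..., ExtraBodyField(...)]` shape is what `_is_extra_body_param` detects):

```python
# Hypothetical endpoint; illustrates the detected pattern, not a real Llama Stack API.
from typing import Annotated

from llama_stack.schema_utils import ExtraBodyField, webmethod


class ExampleAPI:
    @webmethod(route="/v1/example/run", method="POST")
    async def run(
        self,
        prompt: str,
        tracing_context: Annotated[
            dict | None,
            ExtraBodyField(description="Opaque metadata supplied via the client's extra_body."),
        ] = None,
    ) -> dict: ...
```
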
|
||||||
|
|
@ -54,6 +54,7 @@ class Options:
|
||||||
property_description_fun: Optional[Callable[[type, str, str], str]] = None
|
property_description_fun: Optional[Callable[[type, str, str], str]] = None
|
||||||
captions: Optional[Dict[str, str]] = None
|
captions: Optional[Dict[str, str]] = None
|
||||||
include_standard_error_responses: bool = True
|
include_standard_error_responses: bool = True
|
||||||
|
stability_filter: Optional[str] = None
|
||||||
|
|
||||||
default_captions: ClassVar[Dict[str, str]] = {
|
default_captions: ClassVar[Dict[str, str]] = {
|
||||||
"Operations": "Operations",
|
"Operations": "Operations",
|
||||||
|
|
|
||||||
|
|
@ -106,6 +106,15 @@ class Parameter:
|
||||||
example: Optional[Any] = None
|
example: Optional[Any] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ExtraBodyParameter:
|
||||||
|
"""Represents a parameter that arrives via extra_body in the request."""
|
||||||
|
name: str
|
||||||
|
schema: SchemaOrRef
|
||||||
|
description: Optional[str] = None
|
||||||
|
required: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Operation:
|
class Operation:
|
||||||
responses: Dict[str, Union[Response, ResponseRef]]
|
responses: Dict[str, Union[Response, ResponseRef]]
|
||||||
|
|
@ -118,6 +127,7 @@ class Operation:
|
||||||
callbacks: Optional[Dict[str, "Callback"]] = None
|
callbacks: Optional[Dict[str, "Callback"]] = None
|
||||||
security: Optional[List["SecurityRequirement"]] = None
|
security: Optional[List["SecurityRequirement"]] = None
|
||||||
deprecated: Optional[bool] = None
|
deprecated: Optional[bool] = None
|
||||||
|
extraBodyParameters: Optional[List[ExtraBodyParameter]] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
|
||||||
|
|
@@ -52,6 +52,17 @@ class Specification:
             if display_name:
                 tag["x-displayName"] = display_name

+        # Handle operations to rename extraBodyParameters -> x-llama-stack-extra-body-params
+        paths = json_doc.get("paths", {})
+        for path_item in paths.values():
+            if isinstance(path_item, dict):
+                for method in ["get", "post", "put", "delete", "patch"]:
+                    operation = path_item.get(method)
+                    if operation and isinstance(operation, dict):
+                        extra_body_params = operation.pop("extraBodyParameters", None)
+                        if extra_body_params:
+                            operation["x-llama-stack-extra-body-params"] = extra_body_params
+
         return json_doc

     def get_json_string(self, pretty_print: bool = False) -> str:
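The net effect on the generated spec is that collected extra-body parameters are surfaced under a vendor-extension key instead of the internal field name. A small illustrative sketch (the values are invented, only the key rename is from the diff):

```python
# Illustrative only: one operation entry before and after the rename pass above.
before = {
    "post": {
        "extraBodyParameters": [
            {"name": "shields", "required": False, "description": "Shields to apply."}
        ]
    }
}

# After the pass, the same data is exposed as an OpenAPI vendor extension:
after = {
    "post": {
        "x-llama-stack-extra-body-params": [
            {"name": "shields", "required": False, "description": "Shields to apply."}
        ]
    }
}
```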
@@ -16,7 +16,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'Getting Started',
-      collapsed: false,
+      collapsed: true,
       items: [
         'getting_started/quickstart',
         'getting_started/detailed_tutorial',
@@ -26,7 +26,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'Concepts',
-      collapsed: false,
+      collapsed: true,
       items: [
         'concepts/index',
         'concepts/architecture',
@@ -48,7 +48,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'Distributions',
-      collapsed: false,
+      collapsed: true,
       items: [
         'distributions/index',
         'distributions/list_of_distributions',
@@ -93,7 +93,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'Providers',
-      collapsed: false,
+      collapsed: true,
       items: [
         'providers/index',
         {
@@ -276,7 +276,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'Building Applications',
-      collapsed: false,
+      collapsed: true,
       items: [
         'building_applications/index',
         'building_applications/rag',
@@ -293,7 +293,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'Advanced APIs',
-      collapsed: false,
+      collapsed: true,
       items: [
         'advanced_apis/post_training',
         'advanced_apis/evaluation',
@@ -303,7 +303,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'Deploying',
-      collapsed: false,
+      collapsed: true,
       items: [
         'deploying/index',
         'deploying/kubernetes_deployment',
@@ -313,7 +313,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'Contributing',
-      collapsed: false,
+      collapsed: true,
       items: [
         'contributing/index',
         'contributing/new_api_provider',
@@ -324,7 +324,7 @@ const sidebars: SidebarsConfig = {
     {
       type: 'category',
       label: 'References',
-      collapsed: false,
+      collapsed: true,
       items: [
         'references/index',
         'references/llama_cli_reference/index',
@@ -335,8 +335,10 @@ const sidebars: SidebarsConfig = {
     },
   ],

-  // API Reference sidebar - use plugin-generated sidebar
-  apiSidebar: require('./docs/api/sidebar.ts').default,
+  // API Reference sidebars - use plugin-generated sidebars
+  stableApiSidebar: require('./docs/api/sidebar.ts').default,
+  experimentalApiSidebar: require('./docs/api-experimental/sidebar.ts').default,
+  deprecatedApiSidebar: require('./docs/api-deprecated/sidebar.ts').default,
 };

 export default sidebars;
@@ -189,3 +189,29 @@ button[class*="button"]:hover,
 .pagination-nav__link--prev:hover {
   background-color: #f3f4f6 !important;
 }
+
+/* Deprecated endpoint styling */
+.menu__list-item--deprecated .menu__link {
+  text-decoration: line-through !important;
+  opacity: 0.7;
+  font-style: italic;
+}
+
+.menu__list-item--deprecated .menu__link:hover {
+  opacity: 0.9;
+}
+
+/* Deprecated endpoint badges - slightly muted */
+.menu__list-item--deprecated.api-method > .menu__link::before {
+  opacity: 0.7;
+  border-style: dashed !important;
+}
+
+/* Dark theme adjustments for deprecated endpoints */
+[data-theme='dark'] .menu__list-item--deprecated .menu__link {
+  opacity: 0.6;
+}
+
+[data-theme='dark'] .menu__list-item--deprecated .menu__link:hover {
+  opacity: 0.8;
+}
Changed static assets (contents not shown inline):
- docs/static/deprecated-llama-stack-spec.html: new vendored file, 13427 lines
- docs/static/deprecated-llama-stack-spec.yaml: new vendored file, 10051 lines
- docs/static/experimental-llama-stack-spec.html: new vendored file, 6450 lines
- docs/static/experimental-llama-stack-spec.yaml: new vendored file, 4798 lines
- docs/static/img/favicon-16x16.png: new binary file (657 B)
- docs/static/img/favicon-32x32.png: new binary file (1.9 KiB)
- docs/static/img/favicon-48x48.png: new binary file (3.3 KiB)
- docs/static/img/favicon-64x64.png: new binary file (4.9 KiB)
- docs/static/img/favicon.ico: new binary file (679 B)
- docs/static/img/favicon.png: new binary file (1.9 KiB)
- docs/static/img/llama-stack.png: binary file changed (71 KiB before, 604 KiB after)
- docs/static/llama-stack-spec.html: vendored, 22496 lines changed
- docs/static/llama-stack-spec.yaml: vendored, 16963 lines changed
- docs/static/llama-stack.png: binary file removed (was 196 KiB)

docs/static/site.webmanifest (new vendored file, 36 lines):
@@ -0,0 +1,36 @@
{
  "name": "Llama Stack",
  "short_name": "Llama Stack",
  "description": "The open-source framework for building generative AI applications",
  "start_url": "/",
  "display": "standalone",
  "theme_color": "#7C3AED",
  "background_color": "#ffffff",
  "icons": [
    {
      "src": "/img/favicon-16x16.png",
      "sizes": "16x16",
      "type": "image/png"
    },
    {
      "src": "/img/favicon-32x32.png",
      "sizes": "32x32",
      "type": "image/png"
    },
    {
      "src": "/img/favicon-48x48.png",
      "sizes": "48x48",
      "type": "image/png"
    },
    {
      "src": "/img/favicon-64x64.png",
      "sizes": "64x64",
      "type": "image/png"
    },
    {
      "src": "/img/llama-stack-logo.png",
      "sizes": "200x200",
      "type": "image/png"
    }
  ]
}
More new vendored spec files (contents not shown inline):
- docs/static/stainless-llama-stack-spec.html: new vendored file, 18601 lines
- docs/static/stainless-llama-stack-spec.yaml: new vendored file, 13870 lines

docs/supplementary/deprecated/agents-api.md (new file, 9 lines):
@@ -0,0 +1,9 @@
## Deprecated APIs

> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.

### Migration Guidance

If you are using deprecated versions of the Agents or Responses APIs, please migrate to:

- **Responses API**: Use the stable v1 Responses API endpoints
docs/supplementary/experimental/agents-api.md (new file, 21 lines):
@@ -0,0 +1,21 @@
## Agents API (Experimental)

> **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. Great for exploring new capabilities and providing feedback to influence the final design.

Main functionalities provided by this API:
- Create agents with specific instructions and ability to use tools.
- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
- Agents can be provided with various shields (see the Safety API for more details).
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.

### 🧪 Feedback Welcome

This API is actively being developed. We welcome feedback on:
- API design and usability
- Performance characteristics
- Missing features or capabilities
- Integration patterns

**Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
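For orientation, a rough sketch of the create-agent / session / turn flow the page above describes. The client package, helper class, and method names follow common llama-stack-client examples and are assumptions rather than part of this diff; the model name and tool group are placeholders:

```python
# Hypothetical sketch of the agent -> session -> turn flow (not part of the diff).
from llama_stack_client import LlamaStackClient           # assumed client package
from llama_stack_client.lib.agents.agent import Agent     # assumed helper class

client = LlamaStackClient(base_url="http://localhost:8321")

agent = Agent(
    client,
    model="meta-llama/Llama-3.3-70B-Instruct",             # any registered model
    instructions="You are a helpful assistant.",
    tools=["builtin::websearch"],                          # optional tool groups
)
session_id = agent.create_session("demo-session")          # a "thread"
turn = agent.create_turn(                                  # one "turn" in that session
    session_id=session_id,
    messages=[{"role": "user", "content": "What is Llama Stack?"}],
    stream=False,
)
print(turn.output_message.content)
```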
docs/supplementary/stable/agents-api.md (new file, 40 lines):
@@ -0,0 +1,40 @@
## Responses API

The Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.

> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.

### ✅ Supported Tools

The Responses API supports the following tool types:

- **`web_search`**: Search the web for current information and real-time data
- **`file_search`**: Search through uploaded files and vector stores
  - Supports dynamic `vector_store_ids` per call
  - Compatible with OpenAI file search patterns
- **`function`**: Call custom functions with JSON schema validation
- **`mcp_tool`**: Model Context Protocol integration

### ✅ Supported Fields & Features

**Core Capabilities:**
- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration
- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths
- **Rich Annotations**: Automatic file citations, URL citations, and container file citations
- **Status Tracking**: Monitor tool call execution status and handle failures gracefully

### 🚧 Work in Progress

- Full real-time response streaming support
- `tool_choice` parameter
- `max_tool_calls` parameter
- Built-in tools (code interpreter, containers API)
- Safety & guardrails
- `reasoning` capabilities
- `service_tier`
- `logprobs`
- `max_output_tokens`
- `metadata` handling
- `instructions`
- `incomplete_details`
- `background`
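A brief sketch of calling the stable Responses API with a `file_search` tool and conversation branching, as described above. The OpenAI client is simply pointed at a running Llama Stack server; the base URL, model name, and vector store ID are placeholders:

```python
# Hypothetical usage sketch (not part of the diff): OpenAI-compatible client
# against a Llama Stack server's stable v1 Responses API.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

first = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    input="Summarize the onboarding doc.",
    tools=[{"type": "file_search", "vector_store_ids": ["vs_123"]}],  # dynamic per call
)

# Branch the conversation from the first response via previous_response_id.
branch = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    input="Now list the three most important action items.",
    previous_response_id=first.id,
)
print(branch.output_text)
```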
@@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
 from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
 from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod

 from .openai_responses import (
     ListOpenAIResponseInputItem,
@@ -42,6 +42,20 @@ from .openai_responses import (
 )


+@json_schema_type
+class ResponseShieldSpec(BaseModel):
+    """Specification for a shield to apply during response generation.
+
+    :param type: The type/identifier of the shield.
+    """
+
+    type: str
+    # TODO: more fields to be added for shield configuration
+
+
+ResponseShield = str | ResponseShieldSpec
+
+
 class Attachment(BaseModel):
     """An attachment to an agent turn.
@@ -472,20 +486,23 @@ class AgentStepResponse(BaseModel):

 @runtime_checkable
 class Agents(Protocol):
-    """Agents API for creating and interacting with agentic systems.
-
-    Main functionalities provided by this API:
-    - Create agents with specific instructions and ability to use tools.
-    - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
-    - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
-    - Agents can be provided with various shields (see the Safety API for more details).
-    - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
-    """
+    """Agents
+
+    APIs for creating and interacting with agentic systems."""

     @webmethod(
-        route="/agents", method="POST", descriptive_name="create_agent", deprecated=True, level=LLAMA_STACK_API_V1
+        route="/agents",
+        method="POST",
+        descriptive_name="create_agent",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents",
+        method="POST",
+        descriptive_name="create_agent",
+        level=LLAMA_STACK_API_V1ALPHA,
     )
-    @webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1ALPHA)
     async def create_agent(
         self,
         agent_config: AgentConfig,
@@ -648,8 +665,17 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}",
+        method="GET",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
     async def get_agents_session(
         self,
         session_id: str,
@@ -666,9 +692,16 @@ class Agents(Protocol):
         ...

     @webmethod(
-        route="/agents/{agent_id}/session/{session_id}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1
+        route="/agents/{agent_id}/session/{session_id}",
+        method="DELETE",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1ALPHA,
     )
-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def delete_agents_session(
         self,
         session_id: str,
@@ -681,7 +714,12 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(
+        route="/agents/{agent_id}",
+        method="DELETE",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
     @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def delete_agent(
         self,
@@ -704,7 +742,12 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(
+        route="/agents/{agent_id}",
+        method="GET",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
     @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_agent(self, agent_id: str) -> Agent:
         """Describe an agent by its ID.
@@ -714,7 +757,12 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/sessions", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(
+        route="/agents/{agent_id}/sessions",
+        method="GET",
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
     @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def list_agent_sessions(
         self,
@@ -738,6 +786,12 @@ class Agents(Protocol):
     #
     # Both of these APIs are inherently stateful.

+    @webmethod(
+        route="/openai/v1/responses/{response_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_openai_response(
         self,
@@ -750,6 +804,7 @@ class Agents(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
     async def create_openai_response(
         self,
@@ -764,6 +819,12 @@ class Agents(Protocol):
         tools: list[OpenAIResponseInputTool] | None = None,
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
+        shields: Annotated[
+            list[ResponseShield] | None,
+            ExtraBodyField(
+                "List of shields to apply during response generation. Shields provide safety and content moderation."
+            ),
+        ] = None,
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
         """Create a new OpenAI response.
@@ -771,10 +832,12 @@ class Agents(Protocol):
         :param model: The underlying LLM used for completions.
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
         :param include: (Optional) Additional fields to include in the response.
+        :param shields: (Optional) List of shields to apply during response generation. Can be shield IDs (strings) or shield specifications.
         :returns: An OpenAIResponseObject.
         """
         ...

+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_responses(
         self,
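Because `shields` arrives via `extra_body` rather than as a first-class OpenAI field, an OpenAI-compatible client would pass it through the client's `extra_body` hook. A minimal sketch (not part of the diff; the base URL, model, and shield identifiers are placeholders):

```python
# Hypothetical sketch: passing the new shields parameter through extra_body.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    input="Tell me about your safety features.",
    extra_body={
        # Shield IDs (strings) or shield specifications ({"type": ...}) are accepted.
        "shields": ["llama-guard", {"type": "content_safety"}]
    },
)
print(response.output_text)
```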
@@ -793,6 +856,9 @@ class Agents(Protocol):
         """
         ...

+    @webmethod(
+        route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
     @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_response_input_items(
         self,
@@ -815,6 +881,7 @@ class Agents(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         """Delete an OpenAI response by its ID.
@@ -888,6 +888,10 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):

     input: list[OpenAIResponseInput]

+    def to_response_object(self) -> OpenAIResponseObject:
+        """Convert to OpenAIResponseObject by excluding input field."""
+        return OpenAIResponseObject(**{k: v for k, v in self.model_dump().items() if k != "input"})
+

 @json_schema_type
 class ListOpenAIResponseObject(BaseModel):
@@ -43,6 +43,7 @@ class Batches(Protocol):
     Note: This API is currently under active development and may undergo changes.
     """

+    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
     async def create_batch(
         self,
@@ -63,6 +64,7 @@ class Batches(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve_batch(self, batch_id: str) -> BatchObject:
         """Retrieve information about a specific batch.
@@ -72,6 +74,7 @@ class Batches(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
     async def cancel_batch(self, batch_id: str) -> BatchObject:
         """Cancel a batch that is in progress.
@@ -81,6 +84,7 @@ class Batches(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
     async def list_batches(
         self,
llama_stack/apis/conversations/__init__.py (new file, 31 lines):
@@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .conversations import (
    Conversation,
    ConversationCreateRequest,
    ConversationDeletedResource,
    ConversationItem,
    ConversationItemCreateRequest,
    ConversationItemDeletedResource,
    ConversationItemList,
    Conversations,
    ConversationUpdateRequest,
    Metadata,
)

__all__ = [
    "Conversation",
    "ConversationCreateRequest",
    "ConversationDeletedResource",
    "ConversationItem",
    "ConversationItemCreateRequest",
    "ConversationItemDeletedResource",
    "ConversationItemList",
    "Conversations",
    "ConversationUpdateRequest",
    "Metadata",
]
llama_stack/apis/conversations/conversations.py (new file, 260 lines):
@@ -0,0 +1,260 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Annotated, Literal, Protocol, runtime_checkable

from openai import NOT_GIVEN
from openai._types import NotGiven
from openai.types.responses.response_includable import ResponseIncludable
from pydantic import BaseModel, Field

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseMessage,
    OpenAIResponseOutputMessageFileSearchToolCall,
    OpenAIResponseOutputMessageFunctionToolCall,
    OpenAIResponseOutputMessageMCPCall,
    OpenAIResponseOutputMessageMCPListTools,
    OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

Metadata = dict[str, str]


@json_schema_type
class Conversation(BaseModel):
    """OpenAI-compatible conversation object."""

    id: str = Field(..., description="The unique ID of the conversation.")
    object: Literal["conversation"] = Field(
        default="conversation", description="The object type, which is always conversation."
    )
    created_at: int = Field(
        ..., description="The time at which the conversation was created, measured in seconds since the Unix epoch."
    )
    metadata: Metadata | None = Field(
        default=None,
        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard.",
    )
    items: list[dict] | None = Field(
        default=None,
        description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
    )


@json_schema_type
class ConversationMessage(BaseModel):
    """OpenAI-compatible message item for conversations."""

    id: str = Field(..., description="unique identifier for this message")
    content: list[dict] = Field(..., description="message content")
    role: str = Field(..., description="message role")
    status: str = Field(..., description="message status")
    type: Literal["message"] = "message"
    object: Literal["message"] = "message"


ConversationItem = Annotated[
    OpenAIResponseMessage
    | OpenAIResponseOutputMessageFunctionToolCall
    | OpenAIResponseOutputMessageFileSearchToolCall
    | OpenAIResponseOutputMessageWebSearchToolCall
    | OpenAIResponseOutputMessageMCPCall
    | OpenAIResponseOutputMessageMCPListTools,
    Field(discriminator="type"),
]
register_schema(ConversationItem, name="ConversationItem")

# Using OpenAI types directly caused issues but some notes for reference:
# Note that ConversationItem is a Annotated Union of the types below:
# from openai.types.responses import *
# from openai.types.responses.response_item import *
# from openai.types.conversations import ConversationItem
# f = [
#     ResponseFunctionToolCallItem,
#     ResponseFunctionToolCallOutputItem,
#     ResponseFileSearchToolCall,
#     ResponseFunctionWebSearch,
#     ImageGenerationCall,
#     ResponseComputerToolCall,
#     ResponseComputerToolCallOutputItem,
#     ResponseReasoningItem,
#     ResponseCodeInterpreterToolCall,
#     LocalShellCall,
#     LocalShellCallOutput,
#     McpListTools,
#     McpApprovalRequest,
#     McpApprovalResponse,
#     McpCall,
#     ResponseCustomToolCall,
#     ResponseCustomToolCallOutput
# ]


@json_schema_type
class ConversationCreateRequest(BaseModel):
    """Request body for creating a conversation."""

    items: list[ConversationItem] | None = Field(
        default=[],
        description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
        max_length=20,
    )
    metadata: Metadata | None = Field(
        default={},
        description="Set of 16 key-value pairs that can be attached to an object. Useful for storing additional information",
        max_length=16,
    )


@json_schema_type
class ConversationUpdateRequest(BaseModel):
    """Request body for updating a conversation."""

    metadata: Metadata = Field(
        ...,
        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.",
    )


@json_schema_type
class ConversationDeletedResource(BaseModel):
    """Response for deleted conversation."""

    id: str = Field(..., description="The deleted conversation identifier")
    object: str = Field(default="conversation.deleted", description="Object type")
    deleted: bool = Field(default=True, description="Whether the object was deleted")


@json_schema_type
class ConversationItemCreateRequest(BaseModel):
    """Request body for creating conversation items."""

    items: list[ConversationItem] = Field(
        ...,
        description="Items to include in the conversation context. You may add up to 20 items at a time.",
        max_length=20,
    )


@json_schema_type
class ConversationItemList(BaseModel):
    """List of conversation items with pagination."""

    object: str = Field(default="list", description="Object type")
    data: list[ConversationItem] = Field(..., description="List of conversation items")
    first_id: str | None = Field(default=None, description="The ID of the first item in the list")
    last_id: str | None = Field(default=None, description="The ID of the last item in the list")
    has_more: bool = Field(default=False, description="Whether there are more items available")


@json_schema_type
class ConversationItemDeletedResource(BaseModel):
    """Response for deleted conversation item."""

    id: str = Field(..., description="The deleted item identifier")
    object: str = Field(default="conversation.item.deleted", description="Object type")
    deleted: bool = Field(default=True, description="Whether the object was deleted")


@runtime_checkable
@trace_protocol
class Conversations(Protocol):
    """Protocol for conversation management operations."""

    @webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
    async def create_conversation(
        self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
    ) -> Conversation:
        """Create a conversation.

        :param items: Initial items to include in the conversation context.
        :param metadata: Set of key-value pairs that can be attached to an object.
        :returns: The created conversation object.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_conversation(self, conversation_id: str) -> Conversation:
        """Get a conversation with the given ID.

        :param conversation_id: The conversation identifier.
        :returns: The conversation object.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
    async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
        """Update a conversation's metadata with the given ID.

        :param conversation_id: The conversation identifier.
        :param metadata: Set of key-value pairs that can be attached to an object.
        :returns: The updated conversation object.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
        """Delete a conversation with the given ID.

        :param conversation_id: The conversation identifier.
        :returns: The deleted conversation resource.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
    async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
        """Create items in the conversation.

        :param conversation_id: The conversation identifier.
        :param items: Items to include in the conversation context.
        :returns: List of created items.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
        """Retrieve a conversation item.

        :param conversation_id: The conversation identifier.
        :param item_id: The item identifier.
        :returns: The conversation item.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}/items", method="GET", level=LLAMA_STACK_API_V1)
    async def list(
        self,
        conversation_id: str,
        after: str | NotGiven = NOT_GIVEN,
        include: list[ResponseIncludable] | NotGiven = NOT_GIVEN,
        limit: int | NotGiven = NOT_GIVEN,
        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
    ) -> ConversationItemList:
        """List items in the conversation.

        :param conversation_id: The conversation identifier.
        :param after: An item ID to list items after, used in pagination.
        :param include: Specify additional output data to include in the response.
        :param limit: A limit on the number of objects to be returned (1-100, default 20).
        :param order: The order to return items in (asc or desc, default desc).
        :returns: List of conversation items.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_conversation_item(
        self, conversation_id: str, item_id: str
    ) -> ConversationItemDeletedResource:
        """Delete a conversation item.

        :param conversation_id: The conversation identifier.
        :param item_id: The item identifier.
        :returns: The deleted item resource.
        """
        ...
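A quick sketch of exercising these routes over HTTP. This is not part of the diff: the base URL is a placeholder, the `/v1` prefix assumes LLAMA_STACK_API_V1 maps to a `v1` path prefix, and the message item shape follows the OpenAI-style message items the union above accepts:

```python
# Hypothetical sketch of the Conversations routes defined above.
import httpx

BASE = "http://localhost:8321/v1"  # placeholder server address

# POST /v1/conversations -> Conversation
conv = httpx.post(f"{BASE}/conversations", json={"metadata": {"topic": "demo"}}).json()

# POST /v1/conversations/{conversation_id}/items -> ConversationItemList
items = httpx.post(
    f"{BASE}/conversations/{conv['id']}/items",
    json={
        "items": [
            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hi"}]}
        ]
    },
).json()

# GET /v1/conversations/{conversation_id}/items -> ConversationItemList
listing = httpx.get(f"{BASE}/conversations/{conv['id']}/items").json()
print(listing["data"])
```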
@@ -8,7 +8,7 @@ from typing import Any, Protocol, runtime_checkable

 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
 from llama_stack.schema_utils import webmethod

@@ -21,7 +21,8 @@ class DatasetIO(Protocol):
     # keeping for aligning with inference/safety, but this is not used
     dataset_store: DatasetStore

-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
     async def iterrows(
         self,
         dataset_id: str,
@@ -45,7 +46,10 @@ class DatasetIO(Protocol):
         """
         ...

-    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(
+        route="/datasetio/append-rows/{dataset_id:path}", method="POST", deprecated=True, level=LLAMA_STACK_API_V1
+    )
+    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA)
     async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
         """Append rows to a dataset.

@@ -10,7 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

@@ -146,7 +146,8 @@ class ListDatasetsResponse(BaseModel):


 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasets", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
     async def register_dataset(
         self,
         purpose: DatasetPurpose,
@@ -215,7 +216,8 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasets/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
     async def get_dataset(
         self,
         dataset_id: str,
@@ -227,7 +229,8 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasets", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
     async def list_datasets(self) -> ListDatasetsResponse:
         """List all datasets.

@@ -235,7 +238,8 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
     async def unregister_dataset(
         self,
         dataset_id: str,
@@ -129,6 +129,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     tool_groups = "tool_groups"
     files = "files"
     prompts = "prompts"
+    conversations = "conversations"

     # built-in API
     inspect = "inspect"
@@ -105,6 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
 @trace_protocol
 class Files(Protocol):
     # OpenAI Files API Endpoints
+    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_upload_file(
         self,
@@ -127,6 +128,7 @@ class Files(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files(
         self,
@@ -146,6 +148,7 @@ class Files(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file(
         self,
@@ -159,6 +162,7 @@ class Files(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_file(
         self,
@@ -172,6 +176,7 @@ class Files(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file_content(
         self,
@@ -27,14 +27,12 @@ from llama_stack.models.llama.datatypes import (
     StopReason,
     ToolCall,
     ToolDefinition,
-    ToolParamDefinition,
     ToolPromptFormat,
 )
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 register_schema(ToolCall)
-register_schema(ToolParamDefinition)
 register_schema(ToolDefinition)

 from enum import StrEnum
@@ -1008,67 +1006,6 @@ class InferenceProvider(Protocol):

     model_store: ModelStore | None = None

-    async def completion(
-        self,
-        model_id: str,
-        content: InterleavedContent,
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-    ) -> CompletionResponse | AsyncIterator[CompletionResponseStreamChunk]:
-        """Generate a completion for the given content using the specified model.
-
-        :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
-        :param content: The content to generate a completion for.
-        :param sampling_params: (Optional) Parameters to control the sampling strategy.
-        :param response_format: (Optional) Grammar specification for guided (structured) decoding.
-        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
-        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
-        :returns: If stream=False, returns a CompletionResponse with the full completion.
-            If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.
-        """
-        ...
-
-    async def chat_completion(
-        self,
-        model_id: str,
-        messages: list[Message],
-        sampling_params: SamplingParams | None = None,
-        tools: list[ToolDefinition] | None = None,
-        tool_choice: ToolChoice | None = ToolChoice.auto,
-        tool_prompt_format: ToolPromptFormat | None = None,
-        response_format: ResponseFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-        tool_config: ToolConfig | None = None,
-    ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]:
-        """Generate a chat completion for the given messages using the specified model.
-
-        :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
-        :param messages: List of messages in the conversation.
-        :param sampling_params: Parameters to control the sampling strategy.
-        :param tools: (Optional) List of tool definitions available to the model.
-        :param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
-            .. deprecated::
-               Use tool_config instead.
-        :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model.
-            - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
-            - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag.
-            - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls.
-            .. deprecated::
-               Use tool_config instead.
-        :param response_format: (Optional) Grammar specification for guided (structured) decoding. There are two options:
-            - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format.
-            - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it.
-        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
-        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
-        :param tool_config: (Optional) Configuration for tool use.
-        :returns: If stream=False, returns a ChatCompletionResponse with the full completion.
-            If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk.
-        """
-        ...
-
     @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def rerank(
         self,
@@ -1088,6 +1025,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

+    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
@@ -1139,6 +1077,7 @@ class InferenceProvider(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
@@ -1195,6 +1134,7 @@ class InferenceProvider(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
@@ -1224,6 +1164,7 @@ class Inference(InferenceProvider):
     - Embedding models: these models generate embeddings to be used for semantic search.
     """

+    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
@@ -1242,6 +1183,9 @@ class Inference(InferenceProvider):
         """
         raise NotImplementedError("List chat completions is not implemented")

+    @webmethod(
+        route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
     @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Describe a chat completion by its ID.
@@ -111,6 +111,14 @@ class Models(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    async def openai_list_models(self) -> OpenAIListModelsResponse:
+        """List models using the OpenAI API.
+
+        :returns: A OpenAIListModelsResponse.
+        """
+        ...
+
     @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_model(
         self,
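As a rough usage sketch (an assumption, not part of the change): because the deprecated route mirrors the OpenAI models endpoint, a stock OpenAI client pointed at the stack server can list models. The base URL and API key below are placeholders.

from openai import OpenAI

# Llama Stack servers typically do not require a real key; the base URL is assumed.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

for model in client.models.list():
    print(model.id)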
@@ -114,6 +114,7 @@ class Safety(Protocol):
         """
         ...

+    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
     async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
         """Classifies if text and/or image inputs are potentially harmful.
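A hedged sketch of calling the moderations endpoint over HTTP; the payload shape follows the OpenAI moderations convention, and the server address and model id are placeholders.

import httpx

resp = httpx.post(
    "http://localhost:8321/v1/moderations",  # assumed server address and prefix
    json={"input": "Some text to classify", "model": "example-safety-model"},
    timeout=30.0,
)
print(resp.json())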
@@ -16,7 +16,7 @@ from typing import (

 from pydantic import BaseModel, Field

-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.models.llama.datatypes import Primitive
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

@@ -426,7 +426,14 @@ class Telemetry(Protocol):
         """
         ...

-    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
+    @webmethod(
+        route="/telemetry/traces",
+        method="POST",
+        required_scope=REQUIRED_SCOPE,
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
     async def query_traces(
         self,
         attribute_filters: list[QueryCondition] | None = None,

@@ -445,7 +452,17 @@ class Telemetry(Protocol):
         ...

     @webmethod(
-        route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
+        route="/telemetry/traces/{trace_id:path}",
+        method="GET",
+        required_scope=REQUIRED_SCOPE,
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/telemetry/traces/{trace_id:path}",
+        method="GET",
+        required_scope=REQUIRED_SCOPE,
+        level=LLAMA_STACK_API_V1ALPHA,
     )
     async def get_trace(self, trace_id: str) -> Trace:
         """Get a trace by its ID.

@@ -459,8 +476,15 @@ class Telemetry(Protocol):
         route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
         method="GET",
         required_scope=REQUIRED_SCOPE,
+        deprecated=True,
         level=LLAMA_STACK_API_V1,
     )
+    @webmethod(
+        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
+        method="GET",
+        required_scope=REQUIRED_SCOPE,
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
     async def get_span(self, trace_id: str, span_id: str) -> Span:
         """Get a span by its ID.

@@ -473,9 +497,16 @@ class Telemetry(Protocol):
     @webmethod(
         route="/telemetry/spans/{span_id:path}/tree",
         method="POST",
+        deprecated=True,
         required_scope=REQUIRED_SCOPE,
         level=LLAMA_STACK_API_V1,
     )
+    @webmethod(
+        route="/telemetry/spans/{span_id:path}/tree",
+        method="POST",
+        required_scope=REQUIRED_SCOPE,
+        level=LLAMA_STACK_API_V1ALPHA,
+    )
     async def get_span_tree(
         self,
         span_id: str,

@@ -491,7 +522,14 @@ class Telemetry(Protocol):
         """
         ...

-    @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
+    @webmethod(
+        route="/telemetry/spans",
+        method="POST",
+        required_scope=REQUIRED_SCOPE,
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
     async def query_spans(
         self,
         attribute_filters: list[QueryCondition],

@@ -507,7 +545,8 @@ class Telemetry(Protocol):
         """
         ...

-    @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
+    @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def save_spans_to_dataset(
         self,
         attribute_filters: list[QueryCondition],

@@ -525,7 +564,17 @@ class Telemetry(Protocol):
         ...

     @webmethod(
-        route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
+        route="/telemetry/metrics/{metric_name}",
+        method="POST",
+        required_scope=REQUIRED_SCOPE,
+        deprecated=True,
+        level=LLAMA_STACK_API_V1,
+    )
+    @webmethod(
+        route="/telemetry/metrics/{metric_name}",
+        method="POST",
+        required_scope=REQUIRED_SCOPE,
+        level=LLAMA_STACK_API_V1ALPHA,
     )
     async def query_metrics(
         self,
@@ -7,7 +7,7 @@
 from enum import Enum
 from typing import Any, Literal, Protocol

-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 from typing_extensions import runtime_checkable

 from llama_stack.apis.common.content_types import URL, InterleavedContent

@@ -19,59 +19,23 @@ from llama_stack.schema_utils import json_schema_type, webmethod
 from .rag_tool import RAGToolRuntime


-@json_schema_type
-class ToolParameter(BaseModel):
-    """Parameter definition for a tool.
-
-    :param name: Name of the parameter
-    :param parameter_type: Type of the parameter (e.g., string, integer)
-    :param description: Human-readable description of what the parameter does
-    :param required: Whether this parameter is required for tool invocation
-    :param items: Type of the elements when parameter_type is array
-    :param title: (Optional) Title of the parameter
-    :param default: (Optional) Default value for the parameter if not provided
-    """
-
-    name: str
-    parameter_type: str
-    description: str
-    required: bool = Field(default=True)
-    items: dict | None = None
-    title: str | None = None
-    default: Any | None = None
-
-
-@json_schema_type
-class Tool(Resource):
-    """A tool that can be invoked by agents.
-
-    :param type: Type of resource, always 'tool'
-    :param toolgroup_id: ID of the tool group this tool belongs to
-    :param description: Human-readable description of what the tool does
-    :param parameters: List of parameters this tool accepts
-    :param metadata: (Optional) Additional metadata about the tool
-    """
-
-    type: Literal[ResourceType.tool] = ResourceType.tool
-    toolgroup_id: str
-    description: str
-    parameters: list[ToolParameter]
-    metadata: dict[str, Any] | None = None
-
-
 @json_schema_type
 class ToolDef(BaseModel):
     """Tool definition used in runtime contexts.

     :param name: Name of the tool
     :param description: (Optional) Human-readable description of what the tool does
-    :param parameters: (Optional) List of parameters this tool accepts
+    :param input_schema: (Optional) JSON Schema for tool inputs (MCP inputSchema)
+    :param output_schema: (Optional) JSON Schema for tool outputs (MCP outputSchema)
     :param metadata: (Optional) Additional metadata about the tool
+    :param toolgroup_id: (Optional) ID of the tool group this tool belongs to
     """

+    toolgroup_id: str | None = None
     name: str
     description: str | None = None
-    parameters: list[ToolParameter] | None = None
+    input_schema: dict[str, Any] | None = None
+    output_schema: dict[str, Any] | None = None
     metadata: dict[str, Any] | None = None

@@ -122,7 +86,7 @@ class ToolInvocationResult(BaseModel):
 class ToolStore(Protocol):
-    async def get_tool(self, tool_name: str) -> Tool: ...
+    async def get_tool(self, tool_name: str) -> ToolDef: ...
     async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ...

@@ -135,15 +99,6 @@ class ListToolGroupsResponse(BaseModel):
     data: list[ToolGroup]


-class ListToolsResponse(BaseModel):
-    """Response containing a list of tools.
-
-    :param data: List of tools
-    """
-
-    data: list[Tool]
-
-
 class ListToolDefsResponse(BaseModel):
     """Response containing a list of tool definitions.

@@ -194,11 +149,11 @@ class ToolGroups(Protocol):
     ...

     @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
-    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
+    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
         """List tools with optional tool group.

         :param toolgroup_id: The ID of the tool group to list tools for.
-        :returns: A ListToolsResponse.
+        :returns: A ListToolDefsResponse.
         """
         ...

@@ -206,11 +161,11 @@ class ToolGroups(Protocol):
     async def get_tool(
         self,
         tool_name: str,
-    ) -> Tool:
+    ) -> ToolDef:
         """Get a tool by its name.

         :param tool_name: The name of the tool to get.
-        :returns: A Tool.
+        :returns: A ToolDef.
         """
         ...
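A short sketch of the new schema-first tool definition; the field names come from the diff above, while the schema contents are an illustrative example rather than a tool that exists anywhere.

from llama_stack.apis.tools import ToolDef

get_weather = ToolDef(
    name="get_weather",
    description="Look up the current weather for a city",
    input_schema={  # MCP-style inputSchema: a plain JSON Schema object
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
    output_schema={  # MCP-style outputSchema
        "type": "object",
        "properties": {"temperature_c": {"type": "number"}},
    },
    metadata={"source": "example"},
)
print(get_weather.model_dump())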
@@ -512,6 +512,7 @@ class VectorIO(Protocol):
     ...

     # OpenAI Vector Stores API endpoints
+    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_create_vector_store(

@@ -538,6 +539,7 @@ class VectorIO(Protocol):
+    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_vector_stores(

@@ -556,6 +558,9 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
     @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_vector_store(

@@ -568,6 +573,9 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}",
         method="POST",

@@ -590,6 +598,9 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}",
         method="DELETE",

@@ -606,6 +617,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/search",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/search",
         method="POST",

@@ -638,6 +655,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files",
         method="POST",

@@ -660,6 +683,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files",
         method="GET",

@@ -686,6 +715,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="GET",

@@ -704,6 +739,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}/content",
         method="GET",

@@ -722,6 +763,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="POST",

@@ -742,6 +789,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="DELETE",

@@ -765,6 +818,12 @@ class VectorIO(Protocol):
         method="POST",
         level=LLAMA_STACK_API_V1,
     )
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     async def openai_create_vector_store_file_batch(
         self,
         vector_store_id: str,

@@ -787,6 +846,12 @@ class VectorIO(Protocol):
         method="GET",
         level=LLAMA_STACK_API_V1,
     )
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     async def openai_retrieve_vector_store_file_batch(
         self,
         batch_id: str,

@@ -800,6 +865,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
         method="GET",

@@ -828,6 +899,12 @@ class VectorIO(Protocol):
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+        method="POST",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
         method="POST",
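A usage sketch for one of the aliased routes (vector store search); the store id and query are placeholders, and the /v1 prefix is an assumption about how `level` is rendered into the URL.

import httpx

store_id = "vs_example"  # placeholder vector store id
query = {"query": "retrieval augmented generation", "max_num_results": 3}

# New canonical path
r = httpx.post(f"http://localhost:8321/v1/vector_stores/{store_id}/search", json=query, timeout=30.0)

# Deprecated OpenAI-prefixed alias
r_legacy = httpx.post(f"http://localhost:8321/v1/openai/v1/vector_stores/{store_id}/search", json=query, timeout=30.0)

print(r.status_code, r_legacy.status_code)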
@@ -6,11 +6,18 @@
 import argparse
 import os
+import ssl
 import subprocess
 from pathlib import Path

+import uvicorn
+import yaml
+
 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.subcommand import Subcommand
+from llama_stack.core.datatypes import LoggingConfig, StackRunConfig
+from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars, validate_env_pair
+from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.log import get_logger

 REPO_ROOT = Path(__file__).parent.parent.parent.parent

@@ -146,23 +153,7 @@ class StackRun(Subcommand):
         # using the current environment packages.
         if not image_type and not image_name:
             logger.info("No image type or image name provided. Assuming environment packages.")
-            from llama_stack.core.server.server import main as server_main
-
-            # Build the server args from the current args passed to the CLI
-            server_args = argparse.Namespace()
-            for arg in vars(args):
-                # If this is a function, avoid passing it
-                # "args" contains:
-                # func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
-                if callable(getattr(args, arg)):
-                    continue
-                if arg == "config":
-                    server_args.config = str(config_file)
-                else:
-                    setattr(server_args, arg, getattr(args, arg))
-
-            # Run the server
-            server_main(server_args)
+            self._uvicorn_run(config_file, args)
         else:
             run_args = formulate_run_args(image_type, image_name)

@@ -184,6 +175,76 @@ class StackRun(Subcommand):
             run_command(run_args)

+    def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
+        if not config_file:
+            self.parser.error("Config file is required")
+
+        # Set environment variables if provided
+        if args.env:
+            for env_pair in args.env:
+                try:
+                    key, value = validate_env_pair(env_pair)
+                    logger.info(f"Setting environment variable {key} => {value}")
+                    os.environ[key] = value
+                except ValueError as e:
+                    logger.error(f"Error: {str(e)}")
+                    self.parser.error(f"Invalid environment variable format: {env_pair}")
+
+        config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
+        with open(config_file) as fp:
+            config_contents = yaml.safe_load(fp)
+            if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
+                logger_config = LoggingConfig(**cfg)
+            else:
+                logger_config = None
+            config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
+
+        port = args.port or config.server.port
+        host = config.server.host or ["::", "0.0.0.0"]
+
+        # Set the config file in environment so create_app can find it
+        os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
+
+        uvicorn_config = {
+            "factory": True,
+            "host": host,
+            "port": port,
+            "lifespan": "on",
+            "log_level": logger.getEffectiveLevel(),
+            "log_config": logger_config,
+        }
+
+        keyfile = config.server.tls_keyfile
+        certfile = config.server.tls_certfile
+        if keyfile and certfile:
+            uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
+            uvicorn_config["ssl_certfile"] = config.server.tls_certfile
+            if config.server.tls_cafile:
+                uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
+                uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
+
+                logger.info(
+                    f"HTTPS enabled with certificates:\n  Key: {keyfile}\n  Cert: {certfile}\n  CA: {config.server.tls_cafile}"
+                )
+            else:
+                logger.info(f"HTTPS enabled with certificates:\n  Key: {keyfile}\n  Cert: {certfile}")
+
+        logger.info(f"Listening on {host}:{port}")
+
+        # We need to catch KeyboardInterrupt because uvicorn's signal handling
+        # re-raises SIGINT signals using signal.raise_signal(), which Python
+        # converts to KeyboardInterrupt. Without this catch, we'd get a confusing
+        # stack trace when using Ctrl+C or kill -2 (SIGINT).
+        # SIGTERM (kill -15) works fine without this because Python doesn't
+        # have a default handler for it.
+        #
+        # Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
+        # signal handling but this is quite intrusive and not worth the effort.
+        try:
+            uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
+        except (KeyboardInterrupt, SystemExit):
+            logger.info("Received interrupt signal, shutting down gracefully...")
+
     def _start_ui_development_server(self, stack_server_port: int):
         logger.info("Attempting to start UI development server...")
         # Check if npm is available
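The key mechanism here is uvicorn's application-factory mode: with factory=True, uvicorn calls the target to build the ASGI app, so per-process configuration can be read from the environment (hence LLAMA_STACK_CONFIG above). A minimal self-contained sketch of the same pattern, with a toy FastAPI app standing in for the real create_app; names and ports are illustrative only.

import os

import uvicorn
from fastapi import FastAPI


def create_app() -> FastAPI:
    # Read configuration from the environment, the same trick the CLI uses
    # before handing control to uvicorn.
    title = os.environ.get("EXAMPLE_APP_TITLE", "demo")
    app = FastAPI(title=title)

    @app.get("/healthz")
    def healthz() -> dict:
        return {"status": "ok"}

    return app


if __name__ == "__main__":
    os.environ["EXAMPLE_APP_TITLE"] = "factory-demo"
    # factory=True tells uvicorn to call create_app() instead of treating it as an app object.
    uvicorn.run(create_app, factory=True, host="127.0.0.1", port=8000, lifespan="on")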
@@ -324,14 +324,14 @@ fi
 RUN pip uninstall -y uv
 EOF

-# If a run config is provided, we use the --config flag
+# If a run config is provided, we use the llama stack CLI
 if [[ -n "$run_config" ]]; then
   add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$RUN_CONFIG_PATH"]
+ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
 EOF
 elif [[ "$distro_or_config" != *.yaml ]]; then
   add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$distro_or_config"]
+ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
 EOF
 fi
llama_stack/core/conversations/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
llama_stack/core/conversations/conversations.py (new file, 306 lines)
@@ -0,0 +1,306 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
import secrets
import time
from typing import Any

from openai import NOT_GIVEN
from pydantic import BaseModel, TypeAdapter

from llama_stack.apis.conversations.conversations import (
    Conversation,
    ConversationDeletedResource,
    ConversationItem,
    ConversationItemDeletedResource,
    ConversationItemList,
    Conversations,
    Metadata,
)
from llama_stack.core.datatypes import AccessRule
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlstore import (
    SqliteSqlStoreConfig,
    SqlStoreConfig,
    sqlstore_impl,
)

logger = get_logger(name=__name__, category="openai::conversations")


class ConversationServiceConfig(BaseModel):
    """Configuration for the built-in conversation service.

    :param conversations_store: SQL store configuration for conversations (defaults to SQLite)
    :param policy: Access control rules
    """

    conversations_store: SqlStoreConfig = SqliteSqlStoreConfig(
        db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
    )
    policy: list[AccessRule] = []


async def get_provider_impl(config: ConversationServiceConfig, deps: dict[Any, Any]):
    """Get the conversation service implementation."""
    impl = ConversationServiceImpl(config, deps)
    await impl.initialize()
    return impl


class ConversationServiceImpl(Conversations):
    """Built-in conversation service implementation using AuthorizedSqlStore."""

    def __init__(self, config: ConversationServiceConfig, deps: dict[Any, Any]):
        self.config = config
        self.deps = deps
        self.policy = config.policy

        base_sql_store = sqlstore_impl(config.conversations_store)
        self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)

    async def initialize(self) -> None:
        """Initialize the store and create tables."""
        if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
            os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)

        await self.sql_store.create_table(
            "openai_conversations",
            {
                "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                "created_at": ColumnType.INTEGER,
                "items": ColumnType.JSON,
                "metadata": ColumnType.JSON,
            },
        )

        await self.sql_store.create_table(
            "conversation_items",
            {
                "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                "conversation_id": ColumnType.STRING,
                "created_at": ColumnType.INTEGER,
                "item_data": ColumnType.JSON,
            },
        )

    async def create_conversation(
        self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
    ) -> Conversation:
        """Create a conversation."""
        random_bytes = secrets.token_bytes(24)
        conversation_id = f"conv_{random_bytes.hex()}"
        created_at = int(time.time())

        record_data = {
            "id": conversation_id,
            "created_at": created_at,
            "items": [],
            "metadata": metadata,
        }

        await self.sql_store.insert(
            table="openai_conversations",
            data=record_data,
        )

        if items:
            item_records = []
            for item in items:
                item_dict = item.model_dump()
                item_id = self._get_or_generate_item_id(item, item_dict)

                item_record = {
                    "id": item_id,
                    "conversation_id": conversation_id,
                    "created_at": created_at,
                    "item_data": item_dict,
                }

                item_records.append(item_record)

            await self.sql_store.insert(table="conversation_items", data=item_records)

        conversation = Conversation(
            id=conversation_id,
            created_at=created_at,
            metadata=metadata,
            object="conversation",
        )

        logger.info(f"Created conversation {conversation_id}")
        return conversation

    async def get_conversation(self, conversation_id: str) -> Conversation:
        """Get a conversation with the given ID."""
        record = await self.sql_store.fetch_one(table="openai_conversations", where={"id": conversation_id})

        if record is None:
            raise ValueError(f"Conversation {conversation_id} not found")

        return Conversation(
            id=record["id"], created_at=record["created_at"], metadata=record.get("metadata"), object="conversation"
        )

    async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
        """Update a conversation's metadata with the given ID"""
        await self.sql_store.update(
            table="openai_conversations", data={"metadata": metadata}, where={"id": conversation_id}
        )

        return await self.get_conversation(conversation_id)

    async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
        """Delete a conversation with the given ID."""
        await self.sql_store.delete(table="openai_conversations", where={"id": conversation_id})

        logger.info(f"Deleted conversation {conversation_id}")
        return ConversationDeletedResource(id=conversation_id)

    def _validate_conversation_id(self, conversation_id: str) -> None:
        """Validate conversation ID format."""
        if not conversation_id.startswith("conv_"):
            raise ValueError(
                f"Invalid 'conversation_id': '{conversation_id}'. Expected an ID that begins with 'conv_'."
            )

    def _get_or_generate_item_id(self, item: ConversationItem, item_dict: dict) -> str:
        """Get existing item ID or generate one if missing."""
        if item.id is None:
            random_bytes = secrets.token_bytes(24)
            if item.type == "message":
                item_id = f"msg_{random_bytes.hex()}"
            else:
                item_id = f"item_{random_bytes.hex()}"
            item_dict["id"] = item_id
            return item_id
        return item.id

    async def _get_validated_conversation(self, conversation_id: str) -> Conversation:
        """Validate conversation ID and return the conversation if it exists."""
        self._validate_conversation_id(conversation_id)
        return await self.get_conversation(conversation_id)

    async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
        """Create (add) items to a conversation."""
        await self._get_validated_conversation(conversation_id)

        created_items = []
        created_at = int(time.time())

        for item in items:
            item_dict = item.model_dump()
            item_id = self._get_or_generate_item_id(item, item_dict)

            item_record = {
                "id": item_id,
                "conversation_id": conversation_id,
                "created_at": created_at,
                "item_data": item_dict,
            }

            # TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update
            try:
                await self.sql_store.insert(table="conversation_items", data=item_record)
            except Exception:
                # If insert fails due to ID conflict, update existing record
                await self.sql_store.update(
                    table="conversation_items",
                    data={"created_at": created_at, "item_data": item_dict},
                    where={"id": item_id},
                )

            created_items.append(item_dict)

        logger.info(f"Created {len(created_items)} items in conversation {conversation_id}")

        # Convert created items (dicts) to proper ConversationItem types
        adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
        response_items: list[ConversationItem] = [adapter.validate_python(item_dict) for item_dict in created_items]

        return ConversationItemList(
            data=response_items,
            first_id=created_items[0]["id"] if created_items else None,
            last_id=created_items[-1]["id"] if created_items else None,
            has_more=False,
        )

    async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
        """Retrieve a conversation item."""
        if not conversation_id:
            raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
        if not item_id:
            raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}")

        # Get item from conversation_items table
        record = await self.sql_store.fetch_one(
            table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
        )

        if record is None:
            raise ValueError(f"Item {item_id} not found in conversation {conversation_id}")

        adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
        return adapter.validate_python(record["item_data"])

    async def list(self, conversation_id: str, after=NOT_GIVEN, include=NOT_GIVEN, limit=NOT_GIVEN, order=NOT_GIVEN):
        """List items in the conversation."""
        result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id})
        records = result.data

        if order != NOT_GIVEN and order == "asc":
            records.sort(key=lambda x: x["created_at"])
        else:
            records.sort(key=lambda x: x["created_at"], reverse=True)

        actual_limit = 20
        if limit != NOT_GIVEN and isinstance(limit, int):
            actual_limit = limit

        records = records[:actual_limit]
        items = [record["item_data"] for record in records]

        adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
        response_items: list[ConversationItem] = [adapter.validate_python(item) for item in items]

        first_id = response_items[0].id if response_items else None
        last_id = response_items[-1].id if response_items else None

        return ConversationItemList(
            data=response_items,
            first_id=first_id,
            last_id=last_id,
            has_more=False,
        )

    async def openai_delete_conversation_item(
        self, conversation_id: str, item_id: str
    ) -> ConversationItemDeletedResource:
        """Delete a conversation item."""
        if not conversation_id:
            raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
        if not item_id:
            raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}")

        _ = await self._get_validated_conversation(conversation_id)

        record = await self.sql_store.fetch_one(
            table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
        )

        if record is None:
            raise ValueError(f"Item {item_id} not found in conversation {conversation_id}")

        await self.sql_store.delete(
            table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
        )

        logger.info(f"Deleted item {item_id} from conversation {conversation_id}")
        return ConversationItemDeletedResource(id=item_id)
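A quick end-to-end sketch of driving this service directly, outside the HTTP server; it assumes the module layout shown above is importable, uses the default SQLite-backed store, and assumes metadata is a simple string mapping.

import asyncio

from llama_stack.core.conversations.conversations import (
    ConversationServiceConfig,
    get_provider_impl,
)


async def main() -> None:
    # Default config writes to the distribution's conversations.db.
    service = await get_provider_impl(ConversationServiceConfig(), deps={})

    conv = await service.create_conversation(metadata={"topic": "demo"})
    fetched = await service.get_conversation(conv.id)
    print(fetched.id, fetched.metadata)

    await service.update_conversation(conv.id, metadata={"topic": "demo", "status": "closed"})
    await service.openai_delete_conversation(conv.id)


asyncio.run(main())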
@@ -22,7 +22,7 @@ from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
 from llama_stack.apis.shields import Shield, ShieldInput
-from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
+from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.access_control.datatypes import AccessRule

@@ -84,15 +84,11 @@ class BenchmarkWithOwner(Benchmark, ResourceWithOwner):
     pass


-class ToolWithOwner(Tool, ResourceWithOwner):
-    pass
-
-
 class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
     pass


-RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | Tool | ToolGroup
+RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup

 RoutableObjectWithProvider = Annotated[
     ModelWithOwner

@@ -101,7 +97,6 @@ RoutableObjectWithProvider = Annotated[
     | DatasetWithOwner
     | ScoringFnWithOwner
     | BenchmarkWithOwner
-    | ToolWithOwner
     | ToolGroupWithOwner,
     Field(discriminator="type"),
 ]

@@ -480,6 +475,13 @@ InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
 If not specified, a default SQLite store will be used.""",
     )

+    conversations_store: SqlStoreConfig | None = Field(
+        default=None,
+        description="""
+Configuration for the persistence store used by the conversations API.
+If not specified, a default SQLite store will be used.""",
+    )
+
     # registry of "resources" in the distribution
     models: list[ModelInput] = Field(default_factory=list)
     shields: list[ShieldInput] = Field(default_factory=list)
@@ -25,7 +25,7 @@ from llama_stack.providers.datatypes import (
 logger = get_logger(name=__name__, category="core")


-INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts}
+INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations}


 def stack_apis() -> list[Api]:

@@ -243,6 +243,7 @@ def get_external_providers_from_module(
             spec = module.get_provider_spec()
         else:
             # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
+            # in the case we are building we CANNOT import this module of course because it has not been installed.
             spec = ProviderSpec(
                 api=Api(provider_api),
                 provider_type=provider.provider_type,

@@ -251,8 +252,19 @@ def get_external_providers_from_module(
                 config_class="",
             )
         provider_type = provider.provider_type
-        # in the case we are building we CANNOT import this module of course because it has not been installed.
-        # return a partially filled out spec that the build script will populate.
+        if isinstance(spec, list):
+            # optionally allow people to pass inline and remote provider specs as a returned list.
+            # with the old method, users could pass in directories of specs using overlapping code
+            # we want to ensure we preserve that flexibility in this method.
+            logger.info(
+                f"Detected a list of external provider specs from {provider.module} adding all to the registry"
+            )
+            for provider_spec in spec:
+                if provider_spec.provider_type != provider.provider_type:
+                    continue
+                logger.info(f"Adding {provider.provider_type} to registry")
+                registry[Api(provider_api)][provider.provider_type] = provider_spec
+        else:
             registry[Api(provider_api)][provider_type] = spec
     except ModuleNotFoundError as exc:
         raise ValueError(
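To illustrate the new list handling without depending on the real ProviderSpec signature, a self-contained sketch of the dispatch logic: a module-level get_provider_spec() may now return either one spec or a list, and only entries matching the requested provider_type are registered. All names here are stand-ins, not real providers.

from dataclasses import dataclass


@dataclass
class FakeSpec:  # stand-in for ProviderSpec, illustration only
    api: str
    provider_type: str


def get_provider_spec() -> list[FakeSpec]:
    # A single external module exposing two flavors of the same provider family.
    return [
        FakeSpec(api="inference", provider_type="remote::acme"),
        FakeSpec(api="inference", provider_type="inline::acme"),
    ]


def register(requested_provider_type: str) -> dict[str, FakeSpec]:
    registry: dict[str, FakeSpec] = {}
    spec = get_provider_spec()
    if isinstance(spec, list):
        for provider_spec in spec:
            if provider_spec.provider_type != requested_provider_type:
                continue  # skip entries for other flavors, mirroring the diff above
            registry[provider_spec.provider_type] = provider_spec
    else:
        registry[requested_provider_type] = spec
    return registry


print(register("remote::acme"))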
@@ -374,6 +374,10 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         body = options.params or {}
         body |= options.json_data or {}

+        # Merge extra_json parameters (extra_body from SDK is converted to extra_json)
+        if hasattr(options, "extra_json") and options.extra_json:
+            body |= options.extra_json
+
         matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
         body |= path_params
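A hedged example of what this unlocks for callers: OpenAI-style SDKs expose an extra_body argument that is forwarded as extra_json, so provider-specific fields can ride along with a standard request. Shown here with the stock OpenAI client over HTTP against an assumed local server; the in-process library client follows the same convention per the change above, and the extra field name is a placeholder, not a documented parameter.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed local server

resp = client.chat.completions.create(
    model="example-model",  # placeholder model id
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={"custom_routing_hint": "low-latency"},  # forwarded into the request body
)
print(resp.choices[0].message.content)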
|
@ -10,6 +10,7 @@ from typing import Any
|
||||||
from llama_stack.apis.agents import Agents
|
from llama_stack.apis.agents import Agents
|
||||||
from llama_stack.apis.batches import Batches
|
from llama_stack.apis.batches import Batches
|
||||||
from llama_stack.apis.benchmarks import Benchmarks
|
from llama_stack.apis.benchmarks import Benchmarks
|
||||||
|
from llama_stack.apis.conversations import Conversations
|
||||||
from llama_stack.apis.datasetio import DatasetIO
|
from llama_stack.apis.datasetio import DatasetIO
|
||||||
from llama_stack.apis.datasets import Datasets
|
from llama_stack.apis.datasets import Datasets
|
||||||
from llama_stack.apis.datatypes import ExternalApiSpec
|
from llama_stack.apis.datatypes import ExternalApiSpec
|
||||||
|
|
@ -96,6 +97,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
|
||||||
Api.tool_runtime: ToolRuntime,
|
Api.tool_runtime: ToolRuntime,
|
||||||
Api.files: Files,
|
Api.files: Files,
|
||||||
Api.prompts: Prompts,
|
Api.prompts: Prompts,
|
||||||
|
Api.conversations: Conversations,
|
||||||
}
|
}
|
||||||
|
|
||||||
if external_apis:
|
if external_apis:
|
||||||
|
|
|
||||||
|
|
@@ -27,7 +27,6 @@ from llama_stack.apis.inference import (
     CompletionResponseStreamChunk,
     Inference,
     ListOpenAIChatCompletionResponse,
-    LogProbConfig,
     Message,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,

@@ -42,12 +41,7 @@ from llama_stack.apis.inference import (
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
     Order,
-    ResponseFormat,
-    SamplingParams,
     StopReason,
-    ToolChoice,
-    ToolConfig,
-    ToolDefinition,
     ToolPromptFormat,
 )
 from llama_stack.apis.models import Model, ModelType

@@ -185,129 +179,6 @@ class InferenceRouter(Inference):
             raise ModelTypeError(model_id, model.model_type, expected_model_type)
         return model

-    async def chat_completion(
-        self,
-        model_id: str,
-        messages: list[Message],
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        tools: list[ToolDefinition] | None = None,
-        tool_choice: ToolChoice | None = None,
-        tool_prompt_format: ToolPromptFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-        tool_config: ToolConfig | None = None,
-    ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]:
-        logger.debug(
-            f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
-        )
-        if sampling_params is None:
-            sampling_params = SamplingParams()
-        model = await self._get_model(model_id, ModelType.llm)
-        if tool_config:
-            if tool_choice and tool_choice != tool_config.tool_choice:
-                raise ValueError("tool_choice and tool_config.tool_choice must match")
-            if tool_prompt_format and tool_prompt_format != tool_config.tool_prompt_format:
-                raise ValueError("tool_prompt_format and tool_config.tool_prompt_format must match")
-        else:
-            params = {}
-            if tool_choice:
-                params["tool_choice"] = tool_choice
-            if tool_prompt_format:
-                params["tool_prompt_format"] = tool_prompt_format
-            tool_config = ToolConfig(**params)
-
-        tools = tools or []
-        if tool_config.tool_choice == ToolChoice.none:
-            tools = []
-        elif tool_config.tool_choice == ToolChoice.auto:
-            pass
-        elif tool_config.tool_choice == ToolChoice.required:
-            pass
-        else:
-            # verify tool_choice is one of the tools
-            tool_names = [t.tool_name if isinstance(t.tool_name, str) else t.tool_name.value for t in tools]
-            if tool_config.tool_choice not in tool_names:
-                raise ValueError(f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}")
-
-        params = dict(
-            model_id=model_id,
-            messages=messages,
-            sampling_params=sampling_params,
-            tools=tools,
-            tool_choice=tool_choice,
-            tool_prompt_format=tool_prompt_format,
-            response_format=response_format,
-            stream=stream,
-            logprobs=logprobs,
-            tool_config=tool_config,
-        )
-        provider = await self.routing_table.get_provider_impl(model_id)
-        prompt_tokens = await self._count_tokens(messages, tool_config.tool_prompt_format)
-
-        if stream:
-            response_stream = await provider.chat_completion(**params)
-            return self.stream_tokens_and_compute_metrics(
-                response=response_stream,
-                prompt_tokens=prompt_tokens,
-                model=model,
-                tool_prompt_format=tool_config.tool_prompt_format,
-            )
-
-        response = await provider.chat_completion(**params)
-        metrics = await self.count_tokens_and_compute_metrics(
-            response=response,
-            prompt_tokens=prompt_tokens,
-            model=model,
-            tool_prompt_format=tool_config.tool_prompt_format,
-        )
-        # these metrics will show up in the client response.
-        response.metrics = (
-            metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
-        )
-        return response
-
-    async def completion(
-        self,
-        model_id: str,
-        content: InterleavedContent,
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-    ) -> AsyncGenerator:
-        if sampling_params is None:
-            sampling_params = SamplingParams()
-        logger.debug(
-            f"InferenceRouter.completion: {model_id=}, {stream=}, {content=}, {sampling_params=}, {response_format=}",
-        )
-        model = await self._get_model(model_id, ModelType.llm)
-        provider = await self.routing_table.get_provider_impl(model_id)
-        params = dict(
-            model_id=model_id,
-            content=content,
-            sampling_params=sampling_params,
-            response_format=response_format,
-            stream=stream,
-            logprobs=logprobs,
-        )
-
-        prompt_tokens = await self._count_tokens(content)
-        response = await provider.completion(**params)
-        if stream:
-            return self.stream_tokens_and_compute_metrics(
-                response=response,
-                prompt_tokens=prompt_tokens,
-                model=model,
-            )
-
-        metrics = await self.count_tokens_and_compute_metrics(
-            response=response, prompt_tokens=prompt_tokens, model=model
-        )
-        response.metrics = metrics if response.metrics is None else response.metrics + metrics
-
-        return response
-
     async def openai_completion(
         self,
         model: str,
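With the bespoke chat_completion and completion router methods removed, the OpenAI-compatible path is the remaining entry point. A streaming sketch over raw HTTP follows; the server address, model id, and SSE framing are assumptions for illustration.

import json

import httpx

payload = {
    "model": "example-model",  # placeholder model id
    "messages": [{"role": "user", "content": "Stream a short greeting."}],
    "stream": True,
}

with httpx.stream("POST", "http://localhost:8321/v1/chat/completions", json=payload, timeout=60.0) as resp:
    for line in resp.iter_lines():
        if not line or not line.startswith("data: "):
            continue
        chunk = line.removeprefix("data: ")
        if chunk == "[DONE]":
            break
        # Each SSE chunk is assumed to follow the OpenAI chat.completion.chunk shape.
        delta = json.loads(chunk)["choices"][0]["delta"].get("content", "")
        print(delta, end="", flush=True)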
@@ -11,7 +11,7 @@ from llama_stack.apis.common.content_types import (
     InterleavedContent,
 )
 from llama_stack.apis.tools import (
-    ListToolsResponse,
+    ListToolDefsResponse,
     RAGDocument,
     RAGQueryConfig,
     RAGQueryResult,

@@ -86,6 +86,6 @@ class ToolRuntimeRouter(ToolRuntime):

     async def list_runtime_tools(
         self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
-    ) -> ListToolsResponse:
+    ) -> ListToolDefsResponse:
         logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}")
         return await self.routing_table.list_tools(tool_group_id)
@@ -8,7 +8,7 @@ from typing import Any

 from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.common.errors import ToolGroupNotFoundError
-from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
+from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
 from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
 from llama_stack.log import get_logger

@@ -27,7 +27,7 @@ def parse_toolgroup_from_toolgroup_name_pair(toolgroup_name_with_maybe_tool_name


 class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
-    toolgroups_to_tools: dict[str, list[Tool]] = {}
+    toolgroups_to_tools: dict[str, list[ToolDef]] = {}
     tool_to_toolgroup: dict[str, str] = {}

     # overridden
@@ -43,7 +43,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
             routing_key = self.tool_to_toolgroup[routing_key]
         return await super().get_provider_impl(routing_key, provider_id)

-    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
+    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
         if toolgroup_id:
             if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id):
                 toolgroup_id = group_id
@@ -68,30 +68,19 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
                 continue
             all_tools.extend(self.toolgroups_to_tools[toolgroup.identifier])

-        return ListToolsResponse(data=all_tools)
+        return ListToolDefsResponse(data=all_tools)

     async def _index_tools(self, toolgroup: ToolGroup):
         provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id)
         tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint)

-        # TODO: kill this Tool vs ToolDef distinction
         tooldefs = tooldefs_response.data
-        tools = []
         for t in tooldefs:
-            tools.append(
-                Tool(
-                    identifier=t.name,
-                    toolgroup_id=toolgroup.identifier,
-                    description=t.description or "",
-                    parameters=t.parameters or [],
-                    metadata=t.metadata,
-                    provider_id=toolgroup.provider_id,
-                )
-            )
+            t.toolgroup_id = toolgroup.identifier

-        self.toolgroups_to_tools[toolgroup.identifier] = tools
-        for tool in tools:
-            self.tool_to_toolgroup[tool.identifier] = toolgroup.identifier
+        self.toolgroups_to_tools[toolgroup.identifier] = tooldefs
+        for tool in tooldefs:
+            self.tool_to_toolgroup[tool.name] = toolgroup.identifier

     async def list_tool_groups(self) -> ListToolGroupsResponse:
         return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group"))
@@ -102,12 +91,12 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
             raise ToolGroupNotFoundError(toolgroup_id)
         return tool_group

-    async def get_tool(self, tool_name: str) -> Tool:
+    async def get_tool(self, tool_name: str) -> ToolDef:
         if tool_name in self.tool_to_toolgroup:
             toolgroup_id = self.tool_to_toolgroup[tool_name]
             tools = self.toolgroups_to_tools[toolgroup_id]
             for tool in tools:
-                if tool.identifier == tool_name:
+                if tool.name == tool_name:
                     return tool
         raise ValueError(f"Tool '{tool_name}' not found")

@@ -132,7 +121,6 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
         # baked in some of the code and tests right now.
         if not toolgroup.mcp_endpoint:
             await self._index_tools(toolgroup)
-        return toolgroup

     async def unregister_toolgroup(self, toolgroup_id: str) -> None:
         await self.unregister_object(await self.get_tool_group(toolgroup_id))
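The hunks above drop the Tool vs ToolDef conversion: the routing table now stores the providers' ToolDef objects directly, stamps the owning toolgroup onto each, and keys lookups by `name` rather than `identifier`. A minimal sketch of the new indexing shape is below; the `ToolDef` here is a simplified stand-in whose fields are assumed, not the real llama_stack.apis.tools type.

from dataclasses import dataclass, field


@dataclass
class ToolDef:
    # Simplified stand-in for llama_stack.apis.tools.ToolDef; real fields may differ.
    name: str
    description: str = ""
    toolgroup_id: str | None = None
    metadata: dict = field(default_factory=dict)


toolgroups_to_tools: dict[str, list[ToolDef]] = {}
tool_to_toolgroup: dict[str, str] = {}


def index_tools(toolgroup_id: str, tooldefs: list[ToolDef]) -> None:
    # Mirrors the new _index_tools: no Tool() conversion, just annotate and index.
    for t in tooldefs:
        t.toolgroup_id = toolgroup_id
    toolgroups_to_tools[toolgroup_id] = tooldefs
    tool_to_toolgroup.update({t.name: toolgroup_id for t in tooldefs})


index_tools("builtin::demo", [ToolDef(name="search"), ToolDef(name="fetch")])
assert tool_to_toolgroup["search"] == "builtin::demo"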
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import argparse
 import asyncio
 import concurrent.futures
 import functools
@@ -12,7 +11,6 @@ import inspect
 import json
 import logging  # allow-direct-logging
 import os
-import ssl
 import sys
 import traceback
 import warnings
@@ -35,7 +33,6 @@ from pydantic import BaseModel, ValidationError

 from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
 from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.cli.utils import add_config_distro_args, get_config_from_args
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
     AuthenticationRequiredError,
@@ -55,7 +52,6 @@ from llama_stack.core.stack import (
     Stack,
     cast_image_name_to_string,
     replace_env_vars,
-    validate_env_pair,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
@@ -257,7 +253,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:

             return result
         except Exception as e:
-            if logger.isEnabledFor(logging.DEBUG):
+            if logger.isEnabledFor(logging.INFO):
                 logger.exception(f"Error executing endpoint {route=} {method=}")
             else:
                 logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
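This hunk loosens the gate for full-traceback logging from DEBUG to INFO. A quick sketch of the effect, using only the standard library logging module; the logger name is illustrative.

import logging

logger = logging.getLogger("core::server")
logger.setLevel(logging.INFO)

try:
    raise RuntimeError("boom")
except Exception as e:
    # After this change an INFO-level logger is enough to capture the full traceback;
    # previously DEBUG was required and INFO only produced the short error line.
    if logger.isEnabledFor(logging.INFO):
        logger.exception("Error executing endpoint")
    else:
        logger.error(f"Error executing endpoint: {str(e)}")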
@@ -333,23 +329,18 @@ class ClientVersionMiddleware:
         return await self.app(scope, receive, send)


-def create_app(
-    config_file: str | None = None,
-    env_vars: list[str] | None = None,
-) -> StackApp:
+def create_app() -> StackApp:
     """Create and configure the FastAPI application.

-    Args:
-        config_file: Path to config file. If None, uses LLAMA_STACK_CONFIG env var or default resolution.
-        env_vars: List of environment variables in KEY=value format.
-        disable_version_check: Whether to disable version checking. If None, uses LLAMA_STACK_DISABLE_VERSION_CHECK env var.
+    This factory function reads configuration from environment variables:
+    - LLAMA_STACK_CONFIG: Path to config file (required)

     Returns:
         Configured StackApp instance.
     """
-    config_file = config_file or os.getenv("LLAMA_STACK_CONFIG")
+    config_file = os.getenv("LLAMA_STACK_CONFIG")
     if config_file is None:
-        raise ValueError("No config file provided and LLAMA_STACK_CONFIG env var is not set")
+        raise ValueError("LLAMA_STACK_CONFIG environment variable is required")

     config_file = resolve_config_or_distro(config_file, Mode.RUN)
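With `create_app()` now taking no arguments and reading LLAMA_STACK_CONFIG from the environment, it can be used as an ASGI application factory. A hedged sketch of how one might launch it with uvicorn follows; the module path "llama_stack.core.server.server", the config file name, and the port are assumptions inferred from this diff, not confirmed by it.

import os

import uvicorn

# Assumed usage of the zero-argument factory shown in this hunk.
# "run.yaml" and the module path are illustrative.
os.environ["LLAMA_STACK_CONFIG"] = "run.yaml"

if __name__ == "__main__":
    uvicorn.run(
        "llama_stack.core.server.server:create_app",
        factory=True,  # tell uvicorn to call create_app() to build the app
        host="0.0.0.0",
        port=8321,  # default port referenced elsewhere in this file
    )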
@@ -361,16 +352,6 @@ def create_app(
         logger_config = LoggingConfig(**cfg)
     logger = get_logger(name=__name__, category="core::server", config=logger_config)

-    if env_vars:
-        for env_pair in env_vars:
-            try:
-                key, value = validate_env_pair(env_pair)
-                logger.info(f"Setting environment variable {key} => {value}")
-                os.environ[key] = value
-            except ValueError as e:
-                logger.error(f"Error: {str(e)}")
-                raise ValueError(f"Invalid environment variable format: {env_pair}") from e
-
     config = replace_env_vars(config_contents)
     config = StackRunConfig(**cast_image_name_to_string(config))
@@ -451,6 +432,7 @@
         apis_to_serve.add("inspect")
         apis_to_serve.add("providers")
         apis_to_serve.add("prompts")
+        apis_to_serve.add("conversations")
     for api_str in apis_to_serve:
         api = Api(api_str)
@@ -493,101 +475,6 @@
     return app


-def main(args: argparse.Namespace | None = None):
-    """Start the LlamaStack server."""
-    parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
-
-    add_config_distro_args(parser)
-    parser.add_argument(
-        "--port",
-        type=int,
-        default=int(os.getenv("LLAMA_STACK_PORT", 8321)),
-        help="Port to listen on",
-    )
-    parser.add_argument(
-        "--env",
-        action="append",
-        help="Environment variables in KEY=value format. Can be specified multiple times.",
-    )
-
-    # Determine whether the server args are being passed by the "run" command, if this is the case
-    # the args will be passed as a Namespace object to the main function, otherwise they will be
-    # parsed from the command line
-    if args is None:
-        args = parser.parse_args()
-
-    config_or_distro = get_config_from_args(args)
-
-    try:
-        app = create_app(
-            config_file=config_or_distro,
-            env_vars=args.env,
-        )
-    except Exception as e:
-        logger.error(f"Error creating app: {str(e)}")
-        sys.exit(1)
-
-    config_file = resolve_config_or_distro(config_or_distro, Mode.RUN)
-    with open(config_file) as fp:
-        config_contents = yaml.safe_load(fp)
-    if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
-        logger_config = LoggingConfig(**cfg)
-    else:
-        logger_config = None
-    config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
-
-    import uvicorn
-
-    # Configure SSL if certificates are provided
-    port = args.port or config.server.port
-
-    ssl_config = None
-    keyfile = config.server.tls_keyfile
-    certfile = config.server.tls_certfile
-
-    if keyfile and certfile:
-        ssl_config = {
-            "ssl_keyfile": keyfile,
-            "ssl_certfile": certfile,
-        }
-        if config.server.tls_cafile:
-            ssl_config["ssl_ca_certs"] = config.server.tls_cafile
-            ssl_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
-            logger.info(
-                f"HTTPS enabled with certificates:\n  Key: {keyfile}\n  Cert: {certfile}\n  CA: {config.server.tls_cafile}"
-            )
-        else:
-            logger.info(f"HTTPS enabled with certificates:\n  Key: {keyfile}\n  Cert: {certfile}")
-
-    listen_host = config.server.host or ["::", "0.0.0.0"]
-    logger.info(f"Listening on {listen_host}:{port}")
-
-    uvicorn_config = {
-        "app": app,
-        "host": listen_host,
-        "port": port,
-        "lifespan": "on",
-        "log_level": logger.getEffectiveLevel(),
-        "log_config": logger_config,
-    }
-    if ssl_config:
-        uvicorn_config.update(ssl_config)
-
-    # We need to catch KeyboardInterrupt because uvicorn's signal handling
-    # re-raises SIGINT signals using signal.raise_signal(), which Python
-    # converts to KeyboardInterrupt. Without this catch, we'd get a confusing
-    # stack trace when using Ctrl+C or kill -2 (SIGINT).
-    # SIGTERM (kill -15) works fine without this because Python doesn't
-    # have a default handler for it.
-    #
-    # Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
-    # signal handling but this is quite intrusive and not worth the effort.
-    try:
-        asyncio.run(uvicorn.Server(uvicorn.Config(**uvicorn_config)).serve())
-    except (KeyboardInterrupt, SystemExit):
-        logger.info("Received interrupt signal, shutting down gracefully...")


 def _log_run_config(run_config: StackRunConfig):
     """Logs the run config with redacted fields and disabled providers removed."""
     logger.info("Run configuration:")
@@ -614,7 +501,3 @@ def remove_disabled_providers(obj):
         return [item for item in (remove_disabled_providers(i) for i in obj) if item is not None]
     else:
         return obj
-
-
-if __name__ == "__main__":
-    main()
@@ -15,6 +15,7 @@ import yaml

 from llama_stack.apis.agents import Agents
 from llama_stack.apis.benchmarks import Benchmarks
+from llama_stack.apis.conversations import Conversations
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.eval import Eval
@@ -34,6 +35,7 @@ from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
+from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
 from llama_stack.core.datatypes import Provider, StackRunConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
@@ -73,6 +75,7 @@ class LlamaStack(
     RAGToolRuntime,
     Files,
     Prompts,
+    Conversations,
 ):
     pass

@@ -312,6 +315,12 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
     )
     impls[Api.prompts] = prompts_impl

+    conversations_impl = ConversationServiceImpl(
+        ConversationServiceConfig(run_config=run_config),
+        deps=impls,
+    )
+    impls[Api.conversations] = conversations_impl
+

 class Stack:
     def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):

@@ -342,6 +351,8 @@ class Stack:

         if Api.prompts in impls:
             await impls[Api.prompts].initialize()
+        if Api.conversations in impls:
+            await impls[Api.conversations].initialize()

         await register_resources(self.run_config, impls)
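The hunks above follow the pattern used for built-in (non-provider) APIs such as prompts: construct the implementation in add_internal_implementations, register it under its Api key, and initialize it during stack startup. A minimal sketch of that registration and initialization lifecycle is below; Api and ConversationService are simplified stand-ins for the real llama-stack types.

import asyncio
from enum import Enum
from typing import Any


class Api(str, Enum):
    # Subset stand-in for the real Api enum.
    prompts = "prompts"
    conversations = "conversations"


class ConversationService:
    # Simplified stand-in for ConversationServiceImpl.
    def __init__(self, run_config: Any, deps: dict[Api, Any]):
        self.run_config = run_config
        self.deps = deps
        self.ready = False

    async def initialize(self) -> None:
        self.ready = True


async def start_stack(run_config: Any) -> dict[Api, Any]:
    impls: dict[Api, Any] = {}
    # Mirror add_internal_implementations(): build the impl and register it by Api key.
    impls[Api.conversations] = ConversationService(run_config, deps=impls)
    # Mirror Stack.initialize(): initialize internal impls before serving requests.
    if Api.conversations in impls:
        await impls[Api.conversations].initialize()
    return impls


impls = asyncio.run(start_stack(run_config={}))
assert impls[Api.conversations].ready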