Merge branch 'llamastack:main' into model_unregisteration_error_message
.github/CODEOWNERS (2 changes)

@@ -2,4 +2,4 @@
  # These owners will be the default owners for everything in
  # the repo. Unless a later match takes precedence,
- * @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1
+ * @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
.github/ISSUE_TEMPLATE/config.yml (2 changes)

@@ -2,7 +2,7 @@ blank_issues_enabled: false
  contact_links:
  - name: Have you read the docs?
-   url: https://llamastack.github.io/latest/providers/external/index.html
+   url: https://llamastack.github.io/providers/external/index.html
    about: Much help can be found in the docs
  - name: Start a discussion
    url: https://github.com/llamastack/llama-stack/discussions/new/
.github/TRIAGERS.md (1 change)

@@ -1,2 +1 @@
  # This file documents Triage members in the Llama Stack community
- @franciscojavierarceo
.github/workflows/README.md (1 change)

@@ -12,6 +12,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl…
  | Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
  | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
  | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
+ | Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
  | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
  | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
  | Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
.github/workflows/conformance.yml (93 changes)

@@ -1,6 +1,11 @@
  # API Conformance Tests
  # This workflow ensures that API changes maintain backward compatibility and don't break existing integrations
  # It runs schema validation and OpenAPI diff checks to catch breaking changes early
+ #
+ # The workflow handles both monolithic and split API specifications:
+ # - If split specs exist (stable/experimental/deprecated), they are stitched together for comparison
+ # - If only monolithic spec exists, it is used directly
+ # This allows for clean API organization while maintaining robust conformance testing

  name: API Conformance Tests

@@ -11,11 +16,14 @@ on:
  branches: [ main ]
  pull_request:
  branches: [ main ]
- types: [opened, synchronize, reopened]
+ types: [opened, synchronize, reopened, edited]
  paths:
- - 'docs/static/llama-stack-spec.yaml'
- - 'docs/static/llama-stack-spec.html'
- - '.github/workflows/conformance.yml' # This workflow itself
+ - 'docs/static/llama-stack-spec.yaml' # Legacy monolithic spec
+ - 'docs/static/stable-llama-stack-spec.yaml' # Stable APIs spec
+ - 'docs/static/experimental-llama-stack-spec.yaml' # Experimental APIs spec
+ - 'docs/static/deprecated-llama-stack-spec.yaml' # Deprecated APIs spec
+ - 'docs/static/llama-stack-spec.html' # Legacy HTML spec
+ - '.github/workflows/conformance.yml' # This workflow itself

  concurrency:
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -27,14 +35,31 @@ jobs:
  check-schema-compatibility:
  runs-on: ubuntu-latest
  steps:
  # Using specific version 4.1.7 because 5.0.0 fails when trying to run this locally using `act`
  # This ensures consistent behavior between local testing and CI
  - name: Checkout PR Code
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
  with:
  fetch-depth: 0

+ # Check if we should skip conformance testing due to breaking changes
+ - name: Check if conformance test should be skipped
+ id: skip-check
+ run: |
+ PR_TITLE="${{ github.event.pull_request.title }}"
+
+ # Skip if title contains "!:" indicating breaking change (like "feat!:")
+ if [[ "$PR_TITLE" == *"!:"* ]]; then
+ echo "skip=true" >> $GITHUB_OUTPUT
+ exit 0
+ fi
+
+ # Get all commits in this PR and check for BREAKING CHANGE footer
+ git log --format="%B" ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }} | \
+ grep -q "BREAKING CHANGE:" && echo "skip=true" >> $GITHUB_OUTPUT || echo "skip=false" >> $GITHUB_OUTPUT
+ shell: bash
  # Checkout the base branch to compare against (usually main)
  # This allows us to diff the current changes against the previous state
  - name: Checkout Base Branch
+ if: steps.skip-check.outputs.skip != 'true'
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
  with:
  ref: ${{ github.event.pull_request.base.ref }}

@@ -42,6 +67,7 @@ jobs:
  # Cache oasdiff to avoid checksum failures and speed up builds
  - name: Cache oasdiff
+ if: steps.skip-check.outputs.skip != 'true'
  id: cache-oasdiff
  uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830
  with:
@@ -50,20 +76,69 @@
  # Install oasdiff: https://github.com/oasdiff/oasdiff, a tool for detecting breaking changes in OpenAPI specs.
  - name: Install oasdiff
- if: steps.cache-oasdiff.outputs.cache-hit != 'true'
+ if: steps.skip-check.outputs.skip != 'true' && steps.cache-oasdiff.outputs.cache-hit != 'true'
  run: |
  curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh
  cp /usr/local/bin/oasdiff ~/oasdiff

  # Setup cached oasdiff
  - name: Setup cached oasdiff
- if: steps.cache-oasdiff.outputs.cache-hit == 'true'
+ if: steps.skip-check.outputs.skip != 'true' && steps.cache-oasdiff.outputs.cache-hit == 'true'
  run: |
  sudo cp ~/oasdiff /usr/local/bin/oasdiff
  sudo chmod +x /usr/local/bin/oasdiff

  # Install yq for YAML processing
  - name: Install yq
  run: |
  sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
  sudo chmod +x /usr/local/bin/yq

+ # Verify API specs exist for conformance testing
+ - name: Check API Specs
+ if: steps.skip-check.outputs.skip != 'true'
+ run: |
+ echo "Checking for API specification files..."
+
+ # Check current branch
+ if [ -f "docs/static/stable-llama-stack-spec.yaml" ]; then
+ echo "✓ Found stable API spec in current branch"
+ CURRENT_SPEC="docs/static/stable-llama-stack-spec.yaml"
+ elif [ -f "docs/static/llama-stack-spec.yaml" ]; then
+ echo "✓ Found monolithic API spec in current branch"
+ CURRENT_SPEC="docs/static/llama-stack-spec.yaml"
+ else
+ echo "❌ No API specs found in current branch"
+ exit 1
+ fi
+
+ # Check base branch
+ if [ -f "base/docs/static/stable-llama-stack-spec.yaml" ]; then
+ echo "✓ Found stable API spec in base branch"
+ BASE_SPEC="base/docs/static/stable-llama-stack-spec.yaml"
+ elif [ -f "base/docs/static/llama-stack-spec.yaml" ]; then
+ echo "✓ Found monolithic API spec in base branch"
+ BASE_SPEC="base/docs/static/llama-stack-spec.yaml"
+ else
+ echo "❌ No API specs found in base branch"
+ exit 1
+ fi
+
+ # Export for next step
+ echo "BASE_SPEC=${BASE_SPEC}" >> $GITHUB_ENV
+ echo "CURRENT_SPEC=${CURRENT_SPEC}" >> $GITHUB_ENV
+
+ echo "Will compare: ${BASE_SPEC} -> ${CURRENT_SPEC}"

  # Run oasdiff to detect breaking changes in the API specification
  # This step will fail if incompatible changes are detected, preventing breaking changes from being merged
  - name: Run OpenAPI Breaking Change Diff
+ if: steps.skip-check.outputs.skip != 'true'
  run: |
- oasdiff breaking --fail-on ERR base/docs/static/llama-stack-spec.yaml docs/static/llama-stack-spec.yaml --match-path '^/v1/'
+ oasdiff breaking --fail-on ERR $BASE_SPEC $CURRENT_SPEC --match-path '^/v1/'

+ # Report when test is skipped
+ - name: Report skip reason
+ if: steps.skip-check.outputs.skip == 'true'
+ run: |
+ echo "Conformance test skipped due to breaking change indicator"
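For reviewers who want to try the same breaking-change check outside CI, a rough local sketch follows. It assumes oasdiff is installed via the same install script the workflow uses and that the base branch is `main` checked out into a `base/` worktree; those details are assumptions for illustration, not part of the workflow.

```bash
# Hypothetical local run of the conformance check (sketch only).
curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh

# Materialize the base branch under base/, mirroring the workflow's second checkout.
git worktree add base main

# Prefer the split stable spec when present, otherwise fall back to the monolithic spec.
BASE_SPEC=base/docs/static/stable-llama-stack-spec.yaml
[ -f "$BASE_SPEC" ] || BASE_SPEC=base/docs/static/llama-stack-spec.yaml
CURRENT_SPEC=docs/static/stable-llama-stack-spec.yaml
[ -f "$CURRENT_SPEC" ] || CURRENT_SPEC=docs/static/llama-stack-spec.yaml

# Fail on breaking changes to /v1/ paths, exactly as the CI step does.
oasdiff breaking --fail-on ERR "$BASE_SPEC" "$CURRENT_SPEC" --match-path '^/v1/'
```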
.github/workflows/integration-auth-tests.yml (2 changes)

@@ -84,6 +84,8 @@ jobs:
  yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/run.yaml
  cat $run_dir/run.yaml

+ # avoid line breaks in the server log, especially because we grep it below.
+ export COLUMNS=1984
  nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &

  - name: Wait for Llama Stack server to be ready
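The "Wait for Llama Stack server to be ready" step itself falls outside this hunk; a minimal sketch of one way to poll the server is shown below. The `/v1/health` path and port 8321 are assumed defaults taken from other files in this PR, not from the workflow step.

```bash
# Hypothetical readiness poll (sketch only); endpoint and port are assumptions.
for _ in $(seq 1 60); do
  if curl -sf http://localhost:8321/v1/health > /dev/null; then
    echo "Llama Stack server is ready"
    break
  fi
  sleep 2
done
```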
.github/workflows/integration-tests.yml (25 changes)

@@ -42,18 +42,27 @@ jobs:
  run-replay-mode-tests:
  runs-on: ubuntu-latest
- name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
+ name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

  strategy:
  fail-fast: false
  matrix:
  client-type: [library, server]
- # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
- setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
  # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
  python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
  client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
- suite: [base, vision]
+ # Define (setup, suite) pairs - they are always matched and cannot be independent
+ # Weekly schedule (Sun 1 AM): vllm+base
+ # Input test-setup=ollama-vision: ollama-vision+vision
+ # Default (including test-setup=ollama): both ollama+base and ollama-vision+vision
+ config: >-
+   ${{
+     github.event.schedule == '1 0 * * 0'
+     && fromJSON('[{"setup": "vllm", "suite": "base"}]')
+     || github.event.inputs.test-setup == 'ollama-vision'
+     && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
+     || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+   }}

  steps:
  - name: Checkout repository

@@ -64,14 +73,14 @@ jobs:
  with:
  python-version: ${{ matrix.python-version }}
  client-version: ${{ matrix.client-version }}
- setup: ${{ matrix.setup }}
- suite: ${{ matrix.suite }}
+ setup: ${{ matrix.config.setup }}
+ suite: ${{ matrix.config.suite }}
  inference-mode: 'replay'

  - name: Run tests
  uses: ./.github/actions/run-and-record-tests
  with:
  stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
- setup: ${{ matrix.setup }}
+ setup: ${{ matrix.config.setup }}
  inference-mode: 'replay'
- suite: ${{ matrix.suite }}
+ suite: ${{ matrix.config.suite }}
.github/workflows/precommit-trigger.yml (new file, 227 lines)

@@ -0,0 +1,227 @@
name: Pre-commit Bot

run-name: Pre-commit bot for PR #${{ github.event.issue.number }}

on:
issue_comment:
types: [created]

jobs:
pre-commit:
# Only run on pull request comments
if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit')
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write

steps:
- name: Check comment author and get PR details
id: check_author
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
// Get PR details
const pr = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: context.issue.number
});

// Check if commenter has write access or is the PR author
const commenter = context.payload.comment.user.login;
const prAuthor = pr.data.user.login;

let hasPermission = false;

// Check if commenter is PR author
if (commenter === prAuthor) {
hasPermission = true;
console.log(`Comment author ${commenter} is the PR author`);
} else {
// Check if commenter has write/admin access
try {
const permission = await github.rest.repos.getCollaboratorPermissionLevel({
owner: context.repo.owner,
repo: context.repo.repo,
username: commenter
});

const level = permission.data.permission;
hasPermission = ['write', 'admin', 'maintain'].includes(level);
console.log(`Comment author ${commenter} has permission: ${level}`);
} catch (error) {
console.log(`Could not check permissions for ${commenter}: ${error.message}`);
}
}

if (!hasPermission) {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: `❌ @${commenter} You don't have permission to trigger pre-commit. Only PR authors or repository collaborators can run this command.`
});
core.setFailed(`User ${commenter} does not have permission`);
return;
}

// Save PR info for later steps
core.setOutput('pr_number', context.issue.number);
core.setOutput('pr_head_ref', pr.data.head.ref);
core.setOutput('pr_head_sha', pr.data.head.sha);
core.setOutput('pr_head_repo', pr.data.head.repo.full_name);
core.setOutput('pr_base_ref', pr.data.base.ref);
core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name);
core.setOutput('authorized', 'true');
- name: React to comment
if: steps.check_author.outputs.authorized == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.reactions.createForIssueComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: context.payload.comment.id,
content: 'rocket'
});

- name: Comment starting
if: steps.check_author.outputs.authorized == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...`
});

- name: Checkout PR branch (same-repo)
if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false'
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
ref: ${{ steps.check_author.outputs.pr_head_ref }}
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}

- name: Checkout PR branch (fork)
if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true'
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
repository: ${{ steps.check_author.outputs.pr_head_repo }}
ref: ${{ steps.check_author.outputs.pr_head_ref }}
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}

- name: Verify checkout
if: steps.check_author.outputs.authorized == 'true'
run: |
echo "Current SHA: $(git rev-parse HEAD)"
echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}"
if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then
echo "::error::Checked out SHA does not match expected SHA"
exit 1
fi

- name: Set up Python
if: steps.check_author.outputs.authorized == 'true'
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: '3.12'
cache: pip
cache-dependency-path: |
**/requirements*.txt
.pre-commit-config.yaml

- name: Set up Node.js
if: steps.check_author.outputs.authorized == 'true'
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
with:
node-version: '20'
cache: 'npm'
cache-dependency-path: 'llama_stack/ui/'

- name: Install npm dependencies
if: steps.check_author.outputs.authorized == 'true'
run: npm ci
working-directory: llama_stack/ui

- name: Run pre-commit
if: steps.check_author.outputs.authorized == 'true'
id: precommit
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
continue-on-error: true
env:
SKIP: no-commit-to-branch
RUFF_OUTPUT_FORMAT: github

- name: Check for changes
if: steps.check_author.outputs.authorized == 'true'
id: changes
run: |
if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
echo "Changes detected after pre-commit"
else
echo "has_changes=false" >> $GITHUB_OUTPUT
echo "No changes after pre-commit"
fi
- name: Commit and push changes
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"

git add -A
git commit -m "style: apply pre-commit fixes

🤖 Applied by @github-actions bot via pre-commit workflow"

# Push changes
git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }}

- name: Comment success with changes
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `✅ Pre-commit hooks completed successfully!\n\n🔧 Changes have been committed and pushed to the PR branch.`
});

- name: Comment success without changes
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `✅ Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.`
});

- name: Comment failure
if: failure()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `❌ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.`
});
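For context on how this new workflow is invoked: a PR author or collaborator comments `@github-actions run precommit` on the pull request, and the job reacts, runs the hooks, and pushes any fixes back to the PR branch. A hedged sketch of posting that comment with the GitHub CLI (the PR number is a placeholder):

```bash
# Hypothetical trigger via the GitHub CLI; 1234 is a placeholder PR number.
gh pr comment 1234 --repo llamastack/llama-stack --body "@github-actions run precommit"
```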
.github/workflows/providers-build.yml (4 changes)

@@ -112,7 +112,7 @@ jobs:
  fi
  entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
  echo "Entrypoint: $entrypoint"
- if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+ if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
  echo "Entrypoint is not correct"
  exit 1
  fi

@@ -150,7 +150,7 @@ jobs:
  fi
  entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
  echo "Entrypoint: $entrypoint"
- if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+ if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
  echo "Entrypoint is not correct"
  exit 1
  fi
.github/workflows/python-build-test.yml (2 changes)

@@ -24,7 +24,7 @@ jobs:
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

  - name: Install uv
- uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
+ uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6.8.0
  with:
  python-version: ${{ matrix.python-version }}
  activate-environment: true
CONTRIBUTING.md

@@ -61,7 +61,7 @@ Before pushing your changes, make sure that the pre-commit hooks have passed suc…
  We actively welcome your pull requests. However, please read the following. This is heavily inspired by [Ghostty](https://github.com/ghostty-org/ghostty/blob/main/CONTRIBUTING.md).

- If in doubt, please open a [discussion](https://github.com/meta-llama/llama-stack/discussions); we can always convert that to an issue later.
+ If in doubt, please open a [discussion](https://github.com/llamastack/llama-stack/discussions); we can always convert that to an issue later.

  ### Issues
  We use GitHub issues to track public bugs. Please ensure your description is

@@ -165,8 +165,8 @@ Building a stack image will use the production version of the `llama-stack` and…
  Example:
  ```bash
  cd work/
- git clone https://github.com/meta-llama/llama-stack.git
- git clone https://github.com/meta-llama/llama-stack-client-python.git
+ git clone https://github.com/llamastack/llama-stack.git
+ git clone https://github.com/llamastack/llama-stack-client-python.git
  cd llama-stack
  LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
  ```
README.md

@@ -7,7 +7,7 @@
  [](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
  [](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)

- [**Quick Start**](https://llamastack.github.io/latest/getting_started/index.html) | [**Documentation**](https://llamastack.github.io/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
+ [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

  ### ✨🎉 Llama 4 Support 🎉✨

@@ -120,7 +120,7 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on…
  ### API Providers
  Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
- Please checkout for [full list](https://llamastack.github.io/latest/providers/index.html)
+ Please checkout for [full list](https://llamastack.github.io/docs/providers)

  | API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
  |:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|

@@ -151,7 +151,7 @@ Please checkout for [full list](https://llamastack.github.io/latest/providers/in…
  | NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
  | NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |

- > **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/latest/providers/external/index.html) documentation.
+ > **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation.

  ### Distributions
docs/docs/api-overview.md (new file, 49 lines)

@@ -0,0 +1,49 @@
# API Reference Overview

The Llama Stack provides a comprehensive set of APIs organized by stability level to help you choose the right endpoints for your use case.

## 🟢 Stable APIs

**Production-ready APIs with backward compatibility guarantees.**

These APIs are fully tested, documented, and stable. They follow semantic versioning principles and maintain backward compatibility within major versions. Recommended for production applications.

[**Browse Stable APIs →**](./api/llama-stack-specification)

**Key Features:**
- ✅ Backward compatibility guaranteed
- ✅ Comprehensive testing and validation
- ✅ Production-ready reliability
- ✅ Long-term support

---

## 🟡 Experimental APIs

**Preview APIs that may change before becoming stable.**

These APIs include v1alpha and v1beta endpoints that are feature-complete but may undergo changes based on feedback. Great for exploring new capabilities and providing feedback.

[**Browse Experimental APIs →**](./api-experimental/llama-stack-specification-experimental-apis)

**Key Features:**
- 🧪 Latest features and capabilities
- 🧪 May change based on user feedback
- 🧪 Active development and iteration
- 🧪 Opportunity to influence final design

---

## 🔴 Deprecated APIs

**Legacy APIs for migration reference.**

These APIs are deprecated and will be removed in future versions. They are provided for migration purposes and to help transition to newer, stable alternatives.

[**Browse Deprecated APIs →**](./api-deprecated/llama-stack-specification-deprecated-apis)

**Key Features:**
- ⚠️ Will be removed in future versions
- ⚠️ Migration guidance provided
- ⚠️ Use for compatibility during transition
- ⚠️ Not recommended for new projects
@@ -187,21 +187,21 @@ Configure telemetry behavior using environment variables:
  - **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
  - **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `console,sqlite`)

- ## Visualization with Jaeger
+ ### Quick Setup: Complete Telemetry Stack

  The `otel_trace` sink works with any service compatible with the OpenTelemetry collector. Traces and metrics use separate endpoints but can share the same collector.

- ### Starting Jaeger
-
- Start a Jaeger instance with OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686:
+ Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):

  ```bash
- docker run --pull always --rm --name jaeger \
-   -p 16686:16686 -p 4318:4318 \
-   jaegertracing/jaeger:2.1.0
+ ./scripts/telemetry/setup_telemetry.sh
  ```

- Once running, you can visualize traces by navigating to [http://localhost:16686/](http://localhost:16686/).
+ This sets up:
+ - **Jaeger UI**: http://localhost:16686 (traces visualization)
+ - **Prometheus**: http://localhost:9090 (metrics)
+ - **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
+ - **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
+
+ Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and log in with username `admin` and password `admin`.

  ## Querying Metrics
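Pointing a running stack at the collector is not shown in this hunk; a minimal sketch using the environment variables documented above follows. The sink list, endpoint, and run config path are assumptions for illustration.

```bash
# Hypothetical wiring of telemetry to the local collector started by the setup script.
export OTEL_SERVICE_NAME=llama-stack
export TELEMETRY_SINKS=console,sqlite,otel_trace
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318   # assumed OTLP endpoint of the collector
llama stack run ~/.llama/run-byoa.yaml                     # placeholder run config
```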
@@ -181,7 +181,7 @@ Once defined, simply pass the tool to the agent config. `Agent` will take care o…
  agent = Agent(client, ..., tools=[my_tool])
  ```

- Refer to [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/e2e_loop_with_client_tools.py) for an example of how to use client provided tools.
+ Refer to [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/) for an example of how to use client provided tools.

  ## Tool Invocation
@@ -152,7 +152,6 @@ __all__ = ["WeatherAPI", "available_providers"]
  from typing import Protocol

  from llama_stack.providers.datatypes import (
- AdapterSpec,
  Api,
  ProviderSpec,
  RemoteProviderSpec,

@@ -166,12 +165,10 @@ def available_providers() -> list[ProviderSpec]:
  api=Api.weather,
  provider_type="remote::kaze",
  config_class="llama_stack_provider_kaze.KazeProviderConfig",
- adapter=AdapterSpec(
-   adapter_type="kaze",
-   module="llama_stack_provider_kaze",
-   pip_packages=["llama_stack_provider_kaze"],
-   config_class="llama_stack_provider_kaze.KazeProviderConfig",
- ),
+ adapter_type="kaze",
+ module="llama_stack_provider_kaze",
+ pip_packages=["llama_stack_provider_kaze"],
+ config_class="llama_stack_provider_kaze.KazeProviderConfig",
  ),
  ]

@@ -325,11 +322,10 @@ class WeatherKazeAdapter(WeatherProvider):
  ```yaml
  # ~/.llama/providers.d/remote/weather/kaze.yaml
- adapter:
-   adapter_type: kaze
-   pip_packages: ["llama_stack_provider_kaze"]
-   config_class: llama_stack_provider_kaze.config.KazeProviderConfig
-   module: llama_stack_provider_kaze
+ adapter_type: kaze
+ pip_packages: ["llama_stack_provider_kaze"]
+ config_class: llama_stack_provider_kaze.config.KazeProviderConfig
+ module: llama_stack_provider_kaze
  optional_api_dependencies: []
  ```

@@ -361,7 +357,7 @@ server:
  8. Run the server:

  ```bash
- python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
+ llama stack run ~/.llama/run-byoa.yaml
  ```

  9. Test the API:
@@ -170,7 +170,7 @@ spec:
  - name: llama-stack
  image: localhost/llama-stack-run-k8s:latest
  imagePullPolicy: IfNotPresent
- command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
+ command: ["llama", "stack", "run", "/app/config.yaml"]
  ports:
  - containerPort: 5000
  volumeMounts:
@@ -509,16 +509,16 @@ server:
  provider_config:
  type: "github_token"
  github_api_base_url: "https://api.github.com"
- access_policy:
- - permit:
-     principal: user-1
-     actions: [create, read, delete]
-     description: user-1 has full access to all resources
- - permit:
-     principal: user-2
-     actions: [read]
-     resource: model::model-1
-     description: user-2 has read access to model-1 only
+ access_policy:
+ - permit:
+     principal: user-1
+     actions: [create, read, delete]
+     description: user-1 has full access to all resources
+ - permit:
+     principal: user-2
+     actions: [read]
+     resource: model::model-1
+     description: user-2 has read access to model-1 only
  ```

  Similarly, the following restricts access to particular kubernetes
@@ -52,7 +52,7 @@ spec:
  value: "${SAFETY_MODEL}"
  - name: TAVILY_SEARCH_API_KEY
  value: "${TAVILY_SEARCH_API_KEY}"
- command: ["python", "-m", "llama_stack.core.server.server", "/etc/config/stack_run_config.yaml", "--port", "8321"]
+ command: ["llama", "stack", "run", "/etc/config/stack_run_config.yaml", "--port", "8321"]
  ports:
  - containerPort: 8321
  volumeMounts:
@@ -131,4 +131,4 @@ graph TD
  3. **Configure your providers** with API keys or local models
  4. **Start building** with Llama Stack!

- For help choosing or troubleshooting, check our [Getting Started Guide](/docs/getting_started/quickstart) or [Community Support](https://github.com/llama-stack/llama-stack/discussions).
+ For help choosing or troubleshooting, check our [Getting Started Guide](/docs/getting_started/quickstart) or [Community Support](https://github.com/llamastack/llama-stack/discussions).
@@ -102,7 +102,7 @@ You can start a chroma-db easily using docker.
  # This is where the indices are persisted
  mkdir -p $HOME/chromadb

- podman run --rm -it \
+ docker run --rm -it \
  --network host \
  --name chromadb \
  -v $HOME/chromadb:/chroma/chroma \

@@ -127,7 +127,7 @@ docker run -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v $HOME/.llama:/root/.llama \
  # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed
- -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source \
+ -v $HOME/git/llama-stack:/app/llama-stack-source -v $HOME/git/llama-models:/app/llama-models-source \
  # localhost/distribution-dell:dev if building / testing locally
  llamastack/distribution-dell\
  --port $LLAMA_STACK_PORT \
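The `docker run` command in the first hunk above is cut off by the diff view; for orientation only, a typical Chroma invocation might look like the sketch below. The image name and the continuation of the flags are assumptions, not text from the diff.

```bash
# Hypothetical completion of the truncated command (image name is an assumption).
docker run --rm -it \
  --network host \
  --name chromadb \
  -v $HOME/chromadb:/chroma/chroma \
  chromadb/chroma
```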
@@ -14,13 +14,13 @@ Llama Stack is the open-source framework for building generative AI applications
  :::tip Llama 4 is here!

- Check out [Getting Started with Llama 4](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started_llama4.ipynb)
+ Check out [Getting Started with Llama 4](https://colab.research.google.com/github/llamastack/llama-stack/blob/main/docs/getting_started_llama4.ipynb)

  :::

  :::tip News

- Llama Stack is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases) for more details.
+ Llama Stack is now available! See the [release notes](https://github.com/llamastack/llama-stack/releases) for more details.

  :::

@@ -45,7 +45,7 @@ Llama Stack consists of a server (with multiple pluggable API providers) and Cli…
  ## Quick Links

- - Ready to build? Check out the [Getting Started Guide](https://llama-stack.github.io/getting_started/quickstart) to get started.
+ - Ready to build? Check out the [Getting Started Guide](/docs/getting_started/quickstart) to get started.
  - Want to contribute? See the [Contributing Guide](https://github.com/llamastack/llama-stack/blob/main/CONTRIBUTING.md).
  - Explore [Example Applications](https://github.com/llamastack/llama-stack-apps) built with Llama Stack.

@@ -59,13 +59,13 @@ Llama Stack provides adapters for popular providers across all API categories:
  - **Training & Evaluation**: HuggingFace, TorchTune, NVIDIA NEMO

  :::info Provider Details
- For complete provider compatibility and setup instructions, see our [Providers Documentation](https://llamastack.github.io/providers/).
+ For complete provider compatibility and setup instructions, see our [Providers Documentation](https://llamastack.github.io/docs/providers/).
  :::

  ## Get Started Today

  <div style={{display: 'flex', gap: '1rem', flexWrap: 'wrap', margin: '2rem 0'}}>
- <a href="https://llama-stack.github.io/getting_started/quickstart"
+ <a href="/docs/getting_started/quickstart"
     style={{
       background: 'var(--ifm-color-primary)',
       color: 'white',
@@ -1,12 +1,7 @@
  ---
- description: "Agents API for creating and interacting with agentic systems.
+ description: "Agents

- Main functionalities provided by this API:
- - Create agents with specific instructions and ability to use tools.
- - Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\".
- - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
- - Agents can be provided with various shields (see the Safety API for more details).
- - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details."
+ APIs for creating and interacting with agentic systems."
  sidebar_label: Agents
  title: Agents
  ---

@@ -15,13 +10,8 @@ title: Agents
  ## Overview

- Agents API for creating and interacting with agentic systems.
+ Agents

- Main functionalities provided by this API:
- - Create agents with specific instructions and ability to use tools.
- - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
- - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
- - Agents can be provided with various shields (see the Safety API for more details).
- - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
+ APIs for creating and interacting with agentic systems.

  This section contains documentation for all available providers for the **agents** API.
@@ -11,38 +11,6 @@ an example entry in your build.yaml should look like:
  module: ramalama_stack
  ```

- Additionally you can configure the `external_providers_dir` in your Llama Stack configuration. This method is in the process of being deprecated in favor of the `module` method. If using this method, the external provider directory should contain your external provider specifications:
-
- ```yaml
- external_providers_dir: ~/.llama/providers.d/
- ```
-
- ## Directory Structure
-
- The external providers directory should follow this structure:
-
- ```
- providers.d/
-   remote/
-     inference/
-       custom_ollama.yaml
-       vllm.yaml
-     vector_io/
-       qdrant.yaml
-     safety/
-       llama-guard.yaml
-   inline/
-     inference/
-       custom_ollama.yaml
-       vllm.yaml
-     vector_io/
-       qdrant.yaml
-     safety/
-       llama-guard.yaml
- ```
-
- Each YAML file in these directories defines a provider specification for that particular API.

  ## Provider Types

  Llama Stack supports two types of external providers:
@@ -50,30 +18,37 @@ Llama Stack supports two types of external providers:
  1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
  2. **Inline Providers**: Providers that run locally within the Llama Stack process

+ ### Provider Specification (Common between inline and remote providers)
+
+ - `provider_type`: The type of the provider to be installed (remote or inline). eg. `remote::ollama`
+ - `api`: The API for this provider, eg. `inference`
+ - `config_class`: The full path to the configuration class
+ - `module`: The Python module containing the provider implementation
+ - `optional_api_dependencies`: List of optional Llama Stack APIs that this provider can use
+ - `api_dependencies`: List of Llama Stack APIs that this provider depends on
+ - `provider_data_validator`: Optional validator for provider data.
+ - `pip_packages`: List of Python packages required by the provider

  ### Remote Provider Specification

  Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:

  ```yaml
- adapter:
-   adapter_type: custom_ollama
-   pip_packages:
-   - ollama
-   - aiohttp
-   config_class: llama_stack_ollama_provider.config.OllamaImplConfig
-   module: llama_stack_ollama_provider
+ adapter_type: custom_ollama
+ provider_type: "remote::ollama"
+ pip_packages:
+ - ollama
+ - aiohttp
+ config_class: llama_stack_ollama_provider.config.OllamaImplConfig
+ module: llama_stack_ollama_provider
  api_dependencies: []
  optional_api_dependencies: []
  ```

- #### Adapter Configuration
+ #### Remote Provider Configuration

- The `adapter` section defines how to load and configure the provider:
-
- - `adapter_type`: A unique identifier for this adapter
- - `pip_packages`: List of Python packages required by the provider
- - `config_class`: The full path to the configuration class
- - `module`: The Python module containing the provider implementation
+ - `adapter_type`: A unique identifier for this adapter, eg. `ollama`

  ### Inline Provider Specification
@@ -81,6 +56,7 @@ Inline providers run locally within the Llama Stack process. Here's an example f…
  ```yaml
  module: llama_stack_vector_provider
+ provider_type: inline::llama_stack_vector_provider
  config_class: llama_stack_vector_provider.config.VectorStoreConfig
  pip_packages:
  - faiss-cpu

@@ -95,12 +71,6 @@ container_image: custom-vector-store:latest # optional
  #### Inline Provider Fields

- - `module`: The Python module containing the provider implementation
- - `config_class`: The full path to the configuration class
- - `pip_packages`: List of Python packages required by the provider
- - `api_dependencies`: List of Llama Stack APIs that this provider depends on
- - `optional_api_dependencies`: List of optional Llama Stack APIs that this provider can use
- - `provider_data_validator`: Optional validator for provider data
  - `container_image`: Optional container image to use instead of pip packages

  ## Required Fields
@@ -113,20 +83,17 @@ All providers must contain a `get_provider_spec` function in their `provider` mo…
  from llama_stack.providers.datatypes import (
  ProviderSpec,
  Api,
- AdapterSpec,
- remote_provider_spec,
+ RemoteProviderSpec,
  )


  def get_provider_spec() -> ProviderSpec:
- return remote_provider_spec(
+ return RemoteProviderSpec(
  api=Api.inference,
- adapter=AdapterSpec(
-   adapter_type="ramalama",
-   pip_packages=["ramalama>=0.8.5", "pymilvus"],
-   config_class="ramalama_stack.config.RamalamaImplConfig",
-   module="ramalama_stack",
- ),
+ adapter_type="ramalama",
+ pip_packages=["ramalama>=0.8.5", "pymilvus"],
+ config_class="ramalama_stack.config.RamalamaImplConfig",
+ module="ramalama_stack",
  )
  ```

@@ -197,18 +164,16 @@ information. Execute the test for the Provider type you are developing.
  If your external provider isn't being loaded:

  1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
  1. Check that the `external_providers_dir` path is correct and accessible.
  2. Verify that the YAML files are properly formatted.
  3. Ensure all required Python packages are installed.
  4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
  information using `LLAMA_STACK_LOGGING=all=debug`.
  5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
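Item 4 of the checklist above can be combined with a normal server start; a small sketch (the run config path is a placeholder):

```bash
# Hypothetical: start the stack with verbose logging while debugging provider loading.
LLAMA_STACK_LOGGING=all=debug llama stack run ~/.llama/run-byoa.yaml
```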
  ## Examples

- ### Example using `external_providers_dir`: Custom Ollama Provider
+ ### How to create an external provider module

- Here's a complete example of creating and using a custom Ollama provider:
+ If you are creating a new external provider called `llama-stack-provider-ollama` here is how you would set up the package properly:

  1. First, create the provider package:

@@ -230,33 +195,28 @@ requires-python = ">=3.12"
  dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"]
  ```

- 3. Create the provider specification:
-
- ```yaml
- # ~/.llama/providers.d/remote/inference/custom_ollama.yaml
- adapter:
-   adapter_type: custom_ollama
-   pip_packages: ["ollama", "aiohttp"]
-   config_class: llama_stack_provider_ollama.config.OllamaImplConfig
-   module: llama_stack_provider_ollama
- api_dependencies: []
- optional_api_dependencies: []
- ```
-
- 4. Install the provider:
+ 3. Install the provider:

  ```bash
  uv pip install -e .
  ```

- 5. Configure Llama Stack to use external providers:
+ 4. Edit `provider.py`

- ```yaml
- external_providers_dir: ~/.llama/providers.d/
- ```
+ provider.py must be updated to contain `get_provider_spec`. This is used by llama stack to install the provider.
+
+ ```python
+ def get_provider_spec() -> ProviderSpec:
+     return RemoteProviderSpec(
+         api=Api.inference,
+         adapter_type="llama-stack-provider-ollama",
+         pip_packages=["ollama", "aiohttp"],
+         config_class="llama_stack_provider_ollama.config.OllamaImplConfig",
+         module="llama_stack_provider_ollama",
+     )
+ ```

  The provider will now be available in Llama Stack with the type `remote::custom_ollama`.

+ 5. Implement the provider as outlined above with `get_provider_impl` or `get_adapter_impl`, etc.

  ### Example using `module`: ramalama-stack

@@ -275,7 +235,6 @@ distribution_spec:
  module: ramalama_stack==0.3.0a0
  image_type: venv
  image_name: null
- external_providers_dir: null
  additional_pip_packages:
  - aiosqlite
  - sqlalchemy[asyncio]
@@ -14,6 +14,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv…
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
  | `api_key` | `str \| None` | No | | API key for Anthropic models |

  ## Sample Configuration

@@ -21,6 +21,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
  | `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Azure API key for Azure |
  | `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
  | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |

@@ -14,6 +14,7 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man…
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
  | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
  | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
  | `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |

@@ -14,6 +14,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
  | `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
  | `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |

@@ -14,7 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic…
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
- | `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+ | `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
  | `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |

  ## Sample Configuration

@@ -14,6 +14,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser…
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
  | `api_key` | `str \| None` | No | | API key for Gemini models |

  ## Sample Configuration

@@ -14,6 +14,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
  | `api_key` | `str \| None` | No | | The Groq API key |
  | `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |

@@ -14,6 +14,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
  | `api_key` | `str \| None` | No | | The Llama API key |
  | `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |

@@ -14,6 +14,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
  | Field | Type | Required | Default | Description |
  |-------|------|----------|---------|-------------|
+ | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
  | `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
  | `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed if using the hosted service |
  | `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
@ -14,6 +14,7 @@ Ollama inference provider for running local models through the Ollama runtime.
|
|||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||
| `url` | `<class 'str'>` | No | http://localhost:11434 | |
|
||||
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
|
|||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||
| `api_key` | `str \| None` | No | | API key for OpenAI models |
|
||||
| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ Passthrough inference provider for connecting to any external inference service
|
|||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
|
||||
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint |
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
|
|||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
|
||||
| `api_token` | `str \| None` | No | | The API token |
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
|
|||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
|
||||
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |
|
||||
|
||||
|
|
|
|||
|
|
@@ -14,6 +14,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `<class 'str'>` | No | | The URL for the TGI serving endpoint |

## Sample Configuration
@@ -53,6 +53,7 @@ Available Models:

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI |
| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |
@@ -14,6 +14,7 @@ Remote vLLM inference provider for connecting to vLLM servers.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
| `api_token` | `str \| None` | No | fake | The API token |
@@ -14,6 +14,7 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing watsonx.ai |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
| `project_id` | `str \| None` | No | | The Project ID key |
@@ -14,6 +14,7 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Defaults to the environment variable AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Defaults to the environment variable AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Defaults to the environment variable AWS_SESSION_TOKEN |
@@ -16,14 +16,14 @@ Meta's reference implementation of telemetry and observability using OpenTelemetry.

|-------|------|----------|---------|-------------|
| `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. |
| `service_name` | `<class 'str'>` | No | | The service name to use for telemetry |
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink]` | No | [<TelemetrySink.CONSOLE: 'console'>, <TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) |
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink]` | No | [<TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) |
| `sqlite_db_path` | `<class 'str'>` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces |

## Sample Configuration

```yaml
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sinks: ${env.TELEMETRY_SINKS:=sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
```
@@ -15,6 +15,50 @@ const config: Config = {
  onBrokenMarkdownLinks: "warn",
  favicon: "img/favicon.ico",

  // Enhanced favicon and meta configuration
  headTags: [
    {
      tagName: 'link',
      attributes: {
        rel: 'icon',
        type: 'image/png',
        sizes: '32x32',
        href: '/img/favicon-32x32.png',
      },
    },
    {
      tagName: 'link',
      attributes: {
        rel: 'icon',
        type: 'image/png',
        sizes: '16x16',
        href: '/img/favicon-16x16.png',
      },
    },
    {
      tagName: 'link',
      attributes: {
        rel: 'apple-touch-icon',
        sizes: '180x180',
        href: '/img/llama-stack-logo.png',
      },
    },
    {
      tagName: 'meta',
      attributes: {
        name: 'theme-color',
        content: '#7C3AED', // Purple color from your logo
      },
    },
    {
      tagName: 'link',
      attributes: {
        rel: 'manifest',
        href: '/site.webmanifest',
      },
    },
  ],

  // GitHub pages deployment config.
  organizationName: 'reluctantfuturist',
  projectName: 'llama-stack',
@@ -26,9 +70,6 @@ const config: Config = {
    {
      docs: {
        sidebarPath: require.resolve("./sidebars.ts"),
        // Please change this to your repo.
        // Remove this to remove the "edit this page" links.
        editUrl: 'https://github.com/meta-llama/llama-stack/tree/main/docs/',
        docItemComponent: "@theme/ApiItem", // Derived from docusaurus-theme-openapi
      },
      blog: false,
@@ -55,10 +96,27 @@ const config: Config = {
          label: 'Docs',
        },
        {
          type: 'docSidebar',
          sidebarId: 'apiSidebar',
          position: 'left',
          type: 'dropdown',
          label: 'API Reference',
          position: 'left',
          to: '/docs/api-overview',
          items: [
            {
              type: 'docSidebar',
              sidebarId: 'stableApiSidebar',
              label: '🟢 Stable APIs',
            },
            {
              type: 'docSidebar',
              sidebarId: 'experimentalApiSidebar',
              label: '🟡 Experimental APIs',
            },
            {
              type: 'docSidebar',
              sidebarId: 'deprecatedApiSidebar',
              label: '🔴 Deprecated APIs',
            },
          ],
        },
        {
          href: 'https://github.com/llamastack/llama-stack',
@@ -83,7 +141,7 @@ const config: Config = {
            },
            {
              label: 'API Reference',
              to: '/docs/api/llama-stack-specification',
              to: '/docs/api-overview',
            },
          ],
        },
@@ -170,7 +228,7 @@ const config: Config = {
        id: "openapi",
        docsPluginId: "classic",
        config: {
          llamastack: {
          stable: {
            specPath: "static/llama-stack-spec.yaml",
            outputDir: "docs/api",
            downloadUrl: "https://raw.githubusercontent.com/meta-llama/llama-stack/main/docs/static/llama-stack-spec.yaml",
@@ -179,6 +237,24 @@ const config: Config = {
              categoryLinkSource: "tag",
            },
          } satisfies OpenApiPlugin.Options,
          experimental: {
            specPath: "static/experimental-llama-stack-spec.yaml",
            outputDir: "docs/api-experimental",
            downloadUrl: "https://raw.githubusercontent.com/meta-llama/llama-stack/main/docs/static/experimental-llama-stack-spec.yaml",
            sidebarOptions: {
              groupPathsBy: "tag",
              categoryLinkSource: "tag",
            },
          } satisfies OpenApiPlugin.Options,
          deprecated: {
            specPath: "static/deprecated-llama-stack-spec.yaml",
            outputDir: "docs/api-deprecated",
            downloadUrl: "https://raw.githubusercontent.com/meta-llama/llama-stack/main/docs/static/deprecated-llama-stack-spec.yaml",
            sidebarOptions: {
              groupPathsBy: "tag",
              categoryLinkSource: "tag",
            },
          } satisfies OpenApiPlugin.Options,
        } satisfies Plugin.PluginOptions,
      },
    ],
@@ -34,40 +34,59 @@ def str_presenter(dumper, data):
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)


def main(output_dir: str):
    output_dir = Path(output_dir)
    if not output_dir.exists():
        raise ValueError(f"Directory {output_dir} does not exist")
def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: bool = False, combined_spec: bool = False):
    """Generate OpenAPI spec with optional stability filtering."""

    # Validate API protocols before generating spec
    return_type_errors = validate_api()
    if return_type_errors:
        print("\nAPI Method Return Type Validation Errors:\n")
        for error in return_type_errors:
            print(error, file=sys.stderr)
        sys.exit(1)
    now = str(datetime.now())
    print(
        "Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now
    )
    print("")
    if combined_spec:
        # Special case for combined stable + experimental APIs
        title_suffix = " - Stable & Experimental APIs"
        filename_prefix = "stainless-"
        description_suffix = "\n\n**🔗 COMBINED**: This specification includes both stable production-ready APIs and experimental pre-release APIs. Use stable APIs for production deployments and experimental APIs for testing new features."
        # Use the special "stainless" filter to include stable + experimental APIs
        stability_filter = "stainless"
    elif stability_filter:
        title_suffix = {
            "stable": " - Stable APIs" if not main_spec else "",
            "experimental": " - Experimental APIs",
            "deprecated": " - Deprecated APIs"
        }.get(stability_filter, f" - {stability_filter.title()} APIs")

        # Use main spec filename for stable when main_spec=True
        if main_spec and stability_filter == "stable":
            filename_prefix = ""
        else:
            filename_prefix = f"{stability_filter}-"

        description_suffix = {
            "stable": "\n\n**✅ STABLE**: Production-ready APIs with backward compatibility guarantees.",
            "experimental": "\n\n**🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before becoming stable.",
            "deprecated": "\n\n**⚠️ DEPRECATED**: Legacy APIs that may be removed in future versions. Use for migration reference only."
        }.get(stability_filter, "")
    else:
        title_suffix = ""
        filename_prefix = ""
        description_suffix = ""

    spec = Specification(
        LlamaStack,
        Options(
            server=Server(url="http://any-hosted-llama-stack.com"),
            info=Info(
                title="Llama Stack Specification",
                title=f"Llama Stack Specification{title_suffix}",
                version=LLAMA_STACK_API_V1,
                description="""This is the specification of the Llama Stack that provides
                description=f"""This is the specification of the Llama Stack that provides
                a set of endpoints and their corresponding interfaces that are tailored to
                best leverage Llama Models.""",
                best leverage Llama Models.{description_suffix}""",
            ),
            include_standard_error_responses=True,
            stability_filter=stability_filter,  # Pass the filter to the generator
        ),
    )

    with open(output_dir / "llama-stack-spec.yaml", "w", encoding="utf-8") as fp:
    yaml_filename = f"{filename_prefix}llama-stack-spec.yaml"
    html_filename = f"{filename_prefix}llama-stack-spec.html"

    with open(output_dir / yaml_filename, "w", encoding="utf-8") as fp:
        y = yaml.YAML()
        y.default_flow_style = False
        y.block_seq_indent = 2
@@ -83,9 +102,39 @@ def main(output_dir: str):
            fp,
        )

    with open(output_dir / "llama-stack-spec.html", "w") as fp:
    with open(output_dir / html_filename, "w") as fp:
        spec.write_html(fp, pretty_print=True)

    print(f"Generated {yaml_filename} and {html_filename}")


def main(output_dir: str):
    output_dir = Path(output_dir)
    if not output_dir.exists():
        raise ValueError(f"Directory {output_dir} does not exist")

    # Validate API protocols before generating spec
    return_type_errors = validate_api()
    if return_type_errors:
        print("\nAPI Method Return Type Validation Errors:\n")
        for error in return_type_errors:
            print(error, file=sys.stderr)
        sys.exit(1)

    now = str(datetime.now())
    print(f"Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at {now}")
    print("")

    # Generate main spec as stable APIs (llama-stack-spec.yaml)
    print("Generating main specification (stable APIs)...")
    generate_spec(output_dir, "stable", main_spec=True)

    print("Generating other stability-filtered specifications...")
    generate_spec(output_dir, "experimental")
    generate_spec(output_dir, "deprecated")

    print("Generating combined stable + experimental specification...")
    generate_spec(output_dir, combined_spec=True)


if __name__ == "__main__":
    fire.Fire(main)
@@ -7,13 +7,14 @@
import hashlib
import inspect
import ipaddress
import os
import types
import typing
from dataclasses import make_dataclass
from pathlib import Path
from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union

from fastapi import UploadFile
from pydantic import BaseModel

from llama_stack.apis.datatypes import Error
from llama_stack.strong_typing.core import JsonType

@@ -35,6 +36,7 @@ from llama_stack.strong_typing.schema import (
    SchemaOptions,
)
from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
from pydantic import BaseModel

from .operations import (
    EndpointOperation,

@@ -48,6 +50,7 @@ from .specification import (
    Document,
    Example,
    ExampleRef,
    ExtraBodyParameter,
    MediaType,
    Operation,
    Parameter,
@@ -546,6 +549,84 @@ class Generator:

        return extra_tags

    def _get_api_group_for_operation(self, op) -> str | None:
        """
        Determine the API group for an operation based on its route path.

        Args:
            op: The endpoint operation

        Returns:
            The API group name derived from the route, or None if unable to determine
        """
        if not hasattr(op, 'webmethod') or not op.webmethod or not hasattr(op.webmethod, 'route'):
            return None

        route = op.webmethod.route
        if not route or not route.startswith('/'):
            return None

        # Extract API group from route path
        # Examples: /v1/agents/list -> agents-api
        #           /v1/responses   -> responses-api
        #           /v1/models      -> models-api
        path_parts = route.strip('/').split('/')

        if len(path_parts) < 2:
            return None

        # Skip version prefix (v1, v1alpha, v1beta, etc.)
        if path_parts[0].startswith('v1'):
            if len(path_parts) < 2:
                return None
            api_segment = path_parts[1]
        else:
            api_segment = path_parts[0]

        # Convert to supplementary file naming convention
        # agents -> agents-api, responses -> responses-api, etc.
        return f"{api_segment}-api"

    def _load_supplemental_content(self, api_group: str | None) -> str:
        """
        Load supplemental content for an API group based on stability level.

        Follows this resolution order:
        1. docs/supplementary/{stability}/{api_group}.md
        2. docs/supplementary/shared/{api_group}.md (fallback)
        3. Empty string if no files found

        Args:
            api_group: The API group name (e.g., "agents-responses-api"), or None if no mapping exists

        Returns:
            The supplemental content as markdown string, or empty string if not found
        """
        if not api_group:
            return ""

        base_path = Path(__file__).parent.parent.parent / "supplementary"

        # Try stability-specific content first if stability filter is set
        if self.options.stability_filter:
            stability_path = base_path / self.options.stability_filter / f"{api_group}.md"
            if stability_path.exists():
                try:
                    return stability_path.read_text(encoding="utf-8")
                except Exception as e:
                    print(f"Warning: Could not read stability-specific supplemental content from {stability_path}: {e}")

        # Fall back to shared content
        shared_path = base_path / "shared" / f"{api_group}.md"
        if shared_path.exists():
            try:
                return shared_path.read_text(encoding="utf-8")
            except Exception as e:
                print(f"Warning: Could not read shared supplemental content from {shared_path}: {e}")

        # No supplemental content found
        return ""

    def _build_operation(self, op: EndpointOperation) -> Operation:
        if op.defining_class.__name__ in [
            "SyntheticDataGeneration",
@@ -597,6 +678,27 @@ class Generator:
        # parameters passed anywhere
        parameters = path_parameters + query_parameters

        # Build extra body parameters documentation
        extra_body_parameters = []
        for param_name, param_type, description in op.extra_body_params:
            if is_type_optional(param_type):
                inner_type: type = unwrap_optional_type(param_type)
                required = False
            else:
                inner_type = param_type
                required = True

            # Use description from ExtraBodyField if available, otherwise from docstring
            param_description = description or doc_params.get(param_name)

            extra_body_param = ExtraBodyParameter(
                name=param_name,
                schema=self.schema_builder.classdef_to_ref(inner_type),
                description=param_description,
                required=required,
            )
            extra_body_parameters.append(extra_body_param)

        webmethod = getattr(op.func_ref, "__webmethod__", None)
        raw_bytes_request_body = False
        if webmethod:
@@ -797,10 +899,14 @@ class Generator:
        else:
            callbacks = None

        description = "\n".join(
        # Build base description from docstring
        base_description = "\n".join(
            filter(None, [doc_string.short_description, doc_string.long_description])
        )

        # Individual endpoints get clean descriptions only
        description = base_description

        return Operation(
            tags=[
                getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)
@@ -811,16 +917,126 @@ class Generator:
            requestBody=requestBody,
            responses=responses,
            callbacks=callbacks,
            deprecated=True if "DEPRECATED" in op.func_name else None,
            deprecated=getattr(op.webmethod, "deprecated", False)
            or "DEPRECATED" in op.func_name,
            security=[] if op.public else None,
            extraBodyParameters=extra_body_parameters if extra_body_parameters else None,
        )

    def _get_api_stability_priority(self, api_level: str) -> int:
        """
        Return sorting priority for API stability levels.
        Lower numbers = higher priority (appear first)

        :param api_level: The API level (e.g., "v1", "v1beta", "v1alpha")
        :return: Priority number for sorting
        """
        stability_order = {
            "v1": 0,       # Stable - highest priority
            "v1beta": 1,   # Beta - medium priority
            "v1alpha": 2,  # Alpha - lowest priority
        }
        return stability_order.get(api_level, 999)  # Unknown levels go last

    def generate(self) -> Document:
        paths: Dict[str, PathItem] = {}
        endpoint_classes: Set[type] = set()
        for op in get_endpoint_operations(
            self.endpoint, use_examples=self.options.use_examples
        ):

        # Collect all operations and filter by stability if specified
        operations = list(
            get_endpoint_operations(
                self.endpoint, use_examples=self.options.use_examples
            )
        )

        # Filter operations by stability level if requested
        if self.options.stability_filter:
            filtered_operations = []
            for op in operations:
                deprecated = (
                    getattr(op.webmethod, "deprecated", False)
                    or "DEPRECATED" in op.func_name
                )
                stability_level = op.webmethod.level

                if self.options.stability_filter == "stable":
                    # Include v1 non-deprecated endpoints
                    if stability_level == "v1" and not deprecated:
                        filtered_operations.append(op)
                elif self.options.stability_filter == "experimental":
                    # Include v1alpha and v1beta endpoints (deprecated or not)
                    if stability_level in ["v1alpha", "v1beta"]:
                        filtered_operations.append(op)
                elif self.options.stability_filter == "deprecated":
                    # Include only deprecated endpoints
                    if deprecated:
                        filtered_operations.append(op)
                elif self.options.stability_filter == "stainless":
                    # Include both stable (v1 non-deprecated) and experimental (v1alpha, v1beta) endpoints
                    if (stability_level == "v1" and not deprecated) or stability_level in ["v1alpha", "v1beta"]:
                        filtered_operations.append(op)

            operations = filtered_operations
            print(
                f"Filtered to {len(operations)} operations for stability level: {self.options.stability_filter}"
            )

        # Sort operations by multiple criteria for consistent ordering:
        # 1. Stability level with deprecation handling (global priority):
        #    - Active stable (v1) comes first
        #    - Beta (v1beta) comes next
        #    - Alpha (v1alpha) comes next
        #    - Deprecated stable (v1 deprecated) comes last
        # 2. Route path (group related endpoints within same stability level)
        # 3. HTTP method (GET, POST, PUT, DELETE, PATCH)
        # 4. Operation name (alphabetical)
        def sort_key(op):
            http_method_order = {
                HTTPMethod.GET: 0,
                HTTPMethod.POST: 1,
                HTTPMethod.PUT: 2,
                HTTPMethod.DELETE: 3,
                HTTPMethod.PATCH: 4,
            }

            # Enhanced stability priority for migration pattern support
            deprecated = getattr(op.webmethod, "deprecated", False)
            stability_priority = self._get_api_stability_priority(op.webmethod.level)

            # Deprecated versions should appear after everything else
            # This ensures deprecated stable endpoints come last globally
            if deprecated:
                stability_priority += 10  # Push deprecated endpoints to the end

            return (
                stability_priority,  # Global stability handling comes first
                op.get_route(
                    op.webmethod
                ),  # Group by route path within stability level
                http_method_order.get(op.http_method, 999),
                op.func_name,
            )

        operations.sort(key=sort_key)

        # Debug output for migration pattern tracking
        migration_routes = {}
        for op in operations:
            route_key = (op.get_route(op.webmethod), op.http_method)
            if route_key not in migration_routes:
                migration_routes[route_key] = []
            migration_routes[route_key].append(
                (op.webmethod.level, getattr(op.webmethod, "deprecated", False))
            )

        for route_key, versions in migration_routes.items():
            if len(versions) > 1:
                print(f"Migration pattern detected for {route_key[1]} {route_key[0]}:")
                for level, deprecated in versions:
                    status = "DEPRECATED" if deprecated else "ACTIVE"
                    print(f"  - {level} ({status})")

        for op in operations:
            endpoint_classes.add(op.defining_class)

            operation = self._build_operation(op)
@@ -851,10 +1067,22 @@ class Generator:
            doc_string = parse_type(cls)
            if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
                continue

            # Add supplemental content to tag pages
            api_group = f"{cls.__name__.lower()}-api"
            supplemental_content = self._load_supplemental_content(api_group)

            tag_description = doc_string.long_description or ""
            if supplemental_content:
                if tag_description:
                    tag_description = f"{tag_description}\n\n{supplemental_content}"
                else:
                    tag_description = supplemental_content

            operation_tags.append(
                Tag(
                    name=cls.__name__,
                    description=doc_string.long_description,
                    description=tag_description,
                    displayName=doc_string.short_description,
                )
            )
@@ -19,10 +19,12 @@ from llama_stack.strong_typing.inspection import get_signature

from typing import get_origin, get_args

from fastapi import UploadFile
from fastapi import UploadFile
from fastapi.params import File, Form
from typing import Annotated

from llama_stack.schema_utils import ExtraBodyField


def split_prefix(
    s: str, sep: str, prefix: Union[str, Iterable[str]]

@@ -89,6 +91,7 @@ class EndpointOperation:
    :param query_params: Parameters of the operation signature that are passed in the query string as `key=value` pairs.
    :param request_params: The parameter that corresponds to the data transmitted in the request body.
    :param multipart_params: Parameters that indicate multipart/form-data request body.
    :param extra_body_params: Parameters that arrive via extra_body and are documented but not in SDK.
    :param event_type: The Python type of the data that is transmitted out-of-band (e.g. via websockets) while the operation is in progress.
    :param response_type: The Python type of the data that is transmitted in the response body.
    :param http_method: The HTTP method used to invoke the endpoint such as POST, GET or PUT.

@@ -106,6 +109,7 @@ class EndpointOperation:
    query_params: List[OperationParameter]
    request_params: Optional[OperationParameter]
    multipart_params: List[OperationParameter]
    extra_body_params: List[tuple[str, type, str | None]]
    event_type: Optional[type]
    response_type: type
    http_method: HTTPMethod

@@ -265,6 +269,7 @@ def get_endpoint_operations(
            query_params = []
            request_params = []
            multipart_params = []
            extra_body_params = []

            for param_name, parameter in signature.parameters.items():
                param_type = _get_annotation_type(parameter.annotation, func_ref)

@@ -279,6 +284,13 @@ def get_endpoint_operations(
                        f"parameter '{param_name}' in function '{func_name}' has no type annotation"
                    )

                # Check if this is an extra_body parameter
                is_extra_body, extra_body_desc = _is_extra_body_param(param_type)
                if is_extra_body:
                    # Store in a separate list for documentation
                    extra_body_params.append((param_name, param_type, extra_body_desc))
                    continue  # Skip adding to request_params

                is_multipart = _is_multipart_param(param_type)

                if prefix in ["get", "delete"]:

@@ -351,6 +363,7 @@ def get_endpoint_operations(
                    query_params=query_params,
                    request_params=request_params,
                    multipart_params=multipart_params,
                    extra_body_params=extra_body_params,
                    event_type=event_type,
                    response_type=response_type,
                    http_method=http_method,
@@ -403,7 +416,7 @@ def get_endpoint_events(endpoint: type) -> Dict[str, type]:
def _is_multipart_param(param_type: type) -> bool:
    """
    Check if a parameter type indicates multipart form data.

    Returns True if the type is:
    - UploadFile
    - Annotated[UploadFile, File()]

@@ -413,19 +426,38 @@ def _is_multipart_param(param_type: type) -> bool:
    """
    if param_type is UploadFile:
        return True

    # Check for Annotated types
    origin = get_origin(param_type)
    if origin is None:
        return False

    if origin is Annotated:
        args = get_args(param_type)
        if len(args) < 2:
            return False

        # Check the annotations for File() or Form()
        for annotation in args[1:]:
            if isinstance(annotation, (File, Form)):
                return True
    return False


def _is_extra_body_param(param_type: type) -> tuple[bool, str | None]:
    """
    Check if parameter is marked as coming from extra_body.

    Returns:
        (is_extra_body, description): Tuple of boolean and optional description
    """
    origin = get_origin(param_type)
    if origin is Annotated:
        args = get_args(param_type)
        for annotation in args[1:]:
            if isinstance(annotation, ExtraBodyField):
                return True, annotation.description
            # Also check by type name for cases where import matters
            if type(annotation).__name__ == 'ExtraBodyField':
                return True, getattr(annotation, 'description', None)
    return False, None
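The pattern that `_is_extra_body_param` looks for is an `Annotated` parameter carrying an `ExtraBodyField` marker. The sketch below is illustrative only (the endpoint class, route, and parameter names are made up); it shows the annotation shape that gets collected into `extra_body_params` and skipped when building the request-body schema.

```python
# Illustrative sketch: a parameter annotated with ExtraBodyField is documented as an
# extra_body parameter rather than as part of the JSON request body.
from typing import Annotated

from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import ExtraBodyField, webmethod


class ExampleAPI:
    @webmethod(route="/example", method="POST", level=LLAMA_STACK_API_V1)
    async def create_example(
        self,
        prompt: str,
        shields: Annotated[
            list[str] | None,
            ExtraBodyField("List of shields to apply during generation."),
        ] = None,  # picked up by _is_extra_body_param(), not by request_params
    ) -> dict: ...
```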
@@ -54,6 +54,7 @@ class Options:
    property_description_fun: Optional[Callable[[type, str, str], str]] = None
    captions: Optional[Dict[str, str]] = None
    include_standard_error_responses: bool = True
    stability_filter: Optional[str] = None

    default_captions: ClassVar[Dict[str, str]] = {
        "Operations": "Operations",
@@ -106,6 +106,15 @@ class Parameter:
    example: Optional[Any] = None


@dataclass
class ExtraBodyParameter:
    """Represents a parameter that arrives via extra_body in the request."""
    name: str
    schema: SchemaOrRef
    description: Optional[str] = None
    required: Optional[bool] = None


@dataclass
class Operation:
    responses: Dict[str, Union[Response, ResponseRef]]

@@ -118,6 +127,7 @@ class Operation:
    callbacks: Optional[Dict[str, "Callback"]] = None
    security: Optional[List["SecurityRequirement"]] = None
    deprecated: Optional[bool] = None
    extraBodyParameters: Optional[List[ExtraBodyParameter]] = None


@dataclass
@@ -52,6 +52,17 @@ class Specification:
            if display_name:
                tag["x-displayName"] = display_name

        # Handle operations to rename extraBodyParameters -> x-llama-stack-extra-body-params
        paths = json_doc.get("paths", {})
        for path_item in paths.values():
            if isinstance(path_item, dict):
                for method in ["get", "post", "put", "delete", "patch"]:
                    operation = path_item.get(method)
                    if operation and isinstance(operation, dict):
                        extra_body_params = operation.pop("extraBodyParameters", None)
                        if extra_body_params:
                            operation["x-llama-stack-extra-body-params"] = extra_body_params

        return json_doc

    def get_json_string(self, pretty_print: bool = False) -> str:
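The net effect of this rename is that documented extra-body parameters surface as a vendor extension on each operation in the emitted spec. A rough sketch of the resulting shape, with placeholder values; only the structure mirrors `ExtraBodyParameter(name, schema, description, required)`:

```python
# Placeholder values for illustration; this is what an operation dict in json_doc
# might look like after the rename above.
operation = {
    "summary": "Create a new OpenAI response.",
    "x-llama-stack-extra-body-params": [
        {
            "name": "shields",
            "schema": {"$ref": "#/components/schemas/ResponseShield"},
            "description": "List of shields to apply during response generation.",
            "required": False,
        }
    ],
}
```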
@@ -16,7 +16,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'Getting Started',
      collapsed: false,
      collapsed: true,
      items: [
        'getting_started/quickstart',
        'getting_started/detailed_tutorial',

@@ -26,7 +26,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'Concepts',
      collapsed: false,
      collapsed: true,
      items: [
        'concepts/index',
        'concepts/architecture',

@@ -48,7 +48,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'Distributions',
      collapsed: false,
      collapsed: true,
      items: [
        'distributions/index',
        'distributions/list_of_distributions',

@@ -93,7 +93,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'Providers',
      collapsed: false,
      collapsed: true,
      items: [
        'providers/index',
        {

@@ -276,7 +276,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'Building Applications',
      collapsed: false,
      collapsed: true,
      items: [
        'building_applications/index',
        'building_applications/rag',

@@ -293,7 +293,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'Advanced APIs',
      collapsed: false,
      collapsed: true,
      items: [
        'advanced_apis/post_training',
        'advanced_apis/evaluation',

@@ -303,7 +303,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'Deploying',
      collapsed: false,
      collapsed: true,
      items: [
        'deploying/index',
        'deploying/kubernetes_deployment',

@@ -313,7 +313,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'Contributing',
      collapsed: false,
      collapsed: true,
      items: [
        'contributing/index',
        'contributing/new_api_provider',

@@ -324,7 +324,7 @@ const sidebars: SidebarsConfig = {
    {
      type: 'category',
      label: 'References',
      collapsed: false,
      collapsed: true,
      items: [
        'references/index',
        'references/llama_cli_reference/index',

@@ -335,8 +335,10 @@ const sidebars: SidebarsConfig = {
    },
  ],

  // API Reference sidebar - use plugin-generated sidebar
  apiSidebar: require('./docs/api/sidebar.ts').default,
  // API Reference sidebars - use plugin-generated sidebars
  stableApiSidebar: require('./docs/api/sidebar.ts').default,
  experimentalApiSidebar: require('./docs/api-experimental/sidebar.ts').default,
  deprecatedApiSidebar: require('./docs/api-deprecated/sidebar.ts').default,
};

export default sidebars;
@@ -189,3 +189,29 @@ button[class*="button"]:hover,
.pagination-nav__link--prev:hover {
  background-color: #f3f4f6 !important;
}

/* Deprecated endpoint styling */
.menu__list-item--deprecated .menu__link {
  text-decoration: line-through !important;
  opacity: 0.7;
  font-style: italic;
}

.menu__list-item--deprecated .menu__link:hover {
  opacity: 0.9;
}

/* Deprecated endpoint badges - slightly muted */
.menu__list-item--deprecated.api-method > .menu__link::before {
  opacity: 0.7;
  border-style: dashed !important;
}

/* Dark theme adjustments for deprecated endpoints */
[data-theme='dark'] .menu__list-item--deprecated .menu__link {
  opacity: 0.6;
}

[data-theme='dark'] .menu__list-item--deprecated .menu__link:hover {
  opacity: 0.8;
}
13427  docs/static/deprecated-llama-stack-spec.html (vendored, new file)
10051  docs/static/deprecated-llama-stack-spec.yaml (vendored, new file)
6450   docs/static/experimental-llama-stack-spec.html (vendored, new file)
4798   docs/static/experimental-llama-stack-spec.yaml (vendored, new file)
BIN    docs/static/img/favicon-16x16.png (vendored, new file, 657 B)
BIN    docs/static/img/favicon-32x32.png (vendored, new file, 1.9 KiB)
BIN    docs/static/img/favicon-48x48.png (vendored, new file, 3.3 KiB)
BIN    docs/static/img/favicon-64x64.png (vendored, new file, 4.9 KiB)
BIN    docs/static/img/favicon.ico (vendored, new file, 679 B)
BIN    docs/static/img/favicon.png (vendored, new file, 1.9 KiB)
BIN    docs/static/img/llama-stack.png (vendored; 71 KiB before, 604 KiB after)
22102  docs/static/llama-stack-spec.html (vendored)
16877  docs/static/llama-stack-spec.yaml (vendored)
BIN    docs/static/llama-stack.png (vendored, removed; 196 KiB before)
36     docs/static/site.webmanifest (vendored, new file)
@@ -0,0 +1,36 @@
{
  "name": "Llama Stack",
  "short_name": "Llama Stack",
  "description": "The open-source framework for building generative AI applications",
  "start_url": "/",
  "display": "standalone",
  "theme_color": "#7C3AED",
  "background_color": "#ffffff",
  "icons": [
    {
      "src": "/img/favicon-16x16.png",
      "sizes": "16x16",
      "type": "image/png"
    },
    {
      "src": "/img/favicon-32x32.png",
      "sizes": "32x32",
      "type": "image/png"
    },
    {
      "src": "/img/favicon-48x48.png",
      "sizes": "48x48",
      "type": "image/png"
    },
    {
      "src": "/img/favicon-64x64.png",
      "sizes": "64x64",
      "type": "image/png"
    },
    {
      "src": "/img/llama-stack-logo.png",
      "sizes": "200x200",
      "type": "image/png"
    }
  ]
}
18601  docs/static/stainless-llama-stack-spec.html (vendored, new file)
13870  docs/static/stainless-llama-stack-spec.yaml (vendored, new file)
9      docs/supplementary/deprecated/agents-api.md (new file)

@@ -0,0 +1,9 @@
## Deprecated APIs

> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.

### Migration Guidance

If you are using deprecated versions of the Agents or Responses APIs, please migrate to:

- **Responses API**: Use the stable v1 Responses API endpoints (a minimal request sketch follows below)
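A minimal migration sketch, assuming a locally running Llama Stack server and an OpenAI-compatible client; the base URL, port, and model id are placeholders. The only point it illustrates is targeting the stable `/v1/responses` route rather than the deprecated `/openai/v1/...` prefix.

```python
from openai import OpenAI

# Placeholder server address and model id; adjust for your deployment.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",
    input="Summarize the Responses API migration in one sentence.",
)
print(response.output_text)
```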
21  docs/supplementary/experimental/agents-api.md (new file)

@@ -0,0 +1,21 @@
## Agents API (Experimental)

> **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback. Great for exploring new capabilities and providing feedback to influence the final design.

Main functionalities provided by this API (a rough usage sketch follows at the end of this page):

- Create agents with specific instructions and ability to use tools.
- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
- Agents can be provided with various shields (see the Safety API for more details).
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.

### 🧪 Feedback Welcome

This API is actively being developed. We welcome feedback on:
- API design and usability
- Performance characteristics
- Missing features or capabilities
- Integration patterns

**Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions) or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
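A rough usage sketch under stated assumptions: the server address, the `v1alpha` URL prefix for experimental routes, the `agent_config` fields shown, and the response field names are illustrative; consult the generated experimental API reference for the exact schema.

```python
import requests

BASE = "http://localhost:8321"  # assumption: local Llama Stack server

# Create an agent on the experimental (v1alpha) route, then inspect it and list its sessions.
agent = requests.post(
    f"{BASE}/v1alpha/agents",
    json={"agent_config": {"model": "llama3.2:3b", "instructions": "You are a helpful assistant."}},
).json()

agent_id = agent["agent_id"]  # field name assumed for illustration
details = requests.get(f"{BASE}/v1alpha/agents/{agent_id}").json()
sessions = requests.get(f"{BASE}/v1alpha/agents/{agent_id}/sessions").json()
print(details, sessions)
```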
40  docs/supplementary/stable/agents-api.md (new file)

@@ -0,0 +1,40 @@
## Responses API

The Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.

> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.

### ✅ Supported Tools

The Responses API supports the following tool types (a usage sketch follows at the end of this section):

- **`web_search`**: Search the web for current information and real-time data
- **`file_search`**: Search through uploaded files and vector stores
  - Supports dynamic `vector_store_ids` per call
  - Compatible with OpenAI file search patterns
- **`function`**: Call custom functions with JSON schema validation
- **`mcp_tool`**: Model Context Protocol integration

### ✅ Supported Fields & Features

**Core Capabilities:**
- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration
- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths
- **Rich Annotations**: Automatic file citations, URL citations, and container file citations
- **Status Tracking**: Monitor tool call execution status and handle failures gracefully

### 🚧 Work in Progress

- Full real-time response streaming support
- `tool_choice` parameter
- `max_tool_calls` parameter
- Built-in tools (code interpreter, containers API)
- Safety & guardrails
- `reasoning` capabilities
- `service_tier`
- `logprobs`
- `max_output_tokens`
- `metadata` handling
- `instructions`
- `incomplete_details`
- `background`
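A usage sketch for the tool support and branching described above, assuming an OpenAI-compatible client pointed at a Llama Stack deployment; the server URL, vector store id, and model name are placeholders.

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder URL

# file_search with a per-call vector store id
first = client.responses.create(
    model="llama3.2:3b",
    input="What does our onboarding doc say about API keys?",
    tools=[{"type": "file_search", "vector_store_ids": ["vs_demo"]}],
)

# Branch the conversation from the first response via previous_response_id
follow_up = client.responses.create(
    model="llama3.2:3b",
    input="Condense that into two bullet points.",
    previous_response_id=first.id,
)
print(follow_up.output_text)
```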
@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
|
|||
from llama_stack.apis.safety import SafetyViolation
|
||||
from llama_stack.apis.tools import ToolDef
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod
|
||||
|
||||
from .openai_responses import (
|
||||
ListOpenAIResponseInputItem,
|
||||
|
|
@ -42,6 +42,20 @@ from .openai_responses import (
|
|||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ResponseShieldSpec(BaseModel):
|
||||
"""Specification for a shield to apply during response generation.
|
||||
|
||||
:param type: The type/identifier of the shield.
|
||||
"""
|
||||
|
||||
type: str
|
||||
# TODO: more fields to be added for shield configuration
|
||||
|
||||
|
||||
ResponseShield = str | ResponseShieldSpec
|
||||
|
||||
|
||||
class Attachment(BaseModel):
|
||||
"""An attachment to an agent turn.
|
||||
|
||||
|
|
@ -472,20 +486,23 @@ class AgentStepResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Agents(Protocol):
|
||||
"""Agents API for creating and interacting with agentic systems.
|
||||
"""Agents
|
||||
|
||||
Main functionalities provided by this API:
|
||||
- Create agents with specific instructions and ability to use tools.
|
||||
- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
|
||||
- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
|
||||
- Agents can be provided with various shields (see the Safety API for more details).
|
||||
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
|
||||
"""
|
||||
APIs for creating and interacting with agentic systems."""
|
||||
|
||||
@webmethod(
|
||||
route="/agents", method="POST", descriptive_name="create_agent", deprecated=True, level=LLAMA_STACK_API_V1
|
||||
route="/agents",
|
||||
method="POST",
|
||||
descriptive_name="create_agent",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents",
|
||||
method="POST",
|
||||
descriptive_name="create_agent",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
@webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def create_agent(
|
||||
self,
|
||||
agent_config: AgentConfig,
|
||||
|
|
@ -648,8 +665,17 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}",
|
||||
method="GET",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_agents_session(
|
||||
self,
|
||||
session_id: str,
|
||||
|
|
@ -666,9 +692,16 @@ class Agents(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1
|
||||
route="/agents/{agent_id}/session/{session_id}",
|
||||
method="DELETE",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def delete_agents_session(
|
||||
self,
|
||||
session_id: str,
|
||||
|
|
@ -681,7 +714,12 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}",
|
||||
method="DELETE",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def delete_agent(
|
||||
self,
|
||||
|
|
@ -704,7 +742,12 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}",
|
||||
method="GET",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_agent(self, agent_id: str) -> Agent:
|
||||
"""Describe an agent by its ID.
|
||||
|
|
@ -714,7 +757,12 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}/sessions", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/sessions",
|
||||
method="GET",
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def list_agent_sessions(
|
||||
self,
|
||||
|
|
@ -738,6 +786,12 @@ class Agents(Protocol):
|
|||
#
|
||||
# Both of these APIs are inherently stateful.
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/responses/{response_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_openai_response(
|
||||
self,
|
||||
|
|
@ -750,6 +804,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_openai_response(
|
||||
self,
|
||||
|
|
@ -764,6 +819,12 @@ class Agents(Protocol):
|
|||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
include: list[str] | None = None,
|
||||
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
|
||||
shields: Annotated[
|
||||
list[ResponseShield] | None,
|
||||
ExtraBodyField(
|
||||
"List of shields to apply during response generation. Shields provide safety and content moderation."
|
||||
),
|
||||
] = None,
|
||||
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
||||
"""Create a new OpenAI response.
|
||||
|
||||
|
|
@ -771,10 +832,12 @@ class Agents(Protocol):
|
|||
:param model: The underlying LLM used for completions.
|
||||
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
|
||||
:param include: (Optional) Additional fields to include in the response.
|
||||
:param shields: (Optional) List of shields to apply during response generation. Can be shield IDs (strings) or shield specifications.
|
||||
:returns: An OpenAIResponseObject.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_responses(
|
||||
self,
|
||||
|
|
@ -793,6 +856,9 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_response_input_items(
|
||||
self,
|
||||
|
|
@ -815,6 +881,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
"""Delete an OpenAI response by its ID.
|
||||
|
|
|
|||
|
|
@ -888,6 +888,10 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
|
|||
|
||||
input: list[OpenAIResponseInput]
|
||||
|
||||
def to_response_object(self) -> OpenAIResponseObject:
|
||||
"""Convert to OpenAIResponseObject by excluding input field."""
|
||||
return OpenAIResponseObject(**{k: v for k, v in self.model_dump().items() if k != "input"})
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListOpenAIResponseObject(BaseModel):
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ class Batches(Protocol):
|
|||
Note: This API is currently under active development and may undergo changes.
|
||||
"""
|
||||
|
||||
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_batch(
|
||||
self,
|
||||
|
|
@ -63,6 +64,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Retrieve information about a specific batch.
|
||||
|
|
@ -72,6 +74,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def cancel_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Cancel a batch that is in progress.
|
||||
|
|
@ -81,6 +84,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_batches(
|
||||
self,
|
||||
|
|
|
|||
31
llama_stack/apis/conversations/__init__.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .conversations import (
|
||||
Conversation,
|
||||
ConversationCreateRequest,
|
||||
ConversationDeletedResource,
|
||||
ConversationItem,
|
||||
ConversationItemCreateRequest,
|
||||
ConversationItemDeletedResource,
|
||||
ConversationItemList,
|
||||
Conversations,
|
||||
ConversationUpdateRequest,
|
||||
Metadata,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Conversation",
|
||||
"ConversationCreateRequest",
|
||||
"ConversationDeletedResource",
|
||||
"ConversationItem",
|
||||
"ConversationItemCreateRequest",
|
||||
"ConversationItemDeletedResource",
|
||||
"ConversationItemList",
|
||||
"Conversations",
|
||||
"ConversationUpdateRequest",
|
||||
"Metadata",
|
||||
]
|
||||
260
llama_stack/apis/conversations/conversations.py
Normal file
|
|
@ -0,0 +1,260 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Annotated, Literal, Protocol, runtime_checkable
|
||||
|
||||
from openai import NOT_GIVEN
|
||||
from openai._types import NotGiven
|
||||
from openai.types.responses.response_includable import ResponseIncludable
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.agents.openai_responses import (
|
||||
OpenAIResponseMessage,
|
||||
OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
OpenAIResponseOutputMessageFunctionToolCall,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageMCPListTools,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
)
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
Metadata = dict[str, str]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Conversation(BaseModel):
|
||||
"""OpenAI-compatible conversation object."""
|
||||
|
||||
id: str = Field(..., description="The unique ID of the conversation.")
|
||||
object: Literal["conversation"] = Field(
|
||||
default="conversation", description="The object type, which is always conversation."
|
||||
)
|
||||
created_at: int = Field(
|
||||
..., description="The time at which the conversation was created, measured in seconds since the Unix epoch."
|
||||
)
|
||||
metadata: Metadata | None = Field(
|
||||
default=None,
|
||||
description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard.",
|
||||
)
|
||||
items: list[dict] | None = Field(
|
||||
default=None,
|
||||
description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationMessage(BaseModel):
|
||||
"""OpenAI-compatible message item for conversations."""
|
||||
|
||||
id: str = Field(..., description="unique identifier for this message")
|
||||
content: list[dict] = Field(..., description="message content")
|
||||
role: str = Field(..., description="message role")
|
||||
status: str = Field(..., description="message status")
|
||||
type: Literal["message"] = "message"
|
||||
object: Literal["message"] = "message"
|
||||
|
||||
|
||||
ConversationItem = Annotated[
|
||||
OpenAIResponseMessage
|
||||
| OpenAIResponseOutputMessageFunctionToolCall
|
||||
| OpenAIResponseOutputMessageFileSearchToolCall
|
||||
| OpenAIResponseOutputMessageWebSearchToolCall
|
||||
| OpenAIResponseOutputMessageMCPCall
|
||||
| OpenAIResponseOutputMessageMCPListTools,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ConversationItem, name="ConversationItem")
|
||||
|
||||
# Using the OpenAI types directly caused issues, but some notes for reference:
# Note that ConversationItem is an Annotated union of the types below:
# from openai.types.responses import *
# from openai.types.responses.response_item import *
# from openai.types.conversations import ConversationItem
# f = [
#     ResponseFunctionToolCallItem,
#     ResponseFunctionToolCallOutputItem,
#     ResponseFileSearchToolCall,
#     ResponseFunctionWebSearch,
#     ImageGenerationCall,
#     ResponseComputerToolCall,
#     ResponseComputerToolCallOutputItem,
#     ResponseReasoningItem,
#     ResponseCodeInterpreterToolCall,
#     LocalShellCall,
#     LocalShellCallOutput,
#     McpListTools,
#     McpApprovalRequest,
#     McpApprovalResponse,
#     McpCall,
#     ResponseCustomToolCall,
#     ResponseCustomToolCallOutput
# ]

@json_schema_type
class ConversationCreateRequest(BaseModel):
    """Request body for creating a conversation."""

    items: list[ConversationItem] | None = Field(
        default=[],
        description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
        max_length=20,
    )
    metadata: Metadata | None = Field(
        default={},
        description="Set of 16 key-value pairs that can be attached to an object. Useful for storing additional information",
        max_length=16,
    )


@json_schema_type
class ConversationUpdateRequest(BaseModel):
    """Request body for updating a conversation."""

    metadata: Metadata = Field(
        ...,
        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.",
    )


@json_schema_type
class ConversationDeletedResource(BaseModel):
    """Response for deleted conversation."""

    id: str = Field(..., description="The deleted conversation identifier")
    object: str = Field(default="conversation.deleted", description="Object type")
    deleted: bool = Field(default=True, description="Whether the object was deleted")


@json_schema_type
class ConversationItemCreateRequest(BaseModel):
    """Request body for creating conversation items."""

    items: list[ConversationItem] = Field(
        ...,
        description="Items to include in the conversation context. You may add up to 20 items at a time.",
        max_length=20,
    )


@json_schema_type
class ConversationItemList(BaseModel):
    """List of conversation items with pagination."""

    object: str = Field(default="list", description="Object type")
    data: list[ConversationItem] = Field(..., description="List of conversation items")
    first_id: str | None = Field(default=None, description="The ID of the first item in the list")
    last_id: str | None = Field(default=None, description="The ID of the last item in the list")
    has_more: bool = Field(default=False, description="Whether there are more items available")


@json_schema_type
class ConversationItemDeletedResource(BaseModel):
    """Response for deleted conversation item."""

    id: str = Field(..., description="The deleted item identifier")
    object: str = Field(default="conversation.item.deleted", description="Object type")
    deleted: bool = Field(default=True, description="Whether the object was deleted")

@runtime_checkable
@trace_protocol
class Conversations(Protocol):
    """Protocol for conversation management operations."""

    @webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
    async def create_conversation(
        self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
    ) -> Conversation:
        """Create a conversation.

        :param items: Initial items to include in the conversation context.
        :param metadata: Set of key-value pairs that can be attached to an object.
        :returns: The created conversation object.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_conversation(self, conversation_id: str) -> Conversation:
        """Get a conversation with the given ID.

        :param conversation_id: The conversation identifier.
        :returns: The conversation object.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
    async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
        """Update a conversation's metadata with the given ID.

        :param conversation_id: The conversation identifier.
        :param metadata: Set of key-value pairs that can be attached to an object.
        :returns: The updated conversation object.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
        """Delete a conversation with the given ID.

        :param conversation_id: The conversation identifier.
        :returns: The deleted conversation resource.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
    async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
        """Create items in the conversation.

        :param conversation_id: The conversation identifier.
        :param items: Items to include in the conversation context.
        :returns: List of created items.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
        """Retrieve a conversation item.

        :param conversation_id: The conversation identifier.
        :param item_id: The item identifier.
        :returns: The conversation item.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}/items", method="GET", level=LLAMA_STACK_API_V1)
    async def list(
        self,
        conversation_id: str,
        after: str | NotGiven = NOT_GIVEN,
        include: list[ResponseIncludable] | NotGiven = NOT_GIVEN,
        limit: int | NotGiven = NOT_GIVEN,
        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
    ) -> ConversationItemList:
        """List items in the conversation.

        :param conversation_id: The conversation identifier.
        :param after: An item ID to list items after, used in pagination.
        :param include: Specify additional output data to include in the response.
        :param limit: A limit on the number of objects to be returned (1-100, default 20).
        :param order: The order to return items in (asc or desc, default desc).
        :returns: List of conversation items.
        """
        ...

    @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_conversation_item(
        self, conversation_id: str, item_id: str
    ) -> ConversationItemDeletedResource:
        """Delete a conversation item.

        :param conversation_id: The conversation identifier.
        :param item_id: The item identifier.
        :returns: The deleted item resource.
        """
        ...

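# Minimal usage sketch (illustrative, not part of this change). It assumes an
# object implementing the Conversations protocol above, e.g. the built-in
# service added later in this diff; the message payload shape is an assumption.
async def _conversations_demo(conversations: Conversations) -> None:
    conv = await conversations.create_conversation(metadata={"topic": "support"})

    await conversations.add_items(
        conv.id,
        items=[OpenAIResponseMessage(role="user", content="How do I reset my password?")],
    )

    page = await conversations.list(conv.id, limit=10, order="asc")
    for item in page.data:
        print(item.type, getattr(item, "id", None))

    await conversations.openai_delete_conversation(conv.id)
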
@@ -8,7 +8,7 @@ from typing import Any, Protocol, runtime_checkable

from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasets import Dataset
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
from llama_stack.schema_utils import webmethod


@@ -21,7 +21,8 @@ class DatasetIO(Protocol):
    # keeping for aligning with inference/safety, but this is not used
    dataset_store: DatasetStore

    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
    async def iterrows(
        self,
        dataset_id: str,

@@ -45,7 +46,10 @@ class DatasetIO(Protocol):
        """
        ...

    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(
        route="/datasetio/append-rows/{dataset_id:path}", method="POST", deprecated=True, level=LLAMA_STACK_API_V1
    )
    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA)
    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
        """Append rows to a dataset.

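# Hedged sketch of the route-migration pattern applied in this diff: stacking one
# @webmethod per API level keeps a single handler reachable at the old v1 path
# (now marked deprecated) while the canonical route moves to v1beta. The protocol
# name and route below are hypothetical, not part of this change.
from typing import Protocol

from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
from llama_stack.schema_utils import webmethod


class ExampleAPI(Protocol):
    @webmethod(route="/example/{item_id}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
    @webmethod(route="/example/{item_id}", method="GET", level=LLAMA_STACK_API_V1BETA)
    async def get_example(self, item_id: str) -> dict:
        """Served at both the deprecated v1 route and the new v1beta route."""
        ...
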
@ -10,7 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
|
|
@ -146,7 +146,8 @@ class ListDatasetsResponse(BaseModel):
|
|||
|
||||
|
||||
class Datasets(Protocol):
|
||||
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
|
||||
async def register_dataset(
|
||||
self,
|
||||
purpose: DatasetPurpose,
|
||||
|
|
@ -215,7 +216,8 @@ class Datasets(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
|
||||
async def get_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
|
@ -227,7 +229,8 @@ class Datasets(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
|
||||
async def list_datasets(self) -> ListDatasetsResponse:
|
||||
"""List all datasets.
|
||||
|
||||
|
|
@ -235,7 +238,8 @@ class Datasets(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
|
||||
async def unregister_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
|
|
|||
|
|
@@ -129,6 +129,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
    tool_groups = "tool_groups"
    files = "files"
    prompts = "prompts"
    conversations = "conversations"

    # built-in API
    inspect = "inspect"

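# Tiny illustration (not part of this change): the new member behaves like any
# other Api enum value, so lookups by value resolve to it.
assert Api("conversations") is Api.conversations
assert Api.conversations.value == "conversations"
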
@ -105,6 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
|
|||
@trace_protocol
|
||||
class Files(Protocol):
|
||||
# OpenAI Files API Endpoints
|
||||
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_upload_file(
|
||||
self,
|
||||
|
|
@ -127,6 +128,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_files(
|
||||
self,
|
||||
|
|
@ -146,6 +148,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file(
|
||||
self,
|
||||
|
|
@ -159,6 +162,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_file(
|
||||
self,
|
||||
|
|
@ -172,6 +176,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file_content(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -27,14 +27,12 @@ from llama_stack.models.llama.datatypes import (
|
|||
StopReason,
|
||||
ToolCall,
|
||||
ToolDefinition,
|
||||
ToolParamDefinition,
|
||||
ToolPromptFormat,
|
||||
)
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
register_schema(ToolCall)
|
||||
register_schema(ToolParamDefinition)
|
||||
register_schema(ToolDefinition)
|
||||
|
||||
from enum import StrEnum
|
||||
|
|
@ -1008,67 +1006,6 @@ class InferenceProvider(Protocol):
|
|||
|
||||
model_store: ModelStore | None = None
|
||||
|
||||
async def completion(
|
||||
self,
|
||||
model_id: str,
|
||||
content: InterleavedContent,
|
||||
sampling_params: SamplingParams | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
stream: bool | None = False,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> CompletionResponse | AsyncIterator[CompletionResponseStreamChunk]:
|
||||
"""Generate a completion for the given content using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||
:param content: The content to generate a completion for.
|
||||
:param sampling_params: (Optional) Parameters to control the sampling strategy.
|
||||
:param response_format: (Optional) Grammar specification for guided (structured) decoding.
|
||||
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
|
||||
:returns: If stream=False, returns a CompletionResponse with the full completion.
|
||||
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.
|
||||
"""
|
||||
...
|
||||
|
||||
async def chat_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
messages: list[Message],
|
||||
sampling_params: SamplingParams | None = None,
|
||||
tools: list[ToolDefinition] | None = None,
|
||||
tool_choice: ToolChoice | None = ToolChoice.auto,
|
||||
tool_prompt_format: ToolPromptFormat | None = None,
|
||||
response_format: ResponseFormat | None = None,
|
||||
stream: bool | None = False,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
tool_config: ToolConfig | None = None,
|
||||
) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]:
|
||||
"""Generate a chat completion for the given messages using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||
:param messages: List of messages in the conversation.
|
||||
:param sampling_params: Parameters to control the sampling strategy.
|
||||
:param tools: (Optional) List of tool definitions available to the model.
|
||||
:param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
|
||||
.. deprecated::
|
||||
Use tool_config instead.
|
||||
:param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model.
|
||||
- `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
|
||||
- `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag.
|
||||
- `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls.
|
||||
.. deprecated::
|
||||
Use tool_config instead.
|
||||
:param response_format: (Optional) Grammar specification for guided (structured) decoding. There are two options:
|
||||
- `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format.
|
||||
- `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it.
|
||||
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
|
||||
:param tool_config: (Optional) Configuration for tool use.
|
||||
:returns: If stream=False, returns a ChatCompletionResponse with the full completion.
|
||||
If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def rerank(
|
||||
self,
|
||||
|
|
@ -1088,6 +1025,7 @@ class InferenceProvider(Protocol):
|
|||
raise NotImplementedError("Reranking is not implemented")
|
||||
return # this is so mypy's safe-super rule will consider the method concrete
|
||||
|
||||
@webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_completion(
|
||||
self,
|
||||
|
|
@ -1139,6 +1077,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_chat_completion(
|
||||
self,
|
||||
|
|
@ -1195,6 +1134,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
|
|
@ -1224,6 +1164,7 @@ class Inference(InferenceProvider):
|
|||
- Embedding models: these models generate embeddings to be used for semantic search.
|
||||
"""
|
||||
|
||||
@webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_chat_completions(
|
||||
self,
|
||||
|
|
@ -1242,6 +1183,9 @@ class Inference(InferenceProvider):
|
|||
"""
|
||||
raise NotImplementedError("List chat completions is not implemented")
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
|
||||
"""Describe a chat completion by its ID.
|
||||
|
|
|
|||
|
|
@ -111,6 +111,14 @@ class Models(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def openai_list_models(self) -> OpenAIListModelsResponse:
|
||||
"""List models using the OpenAI API.
|
||||
|
||||
:returns: A OpenAIListModelsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_model(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -114,6 +114,7 @@ class Safety(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
"""Classifies if text and/or image inputs are potentially harmful.
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ from typing import (
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.models.llama.datatypes import Primitive
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
|
@ -426,7 +426,14 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(
|
||||
route="/telemetry/traces",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def query_traces(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition] | None = None,
|
||||
|
|
@ -445,7 +452,17 @@ class Telemetry(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
|
||||
route="/telemetry/traces/{trace_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_trace(self, trace_id: str) -> Trace:
|
||||
"""Get a trace by its ID.
|
||||
|
|
@ -459,8 +476,15 @@ class Telemetry(Protocol):
|
|||
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_span(self, trace_id: str, span_id: str) -> Span:
|
||||
"""Get a span by its ID.
|
||||
|
||||
|
|
@ -473,9 +497,16 @@ class Telemetry(Protocol):
|
|||
@webmethod(
|
||||
route="/telemetry/spans/{span_id:path}/tree",
|
||||
method="POST",
|
||||
deprecated=True,
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/telemetry/spans/{span_id:path}/tree",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def get_span_tree(
|
||||
self,
|
||||
span_id: str,
|
||||
|
|
@ -491,7 +522,14 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(
|
||||
route="/telemetry/spans",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def query_spans(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition],
|
||||
|
|
@ -507,7 +545,8 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def save_spans_to_dataset(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition],
|
||||
|
|
@ -525,7 +564,17 @@ class Telemetry(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
|
||||
route="/telemetry/metrics/{metric_name}",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
deprecated=True,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/telemetry/metrics/{metric_name}",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1ALPHA,
|
||||
)
|
||||
async def query_metrics(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
from enum import Enum
|
||||
from typing import Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
||||
|
|
@ -19,59 +19,23 @@ from llama_stack.schema_utils import json_schema_type, webmethod
|
|||
from .rag_tool import RAGToolRuntime
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolParameter(BaseModel):
|
||||
"""Parameter definition for a tool.
|
||||
|
||||
:param name: Name of the parameter
|
||||
:param parameter_type: Type of the parameter (e.g., string, integer)
|
||||
:param description: Human-readable description of what the parameter does
|
||||
:param required: Whether this parameter is required for tool invocation
|
||||
:param items: Type of the elements when parameter_type is array
|
||||
:param title: (Optional) Title of the parameter
|
||||
:param default: (Optional) Default value for the parameter if not provided
|
||||
"""
|
||||
|
||||
name: str
|
||||
parameter_type: str
|
||||
description: str
|
||||
required: bool = Field(default=True)
|
||||
items: dict | None = None
|
||||
title: str | None = None
|
||||
default: Any | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Tool(Resource):
|
||||
"""A tool that can be invoked by agents.
|
||||
|
||||
:param type: Type of resource, always 'tool'
|
||||
:param toolgroup_id: ID of the tool group this tool belongs to
|
||||
:param description: Human-readable description of what the tool does
|
||||
:param parameters: List of parameters this tool accepts
|
||||
:param metadata: (Optional) Additional metadata about the tool
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.tool] = ResourceType.tool
|
||||
toolgroup_id: str
|
||||
description: str
|
||||
parameters: list[ToolParameter]
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolDef(BaseModel):
|
||||
"""Tool definition used in runtime contexts.
|
||||
|
||||
:param name: Name of the tool
|
||||
:param description: (Optional) Human-readable description of what the tool does
|
||||
:param parameters: (Optional) List of parameters this tool accepts
|
||||
:param input_schema: (Optional) JSON Schema for tool inputs (MCP inputSchema)
|
||||
:param output_schema: (Optional) JSON Schema for tool outputs (MCP outputSchema)
|
||||
:param metadata: (Optional) Additional metadata about the tool
|
||||
:param toolgroup_id: (Optional) ID of the tool group this tool belongs to
|
||||
"""
|
||||
|
||||
toolgroup_id: str | None = None
|
||||
name: str
|
||||
description: str | None = None
|
||||
parameters: list[ToolParameter] | None = None
|
||||
input_schema: dict[str, Any] | None = None
|
||||
output_schema: dict[str, Any] | None = None
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
|
|
@ -122,7 +86,7 @@ class ToolInvocationResult(BaseModel):
|
|||
|
||||
|
||||
class ToolStore(Protocol):
|
||||
async def get_tool(self, tool_name: str) -> Tool: ...
|
||||
async def get_tool(self, tool_name: str) -> ToolDef: ...
|
||||
async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ...
|
||||
|
||||
|
||||
|
|
@ -135,15 +99,6 @@ class ListToolGroupsResponse(BaseModel):
|
|||
data: list[ToolGroup]
|
||||
|
||||
|
||||
class ListToolsResponse(BaseModel):
|
||||
"""Response containing a list of tools.
|
||||
|
||||
:param data: List of tools
|
||||
"""
|
||||
|
||||
data: list[Tool]
|
||||
|
||||
|
||||
class ListToolDefsResponse(BaseModel):
|
||||
"""Response containing a list of tool definitions.
|
||||
|
||||
|
|
@ -194,11 +149,11 @@ class ToolGroups(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
|
||||
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
|
||||
"""List tools with optional tool group.
|
||||
|
||||
:param toolgroup_id: The ID of the tool group to list tools for.
|
||||
:returns: A ListToolsResponse.
|
||||
:returns: A ListToolDefsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
|
|
@ -206,11 +161,11 @@ class ToolGroups(Protocol):
|
|||
async def get_tool(
|
||||
self,
|
||||
tool_name: str,
|
||||
) -> Tool:
|
||||
) -> ToolDef:
|
||||
"""Get a tool by its name.
|
||||
|
||||
:param tool_name: The name of the tool to get.
|
||||
:returns: A Tool.
|
||||
:returns: A ToolDef.
|
||||
"""
|
||||
...
|
||||
|
||||
|
|
|
|||
|
|
@ -512,6 +512,7 @@ class VectorIO(Protocol):
|
|||
...
|
||||
|
||||
# OpenAI Vector Stores API endpoints
|
||||
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
|
|
@ -538,6 +539,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_vector_stores(
|
||||
self,
|
||||
|
|
@ -556,6 +558,9 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
|
|
@ -568,6 +573,9 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}",
|
||||
method="POST",
|
||||
|
|
@ -590,6 +598,9 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}",
|
||||
method="DELETE",
|
||||
|
|
@ -606,6 +617,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/search",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/search",
|
||||
method="POST",
|
||||
|
|
@ -638,6 +655,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files",
|
||||
method="POST",
|
||||
|
|
@ -660,6 +683,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files",
|
||||
method="GET",
|
||||
|
|
@ -686,6 +715,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="GET",
|
||||
|
|
@ -704,6 +739,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}/content",
|
||||
method="GET",
|
||||
|
|
@ -722,6 +763,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="POST",
|
||||
|
|
@ -742,6 +789,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="DELETE",
|
||||
|
|
@ -765,6 +818,12 @@ class VectorIO(Protocol):
|
|||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
async def openai_create_vector_store_file_batch(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
|
@ -787,6 +846,12 @@ class VectorIO(Protocol):
|
|||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
async def openai_retrieve_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
|
|
@ -800,6 +865,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
|
||||
method="GET",
|
||||
|
|
@ -828,6 +899,12 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
|
||||
method="POST",
|
||||
|
|
|
|||
|
|
@ -6,11 +6,18 @@
|
|||
|
||||
import argparse
|
||||
import os
|
||||
import ssl
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import uvicorn
|
||||
import yaml
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.core.datatypes import LoggingConfig, StackRunConfig
|
||||
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars, validate_env_pair
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
||||
|
|
@ -146,23 +153,7 @@ class StackRun(Subcommand):
|
|||
# using the current environment packages.
|
||||
if not image_type and not image_name:
|
||||
logger.info("No image type or image name provided. Assuming environment packages.")
|
||||
from llama_stack.core.server.server import main as server_main
|
||||
|
||||
# Build the server args from the current args passed to the CLI
|
||||
server_args = argparse.Namespace()
|
||||
for arg in vars(args):
|
||||
# If this is a function, avoid passing it
|
||||
# "args" contains:
|
||||
# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
|
||||
if callable(getattr(args, arg)):
|
||||
continue
|
||||
if arg == "config":
|
||||
server_args.config = str(config_file)
|
||||
else:
|
||||
setattr(server_args, arg, getattr(args, arg))
|
||||
|
||||
# Run the server
|
||||
server_main(server_args)
|
||||
self._uvicorn_run(config_file, args)
|
||||
else:
|
||||
run_args = formulate_run_args(image_type, image_name)
|
||||
|
||||
|
|
@ -184,6 +175,76 @@ class StackRun(Subcommand):
|
|||
|
||||
run_command(run_args)
|
||||
|
||||
def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
|
||||
if not config_file:
|
||||
self.parser.error("Config file is required")
|
||||
|
||||
# Set environment variables if provided
|
||||
if args.env:
|
||||
for env_pair in args.env:
|
||||
try:
|
||||
key, value = validate_env_pair(env_pair)
|
||||
logger.info(f"Setting environment variable {key} => {value}")
|
||||
os.environ[key] = value
|
||||
except ValueError as e:
|
||||
logger.error(f"Error: {str(e)}")
|
||||
self.parser.error(f"Invalid environment variable format: {env_pair}")
|
||||
|
||||
config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
|
||||
with open(config_file) as fp:
|
||||
config_contents = yaml.safe_load(fp)
|
||||
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
||||
logger_config = LoggingConfig(**cfg)
|
||||
else:
|
||||
logger_config = None
|
||||
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
||||
|
||||
port = args.port or config.server.port
|
||||
host = config.server.host or ["::", "0.0.0.0"]
|
||||
|
||||
# Set the config file in environment so create_app can find it
|
||||
os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
|
||||
|
||||
uvicorn_config = {
|
||||
"factory": True,
|
||||
"host": host,
|
||||
"port": port,
|
||||
"lifespan": "on",
|
||||
"log_level": logger.getEffectiveLevel(),
|
||||
"log_config": logger_config,
|
||||
}
|
||||
|
||||
keyfile = config.server.tls_keyfile
|
||||
certfile = config.server.tls_certfile
|
||||
if keyfile and certfile:
|
||||
uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
|
||||
uvicorn_config["ssl_certfile"] = config.server.tls_certfile
|
||||
if config.server.tls_cafile:
|
||||
uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
|
||||
uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
|
||||
|
||||
logger.info(
|
||||
f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
|
||||
)
|
||||
else:
|
||||
logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
|
||||
|
||||
logger.info(f"Listening on {host}:{port}")
|
||||
|
||||
# We need to catch KeyboardInterrupt because uvicorn's signal handling
|
||||
# re-raises SIGINT signals using signal.raise_signal(), which Python
|
||||
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
|
||||
# stack trace when using Ctrl+C or kill -2 (SIGINT).
|
||||
# SIGTERM (kill -15) works fine without this because Python doesn't
|
||||
# have a default handler for it.
|
||||
#
|
||||
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
||||
# signal handling but this is quite intrusive and not worth the effort.
|
||||
try:
|
||||
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
logger.info("Received interrupt signal, shutting down gracefully...")
|
||||
|
||||
def _start_ui_development_server(self, stack_server_port: int):
|
||||
logger.info("Attempting to start UI development server...")
|
||||
# Check if npm is available
|
||||
|
|
|
|||
|
|
@@ -324,14 +324,14 @@ fi
RUN pip uninstall -y uv
EOF

# If a run config is provided, we use the --config flag
# If a run config is provided, we use the llama stack CLI
if [[ -n "$run_config" ]]; then
  add_to_container << EOF
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$RUN_CONFIG_PATH"]
ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
EOF
elif [[ "$distro_or_config" != *.yaml ]]; then
  add_to_container << EOF
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$distro_or_config"]
ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
EOF
fi

llama_stack/core/conversations/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

llama_stack/core/conversations/conversations.py (new file, 306 lines)
@@ -0,0 +1,306 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
import secrets
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from openai import NOT_GIVEN
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
from llama_stack.apis.conversations.conversations import (
|
||||
Conversation,
|
||||
ConversationDeletedResource,
|
||||
ConversationItem,
|
||||
ConversationItemDeletedResource,
|
||||
ConversationItemList,
|
||||
Conversations,
|
||||
Metadata,
|
||||
)
|
||||
from llama_stack.core.datatypes import AccessRule
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
|
||||
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import (
|
||||
SqliteSqlStoreConfig,
|
||||
SqlStoreConfig,
|
||||
sqlstore_impl,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="openai::conversations")
|
||||
|
||||
|
||||
class ConversationServiceConfig(BaseModel):
|
||||
"""Configuration for the built-in conversation service.
|
||||
|
||||
:param conversations_store: SQL store configuration for conversations (defaults to SQLite)
|
||||
:param policy: Access control rules
|
||||
"""
|
||||
|
||||
conversations_store: SqlStoreConfig = SqliteSqlStoreConfig(
|
||||
db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
|
||||
)
|
||||
policy: list[AccessRule] = []
|
||||
|
||||
|
||||
async def get_provider_impl(config: ConversationServiceConfig, deps: dict[Any, Any]):
|
||||
"""Get the conversation service implementation."""
|
||||
impl = ConversationServiceImpl(config, deps)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
||||
|
||||
class ConversationServiceImpl(Conversations):
|
||||
"""Built-in conversation service implementation using AuthorizedSqlStore."""
|
||||
|
||||
def __init__(self, config: ConversationServiceConfig, deps: dict[Any, Any]):
|
||||
self.config = config
|
||||
self.deps = deps
|
||||
self.policy = config.policy
|
||||
|
||||
base_sql_store = sqlstore_impl(config.conversations_store)
|
||||
self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Initialize the store and create tables."""
|
||||
if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
|
||||
os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)
|
||||
|
||||
await self.sql_store.create_table(
|
||||
"openai_conversations",
|
||||
{
|
||||
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
|
||||
"created_at": ColumnType.INTEGER,
|
||||
"items": ColumnType.JSON,
|
||||
"metadata": ColumnType.JSON,
|
||||
},
|
||||
)
|
||||
|
||||
await self.sql_store.create_table(
|
||||
"conversation_items",
|
||||
{
|
||||
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
|
||||
"conversation_id": ColumnType.STRING,
|
||||
"created_at": ColumnType.INTEGER,
|
||||
"item_data": ColumnType.JSON,
|
||||
},
|
||||
)
|
||||
|
||||
async def create_conversation(
|
||||
self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
|
||||
) -> Conversation:
|
||||
"""Create a conversation."""
|
||||
random_bytes = secrets.token_bytes(24)
|
||||
conversation_id = f"conv_{random_bytes.hex()}"
|
||||
created_at = int(time.time())
|
||||
|
||||
record_data = {
|
||||
"id": conversation_id,
|
||||
"created_at": created_at,
|
||||
"items": [],
|
||||
"metadata": metadata,
|
||||
}
|
||||
|
||||
await self.sql_store.insert(
|
||||
table="openai_conversations",
|
||||
data=record_data,
|
||||
)
|
||||
|
||||
if items:
|
||||
item_records = []
|
||||
for item in items:
|
||||
item_dict = item.model_dump()
|
||||
item_id = self._get_or_generate_item_id(item, item_dict)
|
||||
|
||||
item_record = {
|
||||
"id": item_id,
|
||||
"conversation_id": conversation_id,
|
||||
"created_at": created_at,
|
||||
"item_data": item_dict,
|
||||
}
|
||||
|
||||
item_records.append(item_record)
|
||||
|
||||
await self.sql_store.insert(table="conversation_items", data=item_records)
|
||||
|
||||
conversation = Conversation(
|
||||
id=conversation_id,
|
||||
created_at=created_at,
|
||||
metadata=metadata,
|
||||
object="conversation",
|
||||
)
|
||||
|
||||
logger.info(f"Created conversation {conversation_id}")
|
||||
return conversation
|
||||
|
||||
async def get_conversation(self, conversation_id: str) -> Conversation:
|
||||
"""Get a conversation with the given ID."""
|
||||
record = await self.sql_store.fetch_one(table="openai_conversations", where={"id": conversation_id})
|
||||
|
||||
if record is None:
|
||||
raise ValueError(f"Conversation {conversation_id} not found")
|
||||
|
||||
return Conversation(
|
||||
id=record["id"], created_at=record["created_at"], metadata=record.get("metadata"), object="conversation"
|
||||
)
|
||||
|
||||
async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
|
||||
"""Update a conversation's metadata with the given ID"""
|
||||
await self.sql_store.update(
|
||||
table="openai_conversations", data={"metadata": metadata}, where={"id": conversation_id}
|
||||
)
|
||||
|
||||
return await self.get_conversation(conversation_id)
|
||||
|
||||
async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
|
||||
"""Delete a conversation with the given ID."""
|
||||
await self.sql_store.delete(table="openai_conversations", where={"id": conversation_id})
|
||||
|
||||
logger.info(f"Deleted conversation {conversation_id}")
|
||||
return ConversationDeletedResource(id=conversation_id)
|
||||
|
||||
def _validate_conversation_id(self, conversation_id: str) -> None:
|
||||
"""Validate conversation ID format."""
|
||||
if not conversation_id.startswith("conv_"):
|
||||
raise ValueError(
|
||||
f"Invalid 'conversation_id': '{conversation_id}'. Expected an ID that begins with 'conv_'."
|
||||
)
|
||||
|
||||
def _get_or_generate_item_id(self, item: ConversationItem, item_dict: dict) -> str:
|
||||
"""Get existing item ID or generate one if missing."""
|
||||
if item.id is None:
|
||||
random_bytes = secrets.token_bytes(24)
|
||||
if item.type == "message":
|
||||
item_id = f"msg_{random_bytes.hex()}"
|
||||
else:
|
||||
item_id = f"item_{random_bytes.hex()}"
|
||||
item_dict["id"] = item_id
|
||||
return item_id
|
||||
return item.id
|
||||
|
||||
async def _get_validated_conversation(self, conversation_id: str) -> Conversation:
|
||||
"""Validate conversation ID and return the conversation if it exists."""
|
||||
self._validate_conversation_id(conversation_id)
|
||||
return await self.get_conversation(conversation_id)
|
||||
|
||||
async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
|
||||
"""Create (add) items to a conversation."""
|
||||
await self._get_validated_conversation(conversation_id)
|
||||
|
||||
created_items = []
|
||||
created_at = int(time.time())
|
||||
|
||||
for item in items:
|
||||
item_dict = item.model_dump()
|
||||
item_id = self._get_or_generate_item_id(item, item_dict)
|
||||
|
||||
item_record = {
|
||||
"id": item_id,
|
||||
"conversation_id": conversation_id,
|
||||
"created_at": created_at,
|
||||
"item_data": item_dict,
|
||||
}
|
||||
|
||||
# TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update
|
||||
try:
|
||||
await self.sql_store.insert(table="conversation_items", data=item_record)
|
||||
except Exception:
|
||||
# If insert fails due to ID conflict, update existing record
|
||||
await self.sql_store.update(
|
||||
table="conversation_items",
|
||||
data={"created_at": created_at, "item_data": item_dict},
|
||||
where={"id": item_id},
|
||||
)
|
||||
|
||||
created_items.append(item_dict)
|
||||
|
||||
logger.info(f"Created {len(created_items)} items in conversation {conversation_id}")
|
||||
|
||||
# Convert created items (dicts) to proper ConversationItem types
|
||||
adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
|
||||
response_items: list[ConversationItem] = [adapter.validate_python(item_dict) for item_dict in created_items]
|
||||
|
||||
return ConversationItemList(
|
||||
data=response_items,
|
||||
first_id=created_items[0]["id"] if created_items else None,
|
||||
last_id=created_items[-1]["id"] if created_items else None,
|
||||
has_more=False,
|
||||
)
|
||||
|
||||
async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
|
||||
"""Retrieve a conversation item."""
|
||||
if not conversation_id:
|
||||
raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
|
||||
if not item_id:
|
||||
raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}")
|
||||
|
||||
# Get item from conversation_items table
|
||||
record = await self.sql_store.fetch_one(
|
||||
table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
|
||||
)
|
||||
|
||||
if record is None:
|
||||
raise ValueError(f"Item {item_id} not found in conversation {conversation_id}")
|
||||
|
||||
adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
|
||||
return adapter.validate_python(record["item_data"])
|
||||
|
||||
async def list(self, conversation_id: str, after=NOT_GIVEN, include=NOT_GIVEN, limit=NOT_GIVEN, order=NOT_GIVEN):
|
||||
"""List items in the conversation."""
|
||||
result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id})
|
||||
records = result.data
|
||||
|
||||
if order != NOT_GIVEN and order == "asc":
|
||||
records.sort(key=lambda x: x["created_at"])
|
||||
else:
|
||||
records.sort(key=lambda x: x["created_at"], reverse=True)
|
||||
|
||||
actual_limit = 20
|
||||
if limit != NOT_GIVEN and isinstance(limit, int):
|
||||
actual_limit = limit
|
||||
|
||||
records = records[:actual_limit]
|
||||
items = [record["item_data"] for record in records]
|
||||
|
||||
adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
|
||||
response_items: list[ConversationItem] = [adapter.validate_python(item) for item in items]
|
||||
|
||||
first_id = response_items[0].id if response_items else None
|
||||
last_id = response_items[-1].id if response_items else None
|
||||
|
||||
return ConversationItemList(
|
||||
data=response_items,
|
||||
first_id=first_id,
|
||||
last_id=last_id,
|
||||
has_more=False,
|
||||
)
|
||||
|
||||
async def openai_delete_conversation_item(
|
||||
self, conversation_id: str, item_id: str
|
||||
) -> ConversationItemDeletedResource:
|
||||
"""Delete a conversation item."""
|
||||
if not conversation_id:
|
||||
raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
|
||||
if not item_id:
|
||||
raise ValueError(f"Expected a non-empty value for `item_id` but received {item_id!r}")
|
||||
|
||||
_ = await self._get_validated_conversation(conversation_id)
|
||||
|
||||
record = await self.sql_store.fetch_one(
|
||||
table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
|
||||
)
|
||||
|
||||
if record is None:
|
||||
raise ValueError(f"Item {item_id} not found in conversation {conversation_id}")
|
||||
|
||||
await self.sql_store.delete(
|
||||
table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
|
||||
)
|
||||
|
||||
logger.info(f"Deleted item {item_id} from conversation {conversation_id}")
|
||||
return ConversationItemDeletedResource(id=item_id)
|
||||
|
|
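# Hedged end-to-end sketch (not part of this change): exercising the built-in
# service above with its default SQLite-backed store. The message payload shape
# is an assumption for illustration.
import asyncio

from llama_stack.apis.agents.openai_responses import OpenAIResponseMessage
from llama_stack.core.conversations.conversations import (
    ConversationServiceConfig,
    get_provider_impl,
)


async def _demo() -> None:
    impl = await get_provider_impl(ConversationServiceConfig(), deps={})

    conv = await impl.create_conversation(metadata={"topic": "demo"})
    await impl.add_items(conv.id, [OpenAIResponseMessage(role="user", content="hello")])

    page = await impl.list(conv.id, order="asc")
    print([item.id for item in page.data])


asyncio.run(_demo())
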
@ -22,7 +22,7 @@ from llama_stack.apis.safety import Safety
|
|||
from llama_stack.apis.scoring import Scoring
|
||||
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
|
||||
from llama_stack.apis.shields import Shield, ShieldInput
|
||||
from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
|
||||
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
|
||||
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.core.access_control.datatypes import AccessRule
|
||||
|
|
@ -84,15 +84,11 @@ class BenchmarkWithOwner(Benchmark, ResourceWithOwner):
|
|||
pass
|
||||
|
||||
|
||||
class ToolWithOwner(Tool, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | Tool | ToolGroup
|
||||
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup
|
||||
|
||||
RoutableObjectWithProvider = Annotated[
|
||||
ModelWithOwner
|
||||
|
|
@ -101,7 +97,6 @@ RoutableObjectWithProvider = Annotated[
|
|||
| DatasetWithOwner
|
||||
| ScoringFnWithOwner
|
||||
| BenchmarkWithOwner
|
||||
| ToolWithOwner
|
||||
| ToolGroupWithOwner,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
|
|
@ -480,6 +475,13 @@ InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (depreca
|
|||
If not specified, a default SQLite store will be used.""",
|
||||
)
|
||||
|
||||
conversations_store: SqlStoreConfig | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
Configuration for the persistence store used by the conversations API.
|
||||
If not specified, a default SQLite store will be used.""",
|
||||
)
|
||||
|
||||
# registry of "resources" in the distribution
|
||||
models: list[ModelInput] = Field(default_factory=list)
|
||||
shields: list[ShieldInput] = Field(default_factory=list)
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ from llama_stack.providers.datatypes import (
|
|||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
||||
INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts}
|
||||
INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations}
|
||||
|
||||
|
||||
def stack_apis() -> list[Api]:
|
||||
|
|
@@ -243,6 +243,7 @@ def get_external_providers_from_module(
            spec = module.get_provider_spec()
        else:
            # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
            # in the case we are building we CANNOT import this module of course because it has not been installed.
            spec = ProviderSpec(
                api=Api(provider_api),
                provider_type=provider.provider_type,
@@ -251,9 +252,20 @@ def get_external_providers_from_module(
                config_class="",
            )
            provider_type = provider.provider_type
            # in the case we are building we CANNOT import this module of course because it has not been installed.
            # return a partially filled out spec that the build script will populate.
            registry[Api(provider_api)][provider_type] = spec
        if isinstance(spec, list):
            # optionally allow people to pass inline and remote provider specs as a returned list.
            # with the old method, users could pass in directories of specs using overlapping code
            # we want to ensure we preserve that flexibility in this method.
            logger.info(
                f"Detected a list of external provider specs from {provider.module} adding all to the registry"
            )
            for provider_spec in spec:
                if provider_spec.provider_type != provider.provider_type:
                    continue
                logger.info(f"Adding {provider.provider_type} to registry")
                registry[Api(provider_api)][provider.provider_type] = provider_spec
        else:
            registry[Api(provider_api)][provider_type] = spec
    except ModuleNotFoundError as exc:
        raise ValueError(
            "get_provider_spec not found. If specifying an external provider via `module` in the Provider spec, the Provider must have the `provider.get_provider_spec` module available"
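To make the list-handling branch above concrete, here is a minimal, self-contained sketch. The Spec dataclass is a stand-in for the real ProviderSpec, and the provider types and registry shape are invented for illustration only.

from dataclasses import dataclass

@dataclass
class Spec:
    provider_type: str

def get_provider_spec():
    # An external provider module may return a single spec or a list of specs.
    return [Spec("remote::acme"), Spec("inline::acme")]

requested = "remote::acme"
registry: dict[str, Spec] = {}
spec = get_provider_spec()
if isinstance(spec, list):
    # Keep only the entries matching the provider type being registered.
    for s in spec:
        if s.provider_type == requested:
            registry[requested] = s
else:
    registry[requested] = spec
print(registry)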
@@ -374,6 +374,10 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
        body = options.params or {}
        body |= options.json_data or {}

        # Merge extra_json parameters (extra_body from SDK is converted to extra_json)
        if hasattr(options, "extra_json") and options.extra_json:
            body |= options.extra_json

        matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
        body |= path_params
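A hedged illustration of the merge above, outside the library's actual plumbing: an SDK-style extra_body, surfaced as extra_json, is folded into the request body alongside the regular parameters. The field names in the dictionaries are made up.

# Stand-ins for options.params/json_data and options.extra_json.
body = {"model": "llama3", "stream": False}
extra_json = {"guided_choice": ["yes", "no"]}  # e.g. what an SDK converts extra_body into
if extra_json:
    body |= extra_json
print(body)  # {'model': 'llama3', 'stream': False, 'guided_choice': ['yes', 'no']}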
@@ -10,6 +10,7 @@ from typing import Any
from llama_stack.apis.agents import Agents
from llama_stack.apis.batches import Batches
from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.datatypes import ExternalApiSpec

@@ -96,6 +97,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
        Api.tool_runtime: ToolRuntime,
        Api.files: Files,
        Api.prompts: Prompts,
        Api.conversations: Conversations,
    }

    if external_apis:
@@ -27,7 +27,6 @@ from llama_stack.apis.inference import (
    CompletionResponseStreamChunk,
    Inference,
    ListOpenAIChatCompletionResponse,
    LogProbConfig,
    Message,
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,

@@ -42,12 +41,7 @@ from llama_stack.apis.inference import (
    OpenAIMessageParam,
    OpenAIResponseFormatParam,
    Order,
    ResponseFormat,
    SamplingParams,
    StopReason,
    ToolChoice,
    ToolConfig,
    ToolDefinition,
    ToolPromptFormat,
)
from llama_stack.apis.models import Model, ModelType
@@ -185,129 +179,6 @@ class InferenceRouter(Inference):
            raise ModelTypeError(model_id, model.model_type, expected_model_type)
        return model

    async def chat_completion(
        self,
        model_id: str,
        messages: list[Message],
        sampling_params: SamplingParams | None = None,
        response_format: ResponseFormat | None = None,
        tools: list[ToolDefinition] | None = None,
        tool_choice: ToolChoice | None = None,
        tool_prompt_format: ToolPromptFormat | None = None,
        stream: bool | None = False,
        logprobs: LogProbConfig | None = None,
        tool_config: ToolConfig | None = None,
    ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]:
        logger.debug(
            f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
        )
        if sampling_params is None:
            sampling_params = SamplingParams()
        model = await self._get_model(model_id, ModelType.llm)
        if tool_config:
            if tool_choice and tool_choice != tool_config.tool_choice:
                raise ValueError("tool_choice and tool_config.tool_choice must match")
            if tool_prompt_format and tool_prompt_format != tool_config.tool_prompt_format:
                raise ValueError("tool_prompt_format and tool_config.tool_prompt_format must match")
        else:
            params = {}
            if tool_choice:
                params["tool_choice"] = tool_choice
            if tool_prompt_format:
                params["tool_prompt_format"] = tool_prompt_format
            tool_config = ToolConfig(**params)

        tools = tools or []
        if tool_config.tool_choice == ToolChoice.none:
            tools = []
        elif tool_config.tool_choice == ToolChoice.auto:
            pass
        elif tool_config.tool_choice == ToolChoice.required:
            pass
        else:
            # verify tool_choice is one of the tools
            tool_names = [t.tool_name if isinstance(t.tool_name, str) else t.tool_name.value for t in tools]
            if tool_config.tool_choice not in tool_names:
                raise ValueError(f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}")

        params = dict(
            model_id=model_id,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools,
            tool_choice=tool_choice,
            tool_prompt_format=tool_prompt_format,
            response_format=response_format,
            stream=stream,
            logprobs=logprobs,
            tool_config=tool_config,
        )
        provider = await self.routing_table.get_provider_impl(model_id)
        prompt_tokens = await self._count_tokens(messages, tool_config.tool_prompt_format)

        if stream:
            response_stream = await provider.chat_completion(**params)
            return self.stream_tokens_and_compute_metrics(
                response=response_stream,
                prompt_tokens=prompt_tokens,
                model=model,
                tool_prompt_format=tool_config.tool_prompt_format,
            )

        response = await provider.chat_completion(**params)
        metrics = await self.count_tokens_and_compute_metrics(
            response=response,
            prompt_tokens=prompt_tokens,
            model=model,
            tool_prompt_format=tool_config.tool_prompt_format,
        )
        # these metrics will show up in the client response.
        response.metrics = (
            metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
        )
        return response

    async def completion(
        self,
        model_id: str,
        content: InterleavedContent,
        sampling_params: SamplingParams | None = None,
        response_format: ResponseFormat | None = None,
        stream: bool | None = False,
        logprobs: LogProbConfig | None = None,
    ) -> AsyncGenerator:
        if sampling_params is None:
            sampling_params = SamplingParams()
        logger.debug(
            f"InferenceRouter.completion: {model_id=}, {stream=}, {content=}, {sampling_params=}, {response_format=}",
        )
        model = await self._get_model(model_id, ModelType.llm)
        provider = await self.routing_table.get_provider_impl(model_id)
        params = dict(
            model_id=model_id,
            content=content,
            sampling_params=sampling_params,
            response_format=response_format,
            stream=stream,
            logprobs=logprobs,
        )

        prompt_tokens = await self._count_tokens(content)
        response = await provider.completion(**params)
        if stream:
            return self.stream_tokens_and_compute_metrics(
                response=response,
                prompt_tokens=prompt_tokens,
                model=model,
            )

        metrics = await self.count_tokens_and_compute_metrics(
            response=response, prompt_tokens=prompt_tokens, model=model
        )
        response.metrics = metrics if response.metrics is None else response.metrics + metrics

        return response

    async def openai_completion(
        self,
        model: str,
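With the router's bespoke chat_completion() and completion() methods removed above, only the openai_* surface remains on the router. A hedged client-side sketch of calling that OpenAI-compatible path is below; the base URL, endpoint prefix, and model id are assumptions, not taken from this diff.

from openai import OpenAI

# Assumed local stack endpoint exposing the OpenAI-compatible API.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
resp = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    messages=[{"role": "user", "content": "Hello"}],
)
print(resp.choices[0].message.content)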
@@ -11,7 +11,7 @@ from llama_stack.apis.common.content_types import (
    InterleavedContent,
)
from llama_stack.apis.tools import (
    ListToolsResponse,
    ListToolDefsResponse,
    RAGDocument,
    RAGQueryConfig,
    RAGQueryResult,

@@ -86,6 +86,6 @@ class ToolRuntimeRouter(ToolRuntime):

    async def list_runtime_tools(
        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
    ) -> ListToolsResponse:
    ) -> ListToolDefsResponse:
        logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}")
        return await self.routing_table.list_tools(tool_group_id)
@@ -8,7 +8,7 @@ from typing import Any

from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.errors import ToolGroupNotFoundError
from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
from llama_stack.log import get_logger

@@ -27,7 +27,7 @@ def parse_toolgroup_from_toolgroup_name_pair(toolgroup_name_with_maybe_tool_name


class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
    toolgroups_to_tools: dict[str, list[Tool]] = {}
    toolgroups_to_tools: dict[str, list[ToolDef]] = {}
    tool_to_toolgroup: dict[str, str] = {}

    # overridden

@@ -43,7 +43,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
            routing_key = self.tool_to_toolgroup[routing_key]
        return await super().get_provider_impl(routing_key, provider_id)

    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
    async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
        if toolgroup_id:
            if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id):
                toolgroup_id = group_id

@@ -68,30 +68,19 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
                continue
            all_tools.extend(self.toolgroups_to_tools[toolgroup.identifier])

        return ListToolsResponse(data=all_tools)
        return ListToolDefsResponse(data=all_tools)

    async def _index_tools(self, toolgroup: ToolGroup):
        provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id)
        tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint)

        # TODO: kill this Tool vs ToolDef distinction
        tooldefs = tooldefs_response.data
        tools = []
        for t in tooldefs:
            tools.append(
                Tool(
                    identifier=t.name,
                    toolgroup_id=toolgroup.identifier,
                    description=t.description or "",
                    parameters=t.parameters or [],
                    metadata=t.metadata,
                    provider_id=toolgroup.provider_id,
                )
            )
            t.toolgroup_id = toolgroup.identifier

        self.toolgroups_to_tools[toolgroup.identifier] = tools
        for tool in tools:
            self.tool_to_toolgroup[tool.identifier] = toolgroup.identifier
        self.toolgroups_to_tools[toolgroup.identifier] = tooldefs
        for tool in tooldefs:
            self.tool_to_toolgroup[tool.name] = toolgroup.identifier

    async def list_tool_groups(self) -> ListToolGroupsResponse:
        return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group"))
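A self-contained sketch of the simplified indexing above: the provider's tool definitions are stored directly and keyed back to their toolgroup by name, instead of being wrapped in separate Tool objects. ToolDefStub is a stand-in for the real ToolDef model, and the group and tool names are invented.

from dataclasses import dataclass

@dataclass
class ToolDefStub:
    name: str
    toolgroup_id: str | None = None

toolgroups_to_tools: dict[str, list[ToolDefStub]] = {}
tool_to_toolgroup: dict[str, str] = {}

group = "builtin::example"
tooldefs = [ToolDefStub("web_search"), ToolDefStub("query_db")]
for t in tooldefs:
    t.toolgroup_id = group          # tag each definition with its group
toolgroups_to_tools[group] = tooldefs
for tool in tooldefs:
    tool_to_toolgroup[tool.name] = group  # reverse index keyed on the tool name
print(tool_to_toolgroup)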
@@ -102,12 +91,12 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
            raise ToolGroupNotFoundError(toolgroup_id)
        return tool_group

    async def get_tool(self, tool_name: str) -> Tool:
    async def get_tool(self, tool_name: str) -> ToolDef:
        if tool_name in self.tool_to_toolgroup:
            toolgroup_id = self.tool_to_toolgroup[tool_name]
            tools = self.toolgroups_to_tools[toolgroup_id]
            for tool in tools:
                if tool.identifier == tool_name:
                if tool.name == tool_name:
                    return tool
        raise ValueError(f"Tool '{tool_name}' not found")

@@ -132,7 +121,6 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
        # baked in some of the code and tests right now.
        if not toolgroup.mcp_endpoint:
            await self._index_tools(toolgroup)
        return toolgroup

    async def unregister_toolgroup(self, toolgroup_id: str) -> None:
        await self.unregister_object(await self.get_tool_group(toolgroup_id))
@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import argparse
import asyncio
import concurrent.futures
import functools

@@ -12,7 +11,6 @@ import inspect
import json
import logging # allow-direct-logging
import os
import ssl
import sys
import traceback
import warnings

@@ -35,7 +33,6 @@ from pydantic import BaseModel, ValidationError

from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.cli.utils import add_config_distro_args, get_config_from_args
from llama_stack.core.access_control.access_control import AccessDeniedError
from llama_stack.core.datatypes import (
    AuthenticationRequiredError,

@@ -55,7 +52,6 @@ from llama_stack.core.stack import (
    Stack,
    cast_image_name_to_string,
    replace_env_vars,
    validate_env_pair,
)
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
@@ -257,7 +253,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:

            return result
        except Exception as e:
            if logger.isEnabledFor(logging.DEBUG):
            if logger.isEnabledFor(logging.INFO):
                logger.exception(f"Error executing endpoint {route=} {method=}")
            else:
                logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
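A minimal standalone sketch of the behavior the hunk above switches on: at INFO (or more verbose) the full traceback is emitted via logger.exception, otherwise only a one-line error is logged. The logger name and route string are illustrative.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("server")

try:
    raise RuntimeError("boom")
except Exception as e:
    if logger.isEnabledFor(logging.INFO):
        # Emits the message plus the traceback of the active exception.
        logger.exception("Error executing endpoint route='/v1/models' method='GET'")
    else:
        # Quieter levels: single-line error only.
        logger.error(f"Error executing endpoint: {e}")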
@@ -333,23 +329,18 @@ class ClientVersionMiddleware:
        return await self.app(scope, receive, send)


def create_app(
    config_file: str | None = None,
    env_vars: list[str] | None = None,
) -> StackApp:
def create_app() -> StackApp:
    """Create and configure the FastAPI application.

    Args:
        config_file: Path to config file. If None, uses LLAMA_STACK_CONFIG env var or default resolution.
        env_vars: List of environment variables in KEY=value format.
        disable_version_check: Whether to disable version checking. If None, uses LLAMA_STACK_DISABLE_VERSION_CHECK env var.
    This factory function reads configuration from environment variables:
    - LLAMA_STACK_CONFIG: Path to config file (required)

    Returns:
        Configured StackApp instance.
    """
    config_file = config_file or os.getenv("LLAMA_STACK_CONFIG")
    config_file = os.getenv("LLAMA_STACK_CONFIG")
    if config_file is None:
        raise ValueError("No config file provided and LLAMA_STACK_CONFIG env var is not set")
        raise ValueError("LLAMA_STACK_CONFIG environment variable is required")

    config_file = resolve_config_or_distro(config_file, Mode.RUN)
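A hedged usage sketch for the factory change above: with create_app() taking no arguments and reading LLAMA_STACK_CONFIG from the environment, a launcher only needs to export that variable before building the app. The import path and config name are assumptions, not taken from this diff.

import os

os.environ["LLAMA_STACK_CONFIG"] = "run.yaml"  # path or distro name (assumed example)

from llama_stack.core.server.server import create_app  # import path assumed

app = create_app()  # raises ValueError if LLAMA_STACK_CONFIG is unset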
@@ -361,16 +352,6 @@ def create_app(
        logger_config = LoggingConfig(**cfg)
        logger = get_logger(name=__name__, category="core::server", config=logger_config)

    if env_vars:
        for env_pair in env_vars:
            try:
                key, value = validate_env_pair(env_pair)
                logger.info(f"Setting environment variable {key} => {value}")
                os.environ[key] = value
            except ValueError as e:
                logger.error(f"Error: {str(e)}")
                raise ValueError(f"Invalid environment variable format: {env_pair}") from e

    config = replace_env_vars(config_contents)
    config = StackRunConfig(**cast_image_name_to_string(config))
@@ -451,6 +432,7 @@ def create_app(
    apis_to_serve.add("inspect")
    apis_to_serve.add("providers")
    apis_to_serve.add("prompts")
    apis_to_serve.add("conversations")
    for api_str in apis_to_serve:
        api = Api(api_str)
@@ -493,101 +475,6 @@ def create_app(
    return app


def main(args: argparse.Namespace | None = None):
    """Start the LlamaStack server."""
    parser = argparse.ArgumentParser(description="Start the LlamaStack server.")

    add_config_distro_args(parser)
    parser.add_argument(
        "--port",
        type=int,
        default=int(os.getenv("LLAMA_STACK_PORT", 8321)),
        help="Port to listen on",
    )
    parser.add_argument(
        "--env",
        action="append",
        help="Environment variables in KEY=value format. Can be specified multiple times.",
    )

    # Determine whether the server args are being passed by the "run" command, if this is the case
    # the args will be passed as a Namespace object to the main function, otherwise they will be
    # parsed from the command line
    if args is None:
        args = parser.parse_args()

    config_or_distro = get_config_from_args(args)

    try:
        app = create_app(
            config_file=config_or_distro,
            env_vars=args.env,
        )
    except Exception as e:
        logger.error(f"Error creating app: {str(e)}")
        sys.exit(1)

    config_file = resolve_config_or_distro(config_or_distro, Mode.RUN)
    with open(config_file) as fp:
        config_contents = yaml.safe_load(fp)
    if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
        logger_config = LoggingConfig(**cfg)
    else:
        logger_config = None
    config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))

    import uvicorn

    # Configure SSL if certificates are provided
    port = args.port or config.server.port

    ssl_config = None
    keyfile = config.server.tls_keyfile
    certfile = config.server.tls_certfile

    if keyfile and certfile:
        ssl_config = {
            "ssl_keyfile": keyfile,
            "ssl_certfile": certfile,
        }
        if config.server.tls_cafile:
            ssl_config["ssl_ca_certs"] = config.server.tls_cafile
            ssl_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
            logger.info(
                f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
            )
        else:
            logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")

    listen_host = config.server.host or ["::", "0.0.0.0"]
    logger.info(f"Listening on {listen_host}:{port}")

    uvicorn_config = {
        "app": app,
        "host": listen_host,
        "port": port,
        "lifespan": "on",
        "log_level": logger.getEffectiveLevel(),
        "log_config": logger_config,
    }
    if ssl_config:
        uvicorn_config.update(ssl_config)

    # We need to catch KeyboardInterrupt because uvicorn's signal handling
    # re-raises SIGINT signals using signal.raise_signal(), which Python
    # converts to KeyboardInterrupt. Without this catch, we'd get a confusing
    # stack trace when using Ctrl+C or kill -2 (SIGINT).
    # SIGTERM (kill -15) works fine without this because Python doesn't
    # have a default handler for it.
    #
    # Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
    # signal handling but this is quite intrusive and not worth the effort.
    try:
        asyncio.run(uvicorn.Server(uvicorn.Config(**uvicorn_config)).serve())
    except (KeyboardInterrupt, SystemExit):
        logger.info("Received interrupt signal, shutting down gracefully...")


def _log_run_config(run_config: StackRunConfig):
    """Logs the run config with redacted fields and disabled providers removed."""
    logger.info("Run configuration:")
@@ -614,7 +501,3 @@ def remove_disabled_providers(obj):
        return [item for item in (remove_disabled_providers(i) for i in obj) if item is not None]
    else:
        return obj


if __name__ == "__main__":
    main()